# -*- coding: utf-8 -*-
"""All torch modules that are used by the testproblems."""
import torch
from torch import nn
from torch.nn import functional as F
from .testproblems_utils import (
_truncated_normal_init,
mean_allcnnc,
residual_block,
tfconv2d,
tfconv2d_transpose,
tfmaxpool2d,
)
class net_mnist_logreg(nn.Sequential):
def __init__(self, num_outputs):
super(net_mnist_logreg, self).__init__()
self.add_module("flatten", nn.Flatten())
self.add_module("dense", nn.Linear(in_features=784, out_features=num_outputs))
        # init the layers
nn.init.constant_(self.dense.bias, 0.0)
nn.init.constant_(self.dense.weight, 0.0)
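

# A minimal usage sketch (illustrative, not part of the benchmark API): feed a
# dummy MNIST-sized batch through the logistic-regression net. Batch size 8 and
# num_outputs=10 are assumptions made for the example.
def _example_mnist_logreg():
    net = net_mnist_logreg(num_outputs=10)
    x = torch.rand(8, 1, 28, 28)  # dummy MNIST batch
    logits = net(x)
    assert logits.shape == (8, 10)  # nn.Flatten maps (8, 1, 28, 28) to (8, 784)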
class net_cifar10_3c3d(nn.Sequential):
""" Basic conv net for cifar10/100. The network consists of
- three conv layers with ReLUs, each followed by max-pooling
- two fully-connected layers with ``512`` and ``256`` units and ReLU activation
- output layer with softmax
The weight matrices are initialized using Xavier initialization and the biases
are initialized to ``0.0``."""
def __init__(self, num_outputs):
"""Args:
            num_outputs (int): The number of outputs (i.e. target classes)."""
super(net_cifar10_3c3d, self).__init__()
self.add_module(
"conv1", tfconv2d(in_channels=3, out_channels=64, kernel_size=5)
)
self.add_module("relu1", nn.ReLU())
self.add_module(
"maxpool1", tfmaxpool2d(kernel_size=3, stride=2, tf_padding_type="same"),
)
self.add_module(
"conv2", tfconv2d(in_channels=64, out_channels=96, kernel_size=3)
)
self.add_module("relu2", nn.ReLU())
self.add_module(
"maxpool2", tfmaxpool2d(kernel_size=3, stride=2, tf_padding_type="same"),
)
self.add_module(
"conv3",
tfconv2d(
in_channels=96, out_channels=128, kernel_size=3, tf_padding_type="same",
),
)
self.add_module("relu3", nn.ReLU())
self.add_module(
"maxpool3", tfmaxpool2d(kernel_size=3, stride=2, tf_padding_type="same"),
)
self.add_module("flatten", nn.Flatten())
self.add_module("dense1", nn.Linear(in_features=3 * 3 * 128, out_features=512))
self.add_module("relu4", nn.ReLU())
self.add_module("dense2", nn.Linear(in_features=512, out_features=256))
self.add_module("relu5", nn.ReLU())
self.add_module("dense3", nn.Linear(in_features=256, out_features=num_outputs))
# init the layers
for module in self.modules():
if isinstance(module, nn.Conv2d):
nn.init.constant_(module.bias, 0.0)
nn.init.xavier_normal_(module.weight)
if isinstance(module, nn.Linear):
nn.init.constant_(module.bias, 0.0)
nn.init.xavier_uniform_(module.weight)
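

# A minimal shape-check sketch (illustrative): it confirms that a CIFAR-sized
# input reaches the first dense layer with 3 * 3 * 128 features, as the
# in_features above assume. Batch size 4 and num_outputs=10 are example values.
def _example_cifar10_3c3d_shapes():
    net = net_cifar10_3c3d(num_outputs=10)
    out = net(torch.rand(4, 3, 32, 32))  # dummy CIFAR batch
    assert out.shape == (4, 10)  # the conv/pool stack reduces 32x32 to 3x3x128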
class net_mnist_2c2d(nn.Sequential):
""" Basic conv net for (Fashion-)MNIST. The network has been adapted from the `TensorFlow tutorial\
<https://www.tensorflow.org/tutorials/estimators/cnn>`_ and consists of
- two conv layers with ReLUs, each followed by max-pooling
    - one fully-connected layer with ReLU
- output layer with softmax
The weight matrices are initialized with truncated normal (standard deviation
of ``0.05``) and the biases are initialized to ``0.05``."""
def __init__(self, num_outputs):
"""Args:
            num_outputs (int): The number of outputs (i.e. target classes)."""
super(net_mnist_2c2d, self).__init__()
self.add_module(
"conv1",
tfconv2d(
in_channels=1, out_channels=32, kernel_size=5, tf_padding_type="same",
),
)
self.add_module("relu1", nn.ReLU())
self.add_module(
"max_pool1", tfmaxpool2d(kernel_size=2, stride=2, tf_padding_type="same"),
)
self.add_module(
"conv2",
tfconv2d(
in_channels=32, out_channels=64, kernel_size=5, tf_padding_type="same",
),
)
self.add_module("relu2", nn.ReLU())
self.add_module(
"max_pool2", tfmaxpool2d(kernel_size=2, stride=2, tf_padding_type="same"),
)
self.add_module("flatten", nn.Flatten())
self.add_module("dense1", nn.Linear(in_features=7 * 7 * 64, out_features=1024))
self.add_module("relu3", nn.ReLU())
self.add_module("dense2", nn.Linear(in_features=1024, out_features=num_outputs))
# init the layers
for module in self.modules():
if isinstance(module, nn.Conv2d):
nn.init.constant_(module.bias, 0.05)
module.weight.data = _truncated_normal_init(
module.weight.data, mean=0, stddev=0.05
)
if isinstance(module, nn.Linear):
nn.init.constant_(module.bias, 0.05)
module.weight.data = _truncated_normal_init(
module.weight.data, mean=0, stddev=0.05
)
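

# A minimal forward-shape sketch (illustrative): the two "same"-padded 2x2
# max-pools each halve MNIST's 28x28 input, so the flatten sees 7 * 7 * 64
# features. Batch size 4 and num_outputs=10 are example values.
def _example_mnist_2c2d_shapes():
    net = net_mnist_2c2d(num_outputs=10)
    out = net(torch.rand(4, 1, 28, 28))  # dummy MNIST batch
    assert out.shape == (4, 10)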
class net_vae(nn.Module):
""" A basic VAE for (Faschion-)MNIST. The network has been adapted from the `here\
<https://towardsdatascience.com/teaching-a-variational-autoencoder-vae-to-draw-mnist-characters-978675c95776>`_
and consists of an encoder:
- With three convolutional layers with each ``64`` filters.
- Using a leaky ReLU activation function with :math:`\\alpha = 0.3`
- Dropout layers after each convolutional layer with a rate of ``0.2``.
and an decoder:
- With two dense layers with ``24`` and ``49`` units and leaky ReLU activation.
- With three deconvolutional layers with each ``64`` filters.
- Dropout layers after the first two deconvolutional layer with a rate of ``0.2``.
- A final dense layer with ``28 x 28`` units and sigmoid activation.
"""
def __init__(self, n_latent):
"""Args:
n_latent (int): Size of the latent space."""
super(net_vae, self).__init__()
self.n_latent = n_latent
# encoding layers
self.conv1 = tfconv2d(
in_channels=1,
out_channels=64,
kernel_size=4,
stride=2,
tf_padding_type="same",
)
self.dropout1 = nn.Dropout(p=0.2)
self.conv2 = tfconv2d(
in_channels=64,
out_channels=64,
kernel_size=4,
stride=2,
tf_padding_type="same",
)
self.dropout2 = nn.Dropout(p=0.2)
self.conv3 = tfconv2d(
in_channels=64,
out_channels=64,
kernel_size=4,
stride=1,
tf_padding_type="same",
)
self.dropout3 = nn.Dropout(p=0.2)
self.dense1 = nn.Linear(in_features=7 * 7 * 64, out_features=self.n_latent)
self.dense2 = nn.Linear(in_features=7 * 7 * 64, out_features=self.n_latent)
# decoding layers
        self.dense3 = nn.Linear(in_features=self.n_latent, out_features=24)
self.dense4 = nn.Linear(in_features=24, out_features=24 * 2 + 1)
self.deconv1 = tfconv2d_transpose(
in_channels=1,
out_channels=64,
kernel_size=4,
stride=2,
tf_padding_type="same",
)
self.dropout4 = nn.Dropout(p=0.2)
self.deconv2 = tfconv2d_transpose(
in_channels=64,
out_channels=64,
kernel_size=4,
stride=1,
tf_padding_type="same",
)
self.dropout5 = nn.Dropout(p=0.2)
self.deconv3 = tfconv2d_transpose(
in_channels=64,
out_channels=64,
kernel_size=4,
stride=1,
tf_padding_type="same",
)
self.dropout6 = nn.Dropout(p=0.2)
self.dense5 = nn.Linear(in_features=14 * 14 * 64, out_features=28 * 28)
# init the layers
for module in self.modules():
if isinstance(module, nn.Conv2d):
nn.init.constant_(module.bias, 0.0)
nn.init.xavier_uniform_(module.weight)
if isinstance(module, nn.ConvTranspose2d):
nn.init.constant_(module.bias, 0.0)
nn.init.xavier_uniform_(module.weight)
if isinstance(module, nn.Linear):
nn.init.constant_(module.bias, 0.0)
nn.init.xavier_uniform_(module.weight)
def encode(self, x):
x = F.leaky_relu(self.conv1(x), negative_slope=0.3)
x = self.dropout1(x)
x = F.leaky_relu(self.conv2(x), negative_slope=0.3)
x = self.dropout2(x)
x = F.leaky_relu(self.conv3(x), negative_slope=0.3)
x = self.dropout3(x)
x = x.view(-1, 7 * 7 * 64)
        mean = self.dense1(x)
        # dense2 predicts the log-variance; halving it gives the log standard
        # deviation, so torch.exp(std_dev) below is the standard deviation
        std_dev = 0.5 * self.dense2(x)
        eps = torch.randn_like(std_dev)
        z = mean + eps * torch.exp(std_dev)  # reparameterization trick
        return z, mean, std_dev
def decode(self, z):
x = F.leaky_relu(self.dense3(z), negative_slope=0.3)
x = F.leaky_relu(self.dense4(x), negative_slope=0.3)
x = x.view(-1, 1, 7, 7)
x = F.relu(self.deconv1(x))
x = self.dropout4(x)
x = F.relu(self.deconv2(x))
x = self.dropout5(x)
x = F.relu(self.deconv3(x))
x = self.dropout6(x)
x = x.view(-1, 14 * 14 * 64)
        x = torch.sigmoid(self.dense5(x))
images = x.view(-1, 1, 28, 28)
return images
def forward(self, x):
z, mean, std_dev = self.encode(x)
image = self.decode(z)
return image, mean, std_dev
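

# A minimal round-trip sketch for the VAE (illustrative; n_latent=8 and the
# batch size are example values). eval() disables the dropout layers; encode()
# still samples via the reparameterization trick.
def _example_vae_roundtrip():
    net = net_vae(n_latent=8).eval()
    x = torch.rand(2, 1, 28, 28)  # dummy MNIST batch
    images, mean, std_dev = net(x)
    assert images.shape == (2, 1, 28, 28)
    assert mean.shape == (2, 8) and std_dev.shape == (2, 8)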
class net_vgg(nn.Sequential):
def __init__(self, num_outputs, variant):
super(net_vgg, self).__init__()
self.add_module("upsampling", nn.UpsamplingBilinear2d(size=(224, 224)))
self.add_module(
"conv11",
tfconv2d(
in_channels=3, out_channels=64, kernel_size=3, tf_padding_type="same",
),
)
self.add_module("relu11", nn.ReLU())
self.add_module(
"conv12",
tfconv2d(
in_channels=64, out_channels=64, kernel_size=3, tf_padding_type="same",
),
)
self.add_module("relu12", nn.ReLU())
self.add_module(
"max_pool1", tfmaxpool2d(kernel_size=2, stride=2, tf_padding_type="same"),
)
self.add_module(
"conv21",
tfconv2d(
in_channels=64, out_channels=128, kernel_size=3, tf_padding_type="same",
),
)
self.add_module("relu21", nn.ReLU())
self.add_module(
"conv22",
tfconv2d(
in_channels=128,
out_channels=128,
kernel_size=3,
tf_padding_type="same",
),
)
self.add_module("relu22", nn.ReLU())
self.add_module(
"max_pool2", tfmaxpool2d(kernel_size=2, stride=2, tf_padding_type="same"),
)
self.add_module(
"conv31",
tfconv2d(
in_channels=128,
out_channels=256,
kernel_size=3,
tf_padding_type="same",
),
)
self.add_module("relu31", nn.ReLU())
self.add_module(
"conv32",
tfconv2d(
in_channels=256,
out_channels=256,
kernel_size=3,
tf_padding_type="same",
),
)
self.add_module("relu32", nn.ReLU())
self.add_module(
"conv33",
tfconv2d(
in_channels=256,
out_channels=256,
kernel_size=3,
tf_padding_type="same",
),
)
self.add_module("relu33", nn.ReLU())
if variant == 19:
self.add_module(
"conv34",
tfconv2d(
in_channels=256,
out_channels=256,
kernel_size=3,
tf_padding_type="same",
),
)
self.add_module("relu34", nn.ReLU())
self.add_module(
"max_pool3", tfmaxpool2d(kernel_size=2, stride=2, tf_padding_type="same"),
)
self.add_module(
"conv41",
tfconv2d(
in_channels=256,
out_channels=512,
kernel_size=3,
tf_padding_type="same",
),
)
self.add_module("relu41", nn.ReLU())
self.add_module(
"conv42",
tfconv2d(
in_channels=512,
out_channels=512,
kernel_size=3,
tf_padding_type="same",
),
)
self.add_module("relu42", nn.ReLU())
self.add_module(
"conv43",
tfconv2d(
in_channels=512,
out_channels=512,
kernel_size=3,
tf_padding_type="same",
),
)
self.add_module("relu43", nn.ReLU())
if variant == 19:
self.add_module(
"conv44",
tfconv2d(
in_channels=512,
out_channels=512,
kernel_size=3,
tf_padding_type="same",
),
)
self.add_module("relu44", nn.ReLU())
self.add_module(
"max_pool4", tfmaxpool2d(kernel_size=2, stride=2, tf_padding_type="same"),
)
self.add_module(
"conv51",
tfconv2d(
in_channels=512,
out_channels=512,
kernel_size=3,
tf_padding_type="same",
),
)
self.add_module("relu51", nn.ReLU())
self.add_module(
"conv52",
tfconv2d(
in_channels=512,
out_channels=512,
kernel_size=3,
tf_padding_type="same",
),
)
self.add_module("relu52", nn.ReLU())
self.add_module(
"conv53",
tfconv2d(
in_channels=512,
out_channels=512,
kernel_size=3,
tf_padding_type="same",
),
)
self.add_module("relu53", nn.ReLU())
if variant == 19:
self.add_module(
"conv54",
tfconv2d(
in_channels=512,
out_channels=512,
kernel_size=3,
tf_padding_type="same",
),
)
self.add_module("relu54", nn.ReLU())
self.add_module(
"max_pool5", tfmaxpool2d(kernel_size=2, stride=2, tf_padding_type="same"),
)
self.add_module("flatten", nn.Flatten())
self.add_module("dense1", nn.Linear(in_features=7 * 7 * 512, out_features=4096))
self.add_module("relu1", nn.ReLU())
self.add_module("dropout1", nn.Dropout(p=0.5))
self.add_module("dense2", nn.Linear(in_features=4096, out_features=4096))
self.add_module("relu2", nn.ReLU())
self.add_module("dropout2", nn.Dropout(p=0.5))
self.add_module("dense3", nn.Linear(in_features=4096, out_features=num_outputs))
# init the layers
for module in self.modules():
if isinstance(module, nn.Conv2d):
nn.init.constant_(module.bias, 0.0)
nn.init.xavier_normal_(module.weight)
if isinstance(module, nn.Linear):
nn.init.constant_(module.bias, 0.0)
nn.init.xavier_uniform_(module.weight)
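

# A minimal usage sketch (illustrative): the variant flag selects between the
# 16- and 19-layer configuration by toggling the fourth conv in each of the
# last three blocks, i.e. six extra modules. Inputs are bilinearly upsampled
# to 224x224 first, so a CIFAR-sized batch works as-is.
def _example_vgg_variants():
    net16 = net_vgg(num_outputs=100, variant=16)
    net19 = net_vgg(num_outputs=100, variant=19)
    assert len(list(net19.children())) == len(list(net16.children())) + 6
    out = net16(torch.rand(2, 3, 32, 32))
    assert out.shape == (2, 100)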
class net_cifar100_allcnnc(nn.Sequential):
def __init__(self):
super(net_cifar100_allcnnc, self).__init__()
self.add_module("dropout1", nn.Dropout(p=0.2))
self.add_module(
"conv1",
tfconv2d(
in_channels=3, out_channels=96, kernel_size=3, tf_padding_type="same",
),
)
self.add_module("relu1", nn.ReLU())
self.add_module(
"conv2",
tfconv2d(
in_channels=96, out_channels=96, kernel_size=3, tf_padding_type="same",
),
)
self.add_module("relu2", nn.ReLU())
self.add_module(
"conv3",
tfconv2d(
in_channels=96,
out_channels=96,
kernel_size=3,
stride=(2, 2),
tf_padding_type="same",
),
)
self.add_module("relu3", nn.ReLU())
self.add_module("dropout2", nn.Dropout(p=0.5))
self.add_module(
"conv4",
tfconv2d(
in_channels=96, out_channels=192, kernel_size=3, tf_padding_type="same",
),
)
self.add_module("relu4", nn.ReLU())
self.add_module(
"conv5",
tfconv2d(
in_channels=192,
out_channels=192,
kernel_size=3,
tf_padding_type="same",
),
)
self.add_module("relu5", nn.ReLU())
self.add_module(
"conv6",
tfconv2d(
in_channels=192,
out_channels=192,
kernel_size=3,
stride=(2, 2),
tf_padding_type="same",
),
)
self.add_module("relu6", nn.ReLU())
self.add_module("dropout3", nn.Dropout(p=0.5))
self.add_module(
"conv7", tfconv2d(in_channels=192, out_channels=192, kernel_size=3)
)
self.add_module("relu7", nn.ReLU())
self.add_module(
"conv8",
tfconv2d(
in_channels=192,
out_channels=192,
kernel_size=1,
tf_padding_type="same",
),
)
self.add_module("relu8", nn.ReLU())
self.add_module(
"conv9",
tfconv2d(
in_channels=192,
out_channels=100,
kernel_size=1,
tf_padding_type="same",
),
)
self.add_module("relu9", nn.ReLU())
self.add_module("mean", mean_allcnnc())
# init the layers
for module in self.modules():
if isinstance(module, nn.Conv2d):
nn.init.constant_(module.bias, 0.1)
nn.init.xavier_normal_(module.weight)
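

# A minimal forward-shape sketch (illustrative): All-CNN-C is fully
# convolutional; the final 1x1 convs produce a 100-channel feature map and
# mean_allcnnc averages it over the spatial dimensions to yield class scores.
def _example_allcnnc():
    net = net_cifar100_allcnnc().eval()  # eval() disables the dropout layers
    out = net(torch.rand(2, 3, 32, 32))  # dummy CIFAR-100 batch
    assert out.shape == (2, 100)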
class net_wrn(nn.Sequential):
def __init__(
self, num_residual_blocks, widening_factor, num_outputs, bn_momentum=0.9
):
super(net_wrn, self).__init__()
# initial conv
self.add_module("conv1", tfconv2d(3, 16, 3, bias=False, tf_padding_type="same"))
self._filters = [
16,
16 * widening_factor,
32 * widening_factor,
64 * widening_factor,
]
self._strides = [1, 2, 2]
# loop over three residual groups
for group_number in range(1, 4):
            # the first residual block is special since it has to change the
            # number of output channels for the skip connection
self.add_module(
"res_unit" + str(group_number) + str(1),
residual_block(
in_channels=self._filters[group_number - 1],
out_channels=self._filters[group_number],
first_stride=self._strides[group_number - 1],
is_first_block=True,
),
)
# loop over further residual blocks of this group
for residual_block_number in range(1, num_residual_blocks):
self.add_module(
"res_unit" + str(group_number) + str(residual_block_number + 1),
residual_block(
in_channels=self._filters[group_number],
out_channels=self._filters[group_number],
),
)
# last layer
self.add_module("bn", nn.BatchNorm2d(self._filters[3], momentum=bn_momentum))
self.add_module("relu", nn.ReLU())
self.add_module("avg_pool", nn.AvgPool2d(8))
# reshape and dense layer
self.add_module("flatten", nn.Flatten())
self.add_module(
"dense", nn.Linear(in_features=self._filters[3], out_features=num_outputs),
)
# initialisation
for module in self.modules():
if isinstance(module, nn.Conv2d):
nn.init.xavier_uniform_(module.weight)
if isinstance(module, nn.BatchNorm2d):
nn.init.constant_(module.weight, 1.0) # gamma
nn.init.constant_(module.bias, 0.0) # beta
nn.init.constant_(module.running_mean, 0.0)
nn.init.constant_(module.running_var, 1.0)
if isinstance(module, nn.Linear):
nn.init.xavier_uniform_(module.weight)
nn.init.constant_(module.bias, 0.0)
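

# A minimal sketch of the usual Wide-ResNet naming (an assumption based on the
# WRN paper, not fixed by this module): a WRN-d-k has depth
# d = 6 * num_residual_blocks + 4 and widening factor k, so WRN-40-4 uses
# num_residual_blocks=6 and widening_factor=4.
def _example_wrn_40_4():
    net = net_wrn(num_residual_blocks=6, widening_factor=4, num_outputs=100)
    out = net(torch.rand(2, 3, 32, 32))  # dummy CIFAR batch
    assert out.shape == (2, 100)  # avg-pooling the 8x8 map leaves 64 * 4 features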
class net_char_rnn(nn.Module):
def __init__(self, seq_len, hidden_dim, vocab_size, num_layers):
super(net_char_rnn, self).__init__()
self.embedding = nn.Embedding(
num_embeddings=vocab_size, embedding_dim=hidden_dim
)
self.lstm = nn.LSTM(
input_size=hidden_dim,
hidden_size=hidden_dim,
num_layers=num_layers,
dropout=0.2,
batch_first=True,
)
self.dense = nn.Linear(in_features=hidden_dim, out_features=vocab_size)
# TODO init layers?
def forward(self, x, state=None):
"""state is a tuple for hidden and cell state for initialisation of the lstm"""
x = self.embedding(x)
# if no state is provided, default the state to zeros
if state is None:
x, new_state = self.lstm(x)
else:
x, new_state = self.lstm(x, state)
x = self.dense(x)
return x, new_state
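

# A minimal sketch of carrying the LSTM state across calls (illustrative; the
# vocabulary size, sequence length, and sizes below are example values):
def _example_char_rnn_state():
    net = net_char_rnn(seq_len=50, hidden_dim=128, vocab_size=83, num_layers=2)
    tokens = torch.randint(0, 83, (4, 50))  # batch of token-id sequences
    logits, state = net(tokens)  # state defaults to zeros
    logits, state = net(tokens, state)  # reuse the state for the next chunk
    assert logits.shape == (4, 50, 83)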
class net_quadratic_deep(nn.Sequential):
r"""This architecture creates an output which corresponds to a loss functions of the form
:math:`(\theta - x)^T * Q * (\theta - x)`
with Hessian ``Q`` and "data" ``x`` coming from the quadratic data set, i.e.,
zero-mean normal.
The parameters are initialized to 1.
"""
def __init__(self, hessian):
"""Args:
hessian (np.array): The matrix for the quadratic form."""
super().__init__()
# for init
dim = hessian.size(0)
sqrt_hessian = self._compute_sqrt(hessian)
self.add_module("shift", nn.Linear(dim, dim, bias=True))
self.add_module("scale", nn.Linear(dim, dim, bias=False))
        # init: shift is frozen to compute theta - x (theta being the trainable
        # bias), scale is frozen to multiply by the transposed Cholesky factor
        self.shift.weight.data = -torch.eye(dim, dim)
        self.shift.weight.requires_grad = False
        nn.init.ones_(self.shift.bias)
        self.scale.weight.data = sqrt_hessian.t()
        self.scale.weight.requires_grad = False
@staticmethod
def _compute_sqrt(mat):
        # lower-triangular Cholesky factor L with mat = L @ L.t()
        return torch.linalg.cholesky(mat)
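

# A minimal numerical check (illustrative) of why this network realizes the
# quadratic form: shift is frozen to compute theta - x, scale multiplies by the
# Cholesky factor L of Q = L L^T, so the squared norm of the output equals
# (theta - x)^T Q (theta - x).
def _example_quadratic_deep():
    dim = 3
    hessian = 2.0 * torch.eye(dim)  # a simple positive-definite Q
    net = net_quadratic_deep(hessian)
    x = torch.rand(1, dim)
    out = net(x)
    theta = net.shift.bias  # initialized to ones
    expected = (theta - x) @ hessian @ (theta - x).t()
    assert torch.allclose(out.pow(2).sum(), expected.squeeze(), atol=1e-5)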
class net_mlp(nn.Sequential):
""" A basic MLP architecture. The network is build as follows:
- Four fully-connected layers with ``1000``, ``500``,``100`` and ``num_outputs``
units per layer, where ``num_outputs`` is the number of ouputs (i.e. class labels).
- The first three layers use ReLU activation, and the last one a softmax
activation.
- The biases are initialized to ``0.0`` and the weight matrices with
truncated normal (standard deviation of ``3e-2``)"""
def __init__(self, num_outputs):
super(net_mlp, self).__init__()
self.add_module("flatten", nn.Flatten())
self.add_module("dense1", nn.Linear(784, 1000))
self.add_module("relu1", nn.ReLU())
self.add_module("dense2", nn.Linear(1000, 500))
self.add_module("relu2", nn.ReLU())
self.add_module("dense3", nn.Linear(500, 100))
self.add_module("relu3", nn.ReLU())
self.add_module("dense4", nn.Linear(100, num_outputs))
for module in self.modules():
if isinstance(module, nn.Linear):
nn.init.constant_(module.bias, 0.0)
module.weight.data = _truncated_normal_init(
module.weight.data, mean=0, stddev=3e-2
)
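

# A minimal usage sketch (illustrative): the MLP flattens any 784-pixel input,
# e.g. an MNIST batch. num_outputs=10 and the batch size are example values.
def _example_mlp():
    net = net_mlp(num_outputs=10)
    out = net(torch.rand(4, 1, 28, 28))  # dummy MNIST batch
    assert out.shape == (4, 10)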