# Source code for deepobs.pytorch.datasets.quadratic

# -*- coding: utf-8 -*-
"""Quadratic DeepOBS dataset."""

import numpy as np
import torch
from torch.utils import data as dat

from . import dataset


class quadratic(dataset.DataSet):
    """DeepOBS data set class for an n-dimensional stochastic quadratic test
    problem.

    This toy data set consists of a fixed number (``train_size``) of iid draws
    from a zero-mean normal distribution in ``dim`` dimensions with isotropic
    covariance specified by ``noise_level``. The train, validation and test
    splits are drawn independently with different fixed seeds (``42``, ``44``
    and ``43`` respectively) and each contain ``train_size`` points. The
    labels of every split are all-zero ``float32`` tensors of shape
    ``(train_size, dim)``.

    Args:
      batch_size (int): The mini-batch size to use. Note that, if
          ``batch_size`` is not a divider of the dataset size (``train_size``,
          by default ``1000``) the remainder is dropped in each epoch (after
          shuffling).
      dim (int): Dimensionality of the quadratic. Defaults to ``100``.
      train_size (int): Number of data points in each of the train,
          validation and test sets (the train eval set reuses the train
          loader). Defaults to ``1000``.
      noise_level (float): Standard deviation of the data points around the
          (zero) mean. The data points are drawn from a Gaussian distribution.
          Defaults to ``0.6``.
    """

    def __init__(self, batch_size, dim=100, train_size=1000, noise_level=0.6):
        """Creates a new Quadratic instance.

        Args:
          batch_size (int): The mini-batch size to use. Note that, if
              ``batch_size`` is not a divider of the dataset size
              (``train_size``, by default ``1000``) the remainder is dropped
              in each epoch (after shuffling).
          dim (int): Dimensionality of the quadratic. Defaults to ``100``.
          train_size (int): Number of data points in each of the train,
              validation and test sets. Defaults to ``1000``.
          noise_level (float): Standard deviation of the data points around
              the (zero) mean. Defaults to ``0.6``.
        """
        self._name = "quadratic"
        self._dim = dim
        self._train_size = train_size
        self._noise_level = noise_level
        # The attributes above are assigned before delegating to the base
        # class; presumably its constructor already invokes the ``_make_*``
        # hooks below, which read them -- confirm against ``dataset.DataSet``.
        super(quadratic, self).__init__(batch_size)

    def _make_labels(self):
        """Return an all-zero float32 tensor of shape (train_size, dim)."""
        zeros = np.zeros((self._train_size, self._dim), dtype=np.float32)
        return torch.from_numpy(zeros)

    def _make_data(self, seed):
        """Draw the data points from a freshly seeded random generator.

        Using a dedicated ``RandomState`` per call means the same ``seed``
        always produces the same data.

        Args:
          seed (int): Seed for the NumPy random generator.

        Returns:
          torch.Tensor: float32 tensor of shape ``(train_size, dim)`` holding
          samples from a normal distribution with mean ``0.0`` and standard
          deviation ``noise_level``.
        """
        rng = np.random.RandomState(seed)
        samples = rng.normal(
            0.0, self._noise_level, (self._train_size, self._dim)
        )
        # NumPy samples in float64; cast to float32 before wrapping in a tensor.
        return torch.from_numpy(np.float32(samples))

    def _make_train_and_valid_dataloader(self):
        """Build the train (seed 42, shuffled) and validation (seed 44) loaders.

        Returns:
          tuple: ``(train_loader, valid_loader)`` as produced by
          ``self._make_dataloader``.
        """
        # Fixed seeds so that every run sees exactly the same data.
        train_dataset = dat.TensorDataset(
            self._make_data(seed=42), self._make_labels()
        )
        valid_dataset = dat.TensorDataset(
            self._make_data(seed=44), self._make_labels()
        )
        # Loaders are created in the same order as before (train, then valid).
        train_loader = self._make_dataloader(train_dataset, shuffle=True)
        valid_loader = self._make_dataloader(valid_dataset)
        return train_loader, valid_loader

    def _make_test_dataloader(self):
        """Build the test loader from data drawn with the fixed seed 43."""
        test_dataset = dat.TensorDataset(
            self._make_data(seed=43), self._make_labels()
        )
        return self._make_dataloader(test_dataset)

    def _make_train_eval_dataloader(self):
        """Return the train loader for evaluating on the training data."""
        # NOTE(review): ``_train_dataloader`` is not assigned in this class;
        # presumably the base class stores the train loader under this name
        # before calling this hook -- confirm against ``dataset.DataSet``.
        return self._train_dataloader