Source code for deepobs.tensorflow.datasets.two_d

# -*- coding: utf-8 -*-
"""2D DeepOBS dataset."""

import numpy as np
import tensorflow as tf
from . import dataset


class two_d(dataset.DataSet):
    """DeepOBS data set class to create two-dimensional stochastic test problems.

    This toy data set consists of a fixed number (``train_size``) of iid draws
    from two scalar zero-mean normal distributions with standard deviation
    specified by the ``noise_level``.

    Args:
        batch_size (int): The mini-batch size to use. Note that, if
            ``batch_size`` is not a divisor of the data set size
            (``train_size`` for train, train_eval and test), the remainder is
            dropped in each epoch (after shuffling).
        train_size (int): Size of the training data set. This will also be
            used as the train_eval and test set size. Defaults to ``10000``.
        noise_level (float): Standard deviation of the data points around the
            mean. The data points are drawn from a Gaussian distribution.
            Defaults to ``1.0``.

    Attributes:
        batch: A tuple ``(x, y)`` of tensors with random x and y that can be
            used to create a noisy two-dimensional test problem. Executing
            these tensors raises a ``tf.errors.OutOfRangeError`` after one
            epoch.
        train_init_op: A tensorflow operation initializing the data set for
            the training phase.
        train_eval_init_op: A tensorflow operation initializing the test
            problem for evaluating on training data.
        test_init_op: A tensorflow operation initializing the test problem
            for evaluating on test data.
        phase: A string-value tf.Variable that is set to ``"train"``,
            ``"train_eval"`` or ``"test"``, depending on the current phase.
            This can be used by test problems to adapt their behavior to this
            phase.
    """

    def __init__(self, batch_size, train_size=10000, noise_level=1.0):
        """Creates a new 2D instance.

        Args:
            batch_size (int): The mini-batch size to use. Note that, if
                ``batch_size`` is not a divisor of the data set size
                (``train_size`` for train, train_eval and test), the remainder
                is dropped in each epoch (after shuffling).
            train_size (int): Size of the training data set. This will also be
                used as the train_eval and test set size. Defaults to
                ``10000``.
            noise_level (float): Standard deviation of the data points around
                the mean. The data points are drawn from a Gaussian
                distribution. Defaults to ``1.0``.
        """
        self._name = "two_d"
        self._train_size = train_size
        self._noise_level = noise_level
        super(two_d, self).__init__(batch_size)

    def _make_dataset(self, data_x, data_y, shuffle=True):
        """Creates a 2D data set (helper used by the ``_make_*_dataset``
        methods below).

        Args:
            data_x (np.array): Numpy array containing the ``X`` values of the
                data points.
            data_y (np.array): Numpy array containing the ``y`` values of the
                data points.
            shuffle (bool): Switch to turn on or off shuffling of the data
                set. Defaults to ``True``.

        Returns:
            A tf.data.Dataset yielding batches of 2D data.
        """
        with tf.name_scope(self._name):
            with tf.device("/cpu:0"):
                data = tf.data.Dataset.from_tensor_slices((data_x, data_y))
                if shuffle:
                    data = data.shuffle(buffer_size=20000)
                data = data.batch(self._batch_size, drop_remainder=True)
                data = data.prefetch(buffer_size=4)
                return data

    def _make_train_datasets(self):
        """Creates the three 2D data sets stemming from the training part of
        the data set, i.e. the training set, the training evaluation set, and
        the validation set.

        Returns:
            A tf.data.Dataset instance with batches of training data.
            A tf.data.Dataset instance with batches of training eval data.
            A tf.data.Dataset instance with batches of validation data.
        """
        # Draw data from a random generator with a fixed seed to always get
        # the same data.
        rng = np.random.RandomState(42)
        train_x = rng.normal(0.0, self._noise_level, self._train_size)
        train_y = rng.normal(0.0, self._noise_level, self._train_size)
        train_x = np.float32(train_x)
        train_y = np.float32(train_y)
        train_data = self._make_dataset(train_x, train_y, shuffle=True)

        train_eval_data = train_data.take(
            self._train_size // self._batch_size)

        # Draw data from a random generator with a fixed seed to always get
        # the same data.
        rng = np.random.RandomState(44)
        valid_x = rng.normal(0.0, self._noise_level, self._train_size)
        valid_y = rng.normal(0.0, self._noise_level, self._train_size)
        valid_x = np.float32(valid_x)
        valid_y = np.float32(valid_y)
        valid_data = self._make_dataset(valid_x, valid_y, shuffle=False)

        return train_data, train_eval_data, valid_data

    def _make_test_dataset(self):
        """Creates the 2D test data set.

        Returns:
            A tf.data.Dataset instance with batches of test data.
        """
        # All-zero inputs recover the deterministic (noise-free) 2D function.
        test_x, test_y = np.zeros(self._train_size), np.zeros(self._train_size)
        test_x = np.float32(test_x)
        test_y = np.float32(test_y)

        return self._make_dataset(test_x, test_y, shuffle=False)
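

if __name__ == "__main__":
    # Minimal usage sketch (not part of the original DeepOBS source): build
    # the data set, switch to the training phase and draw one mini-batch.
    # It assumes the TF1-style graph/session workflow that deepobs.tensorflow
    # targets; ``batch`` and ``train_init_op`` are the attributes documented
    # in the class docstring above.
    data = two_d(batch_size=128, noise_level=1.0)
    x, y = data.batch  # one scalar x and y value per example in the batch
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())  # e.g. the phase variable
        sess.run(data.train_init_op)  # initialize the training pipeline
        x_vals, y_vals = sess.run([x, y])  # one mini-batch of noisy draws
        print(x_vals.shape, y_vals.shape)  # expected: (128,) (128,)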