# Source code for deepobs.tensorflow.datasets.imagenet

# -*- coding: utf-8 -*-
"""ImageNet DeepOBS dataset."""

import os

import tensorflow as tf

from deepobs import config

from . import dataset

class imagenet(dataset.DataSet):
    """DeepOBS data set class for the `ImageNet <http://www.image-net.org/>`_
    data set.

    .. NOTE::
      We use ``1001`` classes which includes an additional `background` class,
      as it is used for example by the inception net.

    Args:
      batch_size (int): The mini-batch size to use. Note that, if
          ``batch_size`` is not a divider of the dataset size the remainder is
          dropped in each epoch (after shuffling).
      data_augmentation (bool): If ``True`` some data augmentation operations
          (random crop window, horizontal flipping) are applied to the
          training data (but not the test data).
      train_eval_size (int): Size of the train eval dataset.
          Defaults to ``50 000``.

    Attributes:
      batch: A tuple ``(x, y)`` of tensors, yielding batches of ImageNet images
          (``x`` with shape ``(batch_size, 224, 224, 3)``) and corresponding
          one-hot label vectors (``y`` with shape ``(batch_size, 1001)``).
          Executing these tensors raises a ``tf.errors.OutOfRangeError`` after
          one epoch.
      train_init_op: A tensorflow operation initializing the dataset for the
          training phase.
      train_eval_init_op: A tensorflow operation initializing the testproblem
          for evaluating on training data.
      valid_init_op: A tensorflow operation initializing the testproblem for
          evaluating on validation data.
      test_init_op: A tensorflow operation initializing the testproblem for
          evaluating on test data.
      phase: A string-value tf.Variable that is set to ``train``,
          ``train_eval``, ``valid``, or ``test``, depending on the current
          phase. This can be used by testproblems to adapt their behavior to
          this phase.
    """

    def __init__(self, batch_size, data_augmentation=True, train_eval_size=50000):
        """Creates a new ImageNet instance.

        Args:
          batch_size (int): The mini-batch size to use. Note that, if
              ``batch_size`` is not a divider of the dataset size the remainder
              is dropped in each epoch (after shuffling).
          data_augmentation (bool): If ``True`` some data augmentation
              operations (random crop window, horizontal flipping) are applied
              to the training data (but not the test data).
          train_eval_size (int): Size of the train eval dataset
              (default: 50k).
        """
        # These attributes must exist before the base-class constructor runs,
        # because the ``_make_*`` hooks below read them.
        self._name = "imagenet"
        self._data_augmentation = data_augmentation
        self._train_eval_size = train_eval_size
        super(imagenet, self).__init__(batch_size)

    def _make_dataset(
        self,
        data,
        per_image_standardization=True,
        random_crop=False,
        random_flip_left_right=False,
        distort_color=False,
        shuffle=True,
    ):
        """Creates an ImageNet data set (helper used by ``._make_*_dataset``
        below).

        Args:
          data (tf.data.Dataset): A ``tf.data.Dataset`` of serialized Example
              protos with ImageNet (train or test) data.
          per_image_standardization (bool): Switch to standardize each image
              to have zero mean and unit norm. Defaults to ``True``.
          random_crop (bool): Switch if random crops should be used.
              Defaults to ``False``.
          random_flip_left_right (bool): Switch to randomly flip the images
              horizontally. Defaults to ``False``.
          distort_color (bool): Switch to use random brightness, saturation,
              hue and contrast on each image. Defaults to ``False``.
          shuffle (bool): Switch to turn on or off shuffling of the data set.
              Defaults to ``True``.

        Returns:
          A ``tf.data.Dataset`` yielding batches of ImageNet data.
        """
        # Class 0 is for Background. Therefore we have 1001
        num_classes = 1001

        def parse_func(example_serialized):
            """Parse function depending on the above arguments; the data set
            is mapped through it.
            """
            # Parse example proto, decode image and resize while preserving
            # the aspect ratio.
            image_buffer, label, _ = self._parse_example_proto(example_serialized)
            image = self._decode_jpeg(image_buffer)
            image = self._aspect_preserving_resize(image, target_smaller_side=256)

            # Crop to 224x224, either randomly or centered according to
            # the arguments.
            if random_crop:
                image = tf.image.resize_image_with_crop_or_pad(image, 256, 256)
                image = tf.random_crop(image, [224, 224, 3])
            else:
                image = tf.image.resize_image_with_crop_or_pad(image, 224, 224)

            # Optionally perform random flip
            if random_flip_left_right:
                image = tf.image.random_flip_left_right(image)

            # Optionally distort color
            if distort_color:
                image = self._color_distortion(image)

            # Normalize
            if per_image_standardization:
                image = tf.image.per_image_standardization(image)

            # Convert label to shape [] (instead of [1,]) such that the label
            # vector for a mini-batch will later be of shape [batch_size,]
            label = tf.reshape(label, [])
            # Label to one-hot vector
            label = tf.squeeze(tf.one_hot(label, depth=num_classes))
            return image, label

        with tf.name_scope(self._name):
            with tf.device("/cpu:0"):
                data = data.map(
                    parse_func,
                    num_parallel_calls=(8 if self._data_augmentation else 4),
                )
                if shuffle:
                    data = data.shuffle(buffer_size=10000)
                data = data.batch(self._batch_size, drop_remainder=True)
                data = data.prefetch(buffer_size=4)
                return data

    def _load_dataset(self, binaries_fname_pattern):
        """Loads the raw (serialized, unbatched) ImageNet records (helper used
        by ``._make_*_dataset`` below).

        Args:
          binaries_fname_pattern (str): Pattern of the files from which to
              load images and labels (e.g. ``some/path/train-*`` matching
              shards such as ``some/path/train-00000-of-01024``).

        Returns:
          A ``tf.data.Dataset`` yielding serialized ImageNet examples.
        """
        with tf.name_scope(self._name):
            with tf.device("/cpu:0"):
                filenames = tf.matching_files(binaries_fname_pattern)
                # Randomize the order in which the shard files are read.
                filenames = tf.random_shuffle(filenames)
                data = tf.data.TFRecordDataset(filenames)
                return data

    def _make_train_datasets(self):
        """Creates the three ImageNet datasets stemming from the training part
        of the data set, i.e. the training set, the training evaluation set,
        and the validation set.

        Returns:
          A ``tf.data.Dataset`` instance with batches of training data.
          A ``tf.data.Dataset`` instance with batches of training eval data.
          A ``tf.data.Dataset`` instance with batches of validation data.
        """
        pattern = os.path.join(config.get_data_dir(), "imagenet", "train-*")
        data = self._load_dataset(pattern)

        # The first ``train_eval_size`` records are held out for validation,
        # the remainder is used for training.
        # NOTE(review): the shard order comes from ``tf.random_shuffle`` in
        # ``_load_dataset``; if that op is re-evaluated on every iterator
        # initialization, this take/skip split may differ between
        # initializations -- confirm.
        valid_data = data.take(self._train_eval_size)
        train_data = data.skip(self._train_eval_size)

        train_data = self._make_dataset(
            train_data,
            per_image_standardization=True,
            random_crop=self._data_augmentation,
            random_flip_left_right=self._data_augmentation,
            distort_color=False,
            shuffle=True,
        )
        # ``train_data`` is already batched here, hence the number of batches
        # to take is the train eval size divided by the batch size.
        train_eval_data = train_data.take(self._train_eval_size // self._batch_size)

        valid_data = self._make_dataset(
            valid_data,
            per_image_standardization=True,
            random_crop=False,
            random_flip_left_right=False,
            distort_color=False,
            shuffle=False,
        )

        return train_data, train_eval_data, valid_data

    def _make_test_dataset(self):
        """Creates the ImageNet test dataset.

        The files matching ``validation-*`` in the ImageNet data directory are
        used as test data.

        Returns:
          A ``tf.data.Dataset`` instance with batches of test data.
        """
        pattern = os.path.join(config.get_data_dir(), "imagenet", "validation-*")
        test_data = self._load_dataset(pattern)
        return self._make_dataset(
            test_data,
            per_image_standardization=True,
            random_crop=False,
            random_flip_left_right=False,
            distort_color=False,
            shuffle=False,
        )

    def _parse_example_proto(self, example_serialized):
        """Parses an Example proto containing a training example of an image.

        The output of the image preprocessing script is a dataset containing
        serialized Example protocol buffers. Each Example proto contains the
        following fields::

            image/height: 462
            image/width: 581
            image/colorspace: 'RGB'
            image/channels: 3
            image/class/label: 615
            image/class/synset: 'n03623198'
            image/class/text: 'knee pad'
            image/format: 'JPEG'
            image/filename: 'ILSVRC2012_val_00041207.JPEG'
            image/encoded: <JPEG encoded string>

        Only ``image/encoded``, ``image/class/label`` and ``image/class/text``
        are read here.

        Args:
          example_serialized (tf.string): Scalar Tensor tf.string containing a
              serialized Example protocol buffer.

        Returns:
          tuple: Tuple of image_buffer (tf.string) containing the contents of
          a JPEG file, the label (tf.int32, shape ``[1]``) containing the
          label and text (tf.string) containing the human-readable label.
        """
        # Dense features in Example proto.
        feature_map = {
            "image/encoded": tf.FixedLenFeature([], dtype=tf.string, default_value=""),
            "image/class/label": tf.FixedLenFeature(
                [1], dtype=tf.int64, default_value=-1
            ),
            "image/class/text": tf.FixedLenFeature(
                [], dtype=tf.string, default_value=""
            ),
        }

        features = tf.parse_single_example(example_serialized, feature_map)
        label = tf.cast(features["image/class/label"], dtype=tf.int32)

        return features["image/encoded"], label, features["image/class/text"]

    def _decode_jpeg(self, image_buffer, scope=None):
        """Decode a JPEG string into one 3-D float image Tensor.

        Args:
          image_buffer (tf.string): scalar string Tensor.
          scope (str): Optional scope for name_scope.

        Returns:
          tf.Tensor: 3-D float Tensor with values in the unit interval.
        """
        with tf.name_scope(
            values=[image_buffer], name=scope, default_name="decode_jpeg"
        ):
            # Decode the string as an RGB JPEG.
            # Note that the resulting image contains an unknown height and
            # width that is set dynamically by decode_jpeg. In other words,
            # the height and width of image is unknown at graph-construction
            # time.
            image = tf.image.decode_jpeg(image_buffer, channels=3)

            # Convert to float32, which rescales the pixel values to the unit
            # interval. The various adjust_* / random_* color ops require this
            # range for dtype float.
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
            return image

    def _aspect_preserving_resize(self, image, target_smaller_side):
        """Resize image such that the smaller side has size
        ``target_smaller_side`` while preserving the aspect ratio.

        Args:
          image (tf.Tensor): Tensor containing the image to resize.
          target_smaller_side (int): Target size for the smaller side in
              pixel.

        Returns:
          tf.Tensor: The resized image, with the same aspect ratio as the
          input.
        """
        shape = tf.shape(image)
        height = tf.to_float(shape[0])
        width = tf.to_float(shape[1])
        smaller_side = tf.reduce_min(shape[0:2])
        # Common scale factor for both sides, chosen such that the smaller
        # side ends up at the target size.
        scale = tf.divide(target_smaller_side, tf.to_float(smaller_side))
        new_height = tf.to_int32(tf.round(scale * height))
        new_width = tf.to_int32(tf.round(scale * width))
        resized_image = tf.image.resize_images(image, [new_height, new_width])
        return resized_image

    def _color_distortion(self, image, scope=None):
        """Distort the color of the image.

        Randomly perturbs brightness, saturation, hue and contrast (in this
        order) and clips the result to ``[0, 1]``.

        Args:
          image (tf.Tensor): Tensor containing single image.
          scope (str): Optional scope for name_scope.

        Returns:
          tf.Tensor: The color-distorted image.
        """
        with tf.name_scope(values=[image], name=scope, default_name="distort_color"):
            image = tf.image.random_brightness(image, max_delta=32.0 / 255.0)
            image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
            image = tf.image.random_hue(image, max_delta=0.2)
            image = tf.image.random_contrast(image, lower=0.5, upper=1.5)

            # The random_* ops do not necessarily clamp.
            image = tf.clip_by_value(image, 0.0, 1.0)
            return image