Source code for deepobs.tensorflow.testproblems.tolstoi_char_rnn

# -*- coding: utf-8 -*-
"""A two-layer LSTM for character-level language modelling on Tolstoi's War and Peace."""

import tensorflow as tf

from ..datasets.tolstoi import tolstoi
from .testproblem import TestProblem


class tolstoi_char_rnn(TestProblem):
    """DeepOBS test problem class for a two-layer LSTM for character-level
    language modelling (Char RNN) on Tolstoi's War and Peace.

    Some network characteristics:

    - ``128`` hidden units per LSTM cell
    - sequence length ``50``
    - cell state is automatically stored in variables between subsequent steps
    - when the phase placeholder switches its value from one step to the next,
      the cell state is reset to its zero value (i.e. the state is set to zero
      after each round of evaluation; it is therefore important to choose the
      evaluation interval such that evaluation happens after a full epoch)

    Working training parameters are:

    - batch size ``50``
    - ``200`` epochs
    - SGD with a learning rate of :math:`\\approx 0.1` works

    Args:
      batch_size (int): Batch size to use.
      weight_decay (float): No weight decay (L2-regularization) is used in this
          test problem. Defaults to ``None`` and any input here is ignored.

    Attributes:
      dataset: The DeepOBS data set class for Tolstoi.
      train_init_op: A tensorflow operation initializing the test problem for
          the training phase.
      train_eval_init_op: A tensorflow operation initializing the test problem
          for evaluating on training data.
      test_init_op: A tensorflow operation initializing the test problem for
          evaluating on test data.
      losses: A tf.Tensor of shape (batch_size, ) containing the per-example
          loss values.
      regularizer: A scalar tf.Tensor containing a regularization term.
      accuracy: A scalar tf.Tensor containing the mini-batch mean accuracy.
    """

    def __init__(self, batch_size, weight_decay=None):
        """Create a new Char RNN test problem instance on Tolstoi.

        Args:
          batch_size (int): Batch size to use.
          weight_decay (float): No weight decay (L2-regularization) is used in
              this test problem. Defaults to ``None`` and any input here is
              ignored.
        """
        super(tolstoi_char_rnn, self).__init__(batch_size, weight_decay)

        if weight_decay is not None:
            print(
                "WARNING: Weight decay is non-zero but no weight decay is used",
                "for this model.",
            )

    def set_up(self):
        """Set up the Char RNN test problem instance on Tolstoi."""
        self.dataset = tolstoi(self._batch_size)

        seq_length = 50
        vocab_size = 83  # For War and Peace
        x, y = self.dataset.batch
        num_layers = 2
        rnn_size = 128

        # Use dropout (keep probability 0.8) only during the training phase.
        input_keep_prob = tf.cond(
            tf.equal(self.dataset.phase, tf.constant("train")),
            lambda: tf.constant(0.8),
            lambda: tf.constant(1.0))
        output_keep_prob = tf.cond(
            tf.equal(self.dataset.phase, tf.constant("train")),
            lambda: tf.constant(0.8),
            lambda: tf.constant(1.0))

        # Create an embedding matrix, look up embedding of input
        embedding = tf.get_variable("embedding", [vocab_size, rnn_size])
        inputs = tf.nn.embedding_lookup(embedding, x)

        # Split batch of input sequences along time, such that inputs[i] is a
        # batch_size x embedding_size representation of the batch of characters
        # at position i of this batch of sequences
        inputs = tf.split(inputs, seq_length, axis=1)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        # Make multi-layer LSTM cell with dropout
        cells = []
        for _ in range(num_layers):
            cell = tf.contrib.rnn.LSTMCell(rnn_size)
            cell = tf.contrib.rnn.DropoutWrapper(
                cell,
                input_keep_prob=input_keep_prob,
                output_keep_prob=output_keep_prob)
            cells.append(cell)
        cell = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)

        # Create RNN using the cell defined above, including operations that
        # store the state in variables
        self.state_variables, self.zero_states = self._get_state_variables(
            self._batch_size, cell)

        outputs, new_states = tf.nn.static_rnn(
            cell, inputs, initial_state=self.state_variables)
        with tf.control_dependencies(outputs):
            state_update_op = self._get_state_update_op(
                self.state_variables, new_states)

        # Reshape RNN output for multiplication with the softmax layer
        with tf.control_dependencies(state_update_op):
            output = tf.reshape(tf.concat(outputs, 1), [-1, rnn_size])

        # Apply softmax layer
        with tf.variable_scope("rnnlm"):
            softmax_w = tf.get_variable("softmax_w", [rnn_size, vocab_size])
            softmax_b = tf.get_variable("softmax_b", [vocab_size])
            logits = tf.matmul(output, softmax_w) + softmax_b

        # Reshape logits to batch_size x seq_length x vocab_size
        reshaped_logits = tf.reshape(
            logits, [self._batch_size, seq_length, vocab_size])

        # Create vector of per-example losses (averaged over time steps)
        self.losses = tf.contrib.seq2seq.sequence_loss(
            reshaped_logits,
            y,
            weights=tf.ones([self._batch_size, seq_length], dtype=tf.float32),
            average_across_timesteps=True,
            average_across_batch=False)

        predictions = tf.argmax(reshaped_logits, 2)
        correct_prediction = tf.equal(predictions, y)
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        self.regularizer = tf.losses.get_regularization_loss()

        # The init ops additionally reset the stored cell state to zero.
        self.train_init_op = tf.group([
            self.dataset.train_init_op,
            self._get_state_update_op(self.state_variables, self.zero_states)
        ])
        self.train_eval_init_op = tf.group([
            self.dataset.train_eval_init_op,
            self._get_state_update_op(self.state_variables, self.zero_states)
        ])
        self.test_init_op = tf.group([
            self.dataset.test_init_op,
            self._get_state_update_op(self.state_variables, self.zero_states)
        ])
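
    # The two helpers below implement the persistent-state pattern used in
    # set_up: the LSTM cell state is kept in non-trainable variables so that it
    # carries over from one session.run call to the next, and the *_init_op
    # operations reset it to the zero state at the start of each phase.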

    def _get_state_variables(self, batch_size, cell):
        """For each layer, get the initial state and make a variable out of it
        to enable updating its value.

        Args:
          batch_size (int): Batch size.
          cell (tf.BasicLSTMCell): LSTM cell to get the initial state for.

        Returns:
          tuple: Tuple of the state variables and their zero states.
        """
        # For each layer, get the initial state and make a variable out of it
        # to enable updating its value.
        zero_state = cell.zero_state(batch_size, tf.float32)
        state_variables = []
        for state_c, state_h in zero_state:
            state_variables.append(
                tf.contrib.rnn.LSTMStateTuple(
                    tf.Variable(state_c, trainable=False),
                    tf.Variable(state_h, trainable=False)))
        # Return as a tuple, so that it can be fed to static_rnn as an
        # initial state
        return tuple(state_variables), zero_state

    def _get_state_update_op(self, state_variables, new_states):
        """Add an operation to update the stored states with the last state
        tensors.

        Args:
          state_variables (tf.Variable): State variables to be updated.
          new_states (tf.Variable): New states of the state variables.

        Returns:
          tf.Operation: Returns a tuple in order to combine all update_ops
              into a single operation. The tuple's actual value should not
              be used.
        """
        # Add an operation to update the stored states with the last state
        # tensors
        update_ops = []
        for state_variable, new_state in zip(state_variables, new_states):
            # Assign the new state to the state variables on this layer
            update_ops.extend([
                state_variable[0].assign(new_state[0]),
                state_variable[1].assign(new_state[1])
            ])
        # Return a tuple in order to combine all update_ops into a single
        # operation. The tuple's actual value should not be used.
        return tf.tuple(update_ops)
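

# A minimal, self-contained sketch of the persistent-state pattern implemented
# by the two helpers above, illustrative only and not part of the DeepOBS
# package: the LSTM state is stored in non-trainable variables, an assign op
# carries the final state of one session.run call over to the next, and
# assigning the zero state resets it. The function name and the toy shapes are
# made up for this example; it assumes the same TensorFlow 1.x / tf.contrib
# environment this module targets.
def _persistent_state_sketch(batch_size=4, rnn_size=8, seq_length=5):
    tf.reset_default_graph()
    cell = tf.contrib.rnn.LSTMCell(rnn_size)

    # Store the zero state in variables so it survives between run calls.
    zero_state = cell.zero_state(batch_size, tf.float32)
    state_vars = tf.contrib.rnn.LSTMStateTuple(
        tf.Variable(zero_state.c, trainable=False),
        tf.Variable(zero_state.h, trainable=False))

    inputs = [tf.random_normal([batch_size, rnn_size])
              for _ in range(seq_length)]
    outputs, new_state = tf.nn.static_rnn(
        cell, inputs, initial_state=state_vars)

    # Carry the final state over to the next call ...
    update_state = tf.tuple([state_vars.c.assign(new_state.c),
                             state_vars.h.assign(new_state.h)])
    # ... or reset it to zero (what the *_init_op operations above do).
    reset_state = tf.tuple([state_vars.c.assign(zero_state.c),
                            state_vars.h.assign(zero_state.h)])

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run([outputs, update_state])  # state becomes non-zero
        sess.run([outputs, update_state])  # continues from the stored state
        sess.run(reset_state)              # back to the zero state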
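

# A minimal usage sketch, illustrative only and not part of the DeepOBS
# package, following the working training parameters from the class docstring:
# batch size 50 and vanilla SGD with a learning rate of about 0.1. It assumes
# the usual DeepOBS TF1 workflow (instantiate the problem, call set_up, build
# an optimizer on the mean of `losses`) and that the Tolstoi data set has been
# prepared as DeepOBS expects; the function name is made up for illustration.
def _sgd_training_sketch(num_steps=100):
    tf.reset_default_graph()
    problem = tolstoi_char_rnn(batch_size=50)
    problem.set_up()

    loss = tf.reduce_mean(problem.losses) + problem.regularizer
    step = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(loss)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Switch to the training phase; this also resets the stored LSTM state.
        sess.run(problem.train_init_op)
        for _ in range(num_steps):
            sess.run(step)
        # Evaluate on test data (again starting from a zero LSTM state).
        sess.run(problem.test_init_op)
        print(sess.run([tf.reduce_mean(problem.losses), problem.accuracy]))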