Source code for deepobs.tuner.tuner

# -*- coding: utf-8 -*-
import abc
import os

from numpy.random import seed as np_seed

from deepobs.analyzer.shared_utils import _dump_json

from .tuner_utils import rerun_setting


[docs]class Tuner(abc.ABC): """ The base class for all tuning methods in DeepOBS. """
[docs] def __init__(self, optimizer_class, hyperparam_names, ressources, runner): """ Args: optimizer_class (framework optimizer class): The optimizer class of the optimizer that is run on \ the testproblems. For PyTorch this must be a subclass of torch.optim.Optimizer. For \ TensorFlow a subclass of tf.train.Optimizer. hyperparam_names (dict): A nested dictionary that lists all hyperparameters of the optimizer, \ their type and their default values (if they have any) in the form: {'<name>': {'type': <type>, 'default': <default value>}}, \ e.g. for torch.optim.SGD with momentum: \ {'lr': {'type': float}, \ 'momentum': {'type': float, 'default': 0.99}, \ 'uses_nesterov': {'type': bool, 'default': False}} ressources (int): The number of evaluations the tuner is allowed to perform on each testproblem. runner: The DeepOBS runner that the tuner uses for evaluation. """ self._optimizer_class = optimizer_class self._optimizer_name = optimizer_class.__name__ self._hyperparam_names = hyperparam_names self._ressources = ressources self._runner = runner
@staticmethod def _set_seed(random_seed): np_seed(random_seed)
[docs] def tune_on_testset(self, testset, *args, **kwargs): """Tunes the hyperparameter on a whole testset. Args: testset (list): A list of testproblems. """ if any(s in kwargs for s in ["num_epochs", "batch_size", "l2_reg"]): raise RuntimeError( "Cannot execute tuning on a whole testset if num_epochs, " "l2_reg or batch_size is set. " "A testset tuning is ment to tune on default testproblems." ) for testproblem in testset: self.tune(testproblem, *args, **kwargs)
[docs] @abc.abstractmethod def tune( self, testproblem, *args, output_dir="./results", random_seed=42, rerun_best_setting=True, **kwargs ): """Tunes hyperparaneter of the optimizer_class on a testproblem. Args: testproblem (str): Testproblem for which to generate commands. output_dir (str): The output path where the execution results are written to. random_seed (int): The random seed for the tuning. rerun_best_setting (bool): Whether to rerun the best setting with 10 different seeds. """ pass
[docs]class ParallelizedTuner(Tuner): """ The base class for all tuning methods which are uninformed and parallelizable, like Grid Search and Random Search. """ def __init__(self, optimizer_class, hyperparam_names, ressources, runner): super(ParallelizedTuner, self).__init__( optimizer_class, hyperparam_names, ressources, runner ) @abc.abstractmethod def _sample(self): return def _generate_hyperparams_format_for_command_line(self, hyperparams): """Overwrite this method to specify how hyperparams should be represented in the command line string. This is basically the inversion of your runner specific method ``_add_hyperparams_to_argparse``""" string = "" for key, value in hyperparams.items(): if self._hyperparam_names[key]["type"] == bool: string += " --" + key else: string += " --" + key + " " + str(value) return string def _generate_kwargs_format_for_command_line(self, **kwargs): """Overwrite this method to specify how additional training params should be represented in the command line string. This is basically the inversion of your runner specific method ``_add_training_params_to_argparse``""" string = "" for key, value in kwargs.items(): if key == "lr_sched_factors" or key == "lr_sched_epochs": string += " --" + key for v in value: string += " " + str(v) else: string += " --" + key + " " + str(value) return string
[docs] def tune( self, testproblem, output_dir="./results", random_seed=42, rerun_best_setting=False, **kwargs ): """Tunes the optimizer on the test problem. Args: testproblem (str): The test problem to tune the optimizer on. output_dir (str): The output directory for the results. random_seed (int): Random seed for the whole truning process. Every individual run is seeded by it. rerun_best_setting (bool): Whether to automatically rerun the best setting with 10 different seeds. """ self._set_seed(random_seed) params = self._sample() for sample in params: runner = self._runner(self._optimizer_class, self._hyperparam_names) runner.run( testproblem, hyperparams=sample, random_seed=random_seed, output_dir=output_dir, **kwargs, ) if rerun_best_setting: optimizer_path = os.path.join(output_dir, testproblem, self._optimizer_name) rerun_setting( self._runner, self._optimizer_class, self._hyperparam_names, optimizer_path, )
[docs] def generate_commands_script( self, testproblem, run_script, output_dir="./results", random_seed=42, generation_dir="./command_scripts", **kwargs ): """ Args: testproblem (str): Testproblem for which to generate commands. run_script (str): Name the run script that is used from the command line. output_dir (str): The output path where the execution results are written to. random_seed (int): The random seed for the tuning. generation_dir (str): The path to the directory where the generated scripts are written to. Returns: str: The relative file path to the generated commands script. """ os.makedirs(generation_dir, exist_ok=True) file_path = os.path.join( generation_dir, "jobs_" + self._optimizer_name + "_" + self._search_name + "_" + testproblem + ".txt", ) file = open(file_path, "w") kwargs_string = self._generate_kwargs_format_for_command_line(**kwargs) self._set_seed(random_seed) params = self._sample() for sample in params: sample_string = self._generate_hyperparams_format_for_command_line(sample) file.write( "python3 " + run_script + " " + testproblem + " " + sample_string + " --random_seed " + str(random_seed) + " --output_dir " + output_dir + " " + kwargs_string + "\n" ) file.close() return file_path
[docs] def generate_commands_script_for_testset(self, testset, *args, **kwargs): """Generates command scripts for a whole testset. Args: testset (list): A list of the testproblem strings. """ if any(s in kwargs for s in ["num_epochs", "batch_size", "l2_reg"]): raise RuntimeError( "Cannot execute tuning on a whole testset if num_epochs, " "l2_reg or batch_size is set. " "A testset tuning is ment to tune on default testproblems." ) for testproblem in testset: self.generate_commands_script(testproblem, *args, **kwargs)