from __future__ import print_function
import os
import time
from collections import Counter
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from ..tuner.tuner_utils import generate_tuning_summary
from .analyze_utils import _preprocess_path, _rescale_ax
from .shared_utils import (_check_output_structure,
_check_setting_folder_is_not_empty,
_determine_available_metric,
_get_optimizer_name_and_testproblem_from_path,
create_setting_analyzer_ranking)
sns.set()
sns.set_style(
"whitegrid",
{
"axes.grid": False,
"axes.spines.top": False,
"axes.spines.right": False,
"xtick.bottom": True,
"ytick.left": True,
"xtick.direction": "in",
"ytick.direction": "in",
"xtick.color": ".5",
"ytick.color": ".5",
},
)
colors = ["#feae34","#193c3e","#733e39","#3e2731","#a22633","#e43b44","#f77622","#63c74d","#265c42","#124e89","#0099db","#5a6988","#68386c","#b55088","#f6757a","#181425","#75580f"]
sns.set_palette(colors)
def check_output(results_path):
    """Iterates through the results folder and checks all outputs for format and completeness. It checks for some basic
    format in every JSON file and looks for setting folders that are empty.
    It further gives an overview of the number of different settings and seed runs for
    each test problem and each optimizer. It does not return anything, but prints an overview to the console.
Args:
results_path (str): Path to the results folder.
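    Example:
        A minimal usage sketch; ``"./results"`` stands in for a hypothetical
        results folder created by a DeepOBS runner::

            check_output("./results")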
"""
testproblems = sorted(os.listdir(results_path))
for testproblem in testproblems:
testproblem_path = os.path.join(results_path, testproblem)
optimizers = sorted(os.listdir(testproblem_path))
for optimizer in optimizers:
optimizer_path = os.path.join(testproblem_path, optimizer)
settings = [
setting
for setting in os.listdir(optimizer_path)
if os.path.isdir(os.path.join(optimizer_path, setting))
and "num_epochs" in setting
]
n_runs_list = []
for setting in settings:
setting_path = os.path.join(optimizer_path, setting)
_check_setting_folder_is_not_empty(setting_path)
jsons_files = [
file for file in os.listdir(setting_path) if "json" in file
]
n_runs_list.append(len(jsons_files))
for json_file in jsons_files:
_check_output_structure(setting_path, json_file)
counter = Counter(n_runs_list)
for n_runs, count in counter.items():
print(
"{0:s} | {1:s}: {2:d} setting(s) with {3:d} seed(s).".format(
testproblem, optimizer, count, n_runs
)
)
def estimate_runtime(
framework,
runner_cls,
optimizer_cls,
optimizer_hp,
optimizer_hyperparams,
n_runs=5,
sgd_lr=0.01,
testproblem="mnist_mlp",
num_epochs=5,
batch_size=128,
**kwargs
):
"""Can be used to estimates the runtime overhead of a new optimizer compared to SGD. Runs the new optimizer and
SGD seperately and calculates the fraction of wall clock overhead.
Args:
framework (str): Framework that you use. Must be 'pytorch' or 'tensorlfow'.
runner_cls: The runner class that your optimizer uses.
optimizer_cls: Your optimizer class.
optimizer_hp (dict): Its hyperparameter specification as it is used in the runner initialization.
optimizer_hyperparams (dict): Optimizer hyperparameter values to run.
        n_runs (int): The number of runs over which the overhead is averaged.
sgd_lr (float): The vanilla SGD learning rate to use.
testproblem (str): The deepobs testproblem to run SGD and the new optimizer on.
num_epochs (int): The number of epochs to run for the testproblem.
batch_size (int): Batch size of the testproblem.
Returns:
str: The output that is printed to the console.
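    Example:
        A minimal sketch for a PyTorch optimizer; ``MyRunner``, ``MyOptimizer``
        and the hyperparameter values are hypothetical placeholders::

            output = estimate_runtime(
                framework="pytorch",
                runner_cls=MyRunner,
                optimizer_cls=MyOptimizer,
                optimizer_hp={"lr": {"type": float}},
                optimizer_hyperparams={"lr": 0.1},
            )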
"""
# get the standard runner with SGD
if framework == "pytorch":
from deepobs import pytorch as ptobs
from torch.optim import SGD
runner_sgd = ptobs.runners.StandardRunner
optimizer_class_sgd = SGD
hp_sgd = {"lr": {"type": float}}
hyperparams_sgd = {"lr": sgd_lr}
elif framework == "tensorflow":
from deepobs import tensorflow as tfobs
import tensorflow as tf
optimizer_class_sgd = tf.train.GradientDescentOptimizer
hp_sgd = {"learning_rate": {"type": float}}
runner_sgd = tfobs.runners.StandardRunner
hyperparams_sgd = {"learning_rate": sgd_lr}
else:
raise RuntimeError("Framework must be pytorch or tensorflow")
sgd_times = []
new_opt_times = []
for i in range(n_runs):
print("** Start Run: ", i + 1, "of", n_runs)
# SGD
print("Running SGD")
start_sgd = time.time()
runner = runner_sgd(optimizer_class_sgd, hp_sgd)
runner.run(
testproblem=testproblem,
hyperparams=hyperparams_sgd,
batch_size=batch_size,
num_epochs=num_epochs,
no_logs=True,
**kwargs,
)
end_sgd = time.time()
sgd_times.append(end_sgd - start_sgd)
print("Time for SGD run ", i + 1, ": ", sgd_times[-1])
# New Optimizer
runner = runner_cls(optimizer_cls, optimizer_hp)
print("Running...", optimizer_cls.__name__)
start_script = time.time()
runner.run(
testproblem=testproblem,
hyperparams=optimizer_hyperparams,
batch_size=batch_size,
num_epochs=num_epochs,
no_logs=True,
**kwargs,
)
end_script = time.time()
new_opt_times.append(end_script - start_script)
print("Time for new optimizer run ", i + 1, ": ", new_opt_times[-1])
overhead = np.divide(new_opt_times, sgd_times)
output = (
"** Mean run time SGD: "
+ str(np.mean(sgd_times))
+ "\n"
+ "** Mean run time new optimizer: "
+ str(np.mean(new_opt_times))
+ "\n"
+ "** Overhead per run: "
+ str(overhead)
+ "\n"
+ "** Mean overhead: "
+ str(np.mean(overhead))
+ " Standard deviation: "
+ str(np.std(overhead))
)
print(output)
return output
def plot_results_table(
results_path, mode="most", metric="valid_accuracies", conv_perf_file=None
):
"""Summarizes the performance of the optimizer and prints it to a pandas data frame.
Args:
results_path (str): The path to the results directory.
mode (str): The mode by which to decide the best setting.
metric (str): The metric by which to decide the best setting.
conv_perf_file (str): Path to the convergence performance file. It is used to calculate the speed of the optimizer. Defaults to ``None`` in which case the speed measure is N.A.
Returns:
pandas.DataFrame: A data frame that summarizes the results on the test set.
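    Example:
        A minimal sketch; ``"./results"`` is a hypothetical results folder::

            table = plot_results_table("./results", metric="valid_accuracies")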
"""
table_dic = {}
testproblems = os.listdir(results_path)
metric_keys = [
"Hyperparameters",
"Performance",
"Speed",
"Training Parameters",
]
for testproblem in testproblems:
# init new subdict for testproblem
for metric_key in metric_keys:
table_dic[(testproblem, metric_key)] = {}
testproblem_path = os.path.join(results_path, testproblem)
optimizers = sorted(os.listdir(testproblem_path))
for optimizer in optimizers:
optimizer_path = os.path.join(testproblem_path, optimizer)
optimizer_performance_dic = get_performance_dictionary(
optimizer_path, mode, metric, conv_perf_file
)
            # invert inner dicts for multi-indexing
for metric_key in metric_keys:
table_dic[(testproblem, metric_key)][
optimizer
] = optimizer_performance_dic[metric_key]
# correct multiindexing
table = pd.DataFrame.from_dict(table_dic, orient="index")
print(table)
return table
def plot_hyperparameter_sensitivity_2d(
optimizer_path,
hyperparams,
mode="final",
metric="valid_accuracies",
xscale="linear",
yscale="linear",
show=True,
):
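    """Plots a 2D contour of the chosen metric over two hyperparameters of one optimizer.
    Args:
        optimizer_path (str): Path to the optimizer folder.
        hyperparams (tuple): Names of the two hyperparameters to plot against each other.
        mode (str): The mode by which to decide the best setting.
        metric (str): The metric by which to decide the best setting.
        xscale (str): The scale for the x-axis.
        yscale (str): The scale for the y-axis.
        show (bool): Whether to show the plot or not.
    Returns:
        tuple: The figure and axes of the plot.
    Example:
        A minimal sketch; the results path and the hyperparameter names are
        hypothetical placeholders::

            fig, ax = plot_hyperparameter_sensitivity_2d(
                "./results/mnist_mlp/MyOptimizer",
                hyperparams=("lr", "momentum"),
                xscale="log",
            )
    """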
param1, param2 = hyperparams
metric = _determine_available_metric(optimizer_path, metric)
tuning_summary = generate_tuning_summary(optimizer_path, mode, metric)
optimizer_name, testproblem = _get_optimizer_name_and_testproblem_from_path(
optimizer_path
)
param_values1 = np.array([d["params"][param1] for d in tuning_summary])
param_values2 = np.array([d["params"][param2] for d in tuning_summary])
target_means = np.array([d[metric + "_mean"] for d in tuning_summary])
target_stds = [d[metric + "_std"] for d in tuning_summary]
fig, ax = plt.subplots()
con = ax.tricontourf(
param_values1,
param_values2,
target_means,
cmap="CMRmap",
levels=len(target_means),
)
ax.scatter(param_values1, param_values2)
ax.set_xscale(xscale)
ax.set_yscale(yscale)
ax.set_xlabel(param1)
ax.set_ylabel(param2)
cbar = plt.colorbar(con)
cbar.set_label(metric)
if show:
plt.show()
return fig, ax
def _plot_hyperparameter_sensitivity(
optimizer_path,
hyperparam,
ax,
mode="final",
metric="valid_accuracies",
plot_std=False,
):
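    """Plots the sensitivity curve of a single optimizer for one hyperparameter onto the given axis ``ax``."""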
metric = _determine_available_metric(optimizer_path, metric)
tuning_summary = generate_tuning_summary(optimizer_path, mode, metric)
optimizer_name, testproblem = _get_optimizer_name_and_testproblem_from_path(
optimizer_path
)
# create array for plotting
param_values = [d["params"][hyperparam] for d in tuning_summary]
target_means = [d[metric + "_mean"] for d in tuning_summary]
    target_stds = [d[metric + "_std"] for d in tuning_summary]
param_values, target_means, target_stds = (
list(t) for t in zip(*sorted(zip(param_values, target_means, target_stds)))
)
param_values = np.array(param_values)
target_means = np.array(target_means)
ax.plot(param_values, target_means, linewidth=3, label=optimizer_name)
if plot_std:
ranks = create_setting_analyzer_ranking(optimizer_path, mode, metric)
for rank in ranks:
values = rank.get_all_final_values(metric)
param_value = rank.aggregate["optimizer_hyperparams"][hyperparam]
for value in values:
ax.scatter(param_value, value, marker="x", color="b")
ax.plot(
(param_value, param_value),
(min(values), max(values)),
color="grey",
linestyle="--",
)
ax.set_title(testproblem, fontsize=20)
return ax
def plot_hyperparameter_sensitivity(
path,
hyperparam,
mode="final",
metric="valid_accuracies",
xscale="linear",
plot_std=True,
reference_path=None,
show=True,
ax=None,
):
"""Plots the hyperparameter sensitivtiy of the optimizer.
Args:
        path (str): The path to the optimizer to analyse, or to a whole testproblem; in that case, all optimizer sensitivities are plotted.
hyperparam (str): The name of the hyperparameter that should be analyzed.
mode (str): The mode by which to decide the best setting.
metric (str): The metric by which to decide the best setting.
xscale (str): The scale for the parameter axes. Is passed to plt.xscale().
plot_std (bool): Whether to plot markers for individual seed runs or not. If `False`, only the mean is plotted.
reference_path (str): Path to the reference optimizer or to a whole testproblem (in this case all optimizers in the testproblem folder are taken as reference).
show (bool): Whether to show the plot or not.
ax (matplotlib.axis): Axis to draw onto. Defaults to none, which creates a new one.
Returns:
tuple: The figure and axes of the plot.
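    Example:
        A minimal sketch; the results path is a hypothetical placeholder::

            fig, ax = plot_hyperparameter_sensitivity(
                "./results/mnist_mlp/SGD", "lr", xscale="log"
            )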
"""
if ax is None:
fig, ax = plt.subplots()
else:
fig = plt.gcf()
    paths = _preprocess_path(path)
    for optimizer_path in paths:
metric = _determine_available_metric(optimizer_path, metric)
ax = _plot_hyperparameter_sensitivity(
optimizer_path, hyperparam, ax, mode, metric, plot_std
)
if reference_path is not None:
        paths = _preprocess_path(reference_path)
        for reference_optimizer_path in paths:
metric = _determine_available_metric(reference_optimizer_path, metric)
ax = _plot_hyperparameter_sensitivity(
reference_optimizer_path, hyperparam, ax, mode, metric, plot_std
)
plt.xscale(xscale)
plt.xlabel(hyperparam, fontsize=16)
plt.ylabel(metric, fontsize=16)
ax.tick_params(labelsize=14)
ax.legend()
if show:
plt.show()
return fig, ax
def plot_final_metric_vs_tuning_rank(
optimizer_path, metric="valid_accuracies", show=True
):
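    """Plots the final metric values of all seed runs of an optimizer against the tuning rank of their setting.
    Args:
        optimizer_path (str): Path to the optimizer folder.
        metric (str): The metric to plot.
        show (bool): Whether to show the plot or not.
    Returns:
        tuple: The figure and axes of the plot.
    Example:
        A minimal sketch; the results path is a hypothetical placeholder::

            fig, ax = plot_final_metric_vs_tuning_rank("./results/mnist_mlp/MyOptimizer")
    """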
metric = _determine_available_metric(optimizer_path, metric)
ranks = create_setting_analyzer_ranking(optimizer_path, mode="final", metric=metric)
means = []
fig, ax = plt.subplots()
for idx, rank in enumerate(ranks):
means.append(rank.get_final_value(metric))
values = rank.get_all_final_values(metric)
for value in values:
ax.scatter(idx, value, marker="x", color="b")
ax.plot((idx, idx), (min(values), max(values)), color="grey", linestyle="--")
ax.plot(range(len(ranks)), means)
optimizer, testproblem = _get_optimizer_name_and_testproblem_from_path(
optimizer_path
)
ax.set_title(optimizer + " on " + testproblem)
ax.set_xlabel("tuning rank")
ax.set_ylabel(metric)
if show:
plt.show()
return fig, ax
def _plot_optimizer_performance(
path,
fig=None,
ax=None,
mode="most",
metric="valid_accuracies",
which="mean_and_std",
):
"""Plots the training curve of an optimizer.
Args:
path (str): Path to the optimizer or to a whole testproblem (in this case all optimizers in the testproblem folder are plotted).
fig (matplotlib.Figure): Figure to plot the training curves in.
        ax (matplotlib.axes.Axes): The axes to plot the training curves for all metrics. Must have 4 subaxes.
mode (str): The mode by which to decide the best setting.
metric (str): The metric by which to decide the best setting.
which (str): ['mean_and_std', 'median_and_quartiles', 'mean_and_std_log']
- Solid plot mean or median or exponentiated mean of log
- Shaded plots standard deviation or lower/upper quartiles or exponentiated std of log
Returns:
        tuple: The figure and the axes with the plots.
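    Example:
        A minimal sketch of how this helper can be called; the results path is
        a hypothetical placeholder::

            fig, ax = _plot_optimizer_performance("./results/mnist_mlp/SGD")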
"""
    metrics = [
"test_losses",
"train_losses",
"test_accuracies",
"train_accuracies",
]
    if ax is None:  # create default axes for all 4 metrics
fig, ax = plt.subplots(4, 1, sharex="col")
    paths = _preprocess_path(path)
    for optimizer_path in paths:
setting_analyzer_ranking = create_setting_analyzer_ranking(
optimizer_path, mode, metric
)
setting = setting_analyzer_ranking[0]
optimizer_name = os.path.basename(optimizer_path)
for idx, _metric in enumerate(metrics):
if _metric in setting.aggregate:
if which == "mean_and_std":
center = setting.aggregate[_metric]["mean"]
std = setting.aggregate[_metric]["std"]
low, high = center - std, center + std
elif which == "median_and_quartiles":
center = setting.aggregate[_metric]["median"]
low = setting.aggregate[_metric]["lower_quartile"]
high = setting.aggregate[_metric]["upper_quartile"]
else:
raise ValueError("Unknown value which={}".format(which))
ax[idx].plot(center, label=optimizer_name)
ax[idx].fill_between(range(len(center)), low, high, alpha=0.3)
_, testproblem = _get_optimizer_name_and_testproblem_from_path(optimizer_path)
ax[0].set_title(testproblem, fontsize=18)
return fig, ax