Improve mlos-viz for multiple repeats of a config and add tests (#633)
- Mark `mlos_viz` as `typed` for `mypy`
- Bump version
- Mock calls to matplotlib/dabl for testing
- Add plotting of top-N configs
- Improve plots for handling repeat config trials via variance error bars (see the usage sketch after the `mlos_viz/__init__.py` diff below)

---------

Co-authored-by: Sergiy Matusevych <[email protected]>
bpkroth and motus authored Jan 29, 2024
1 parent 3a36797 commit a45f97d
Showing 24 changed files with 738 additions and 72 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.4.0
+current_version = 0.4.1
 commit = True
 tag = True

4 changes: 4 additions & 0 deletions .cspell.json
@@ -38,6 +38,7 @@
"jupyterlab",
"keepalive",
"kwargs",
"kword",
"libmamba",
"linalg",
"llamatune",
@@ -57,6 +58,7 @@
"pylint",
"pyplot",
"pytest",
"quantile",
"Quickstart",
"refcnt",
"rexec",
@@ -82,6 +84,8 @@
"workerinput",
"xdist",
"xlabel",
"xlabels",
"xticks",
"ylabel"
]
// vim: set ft=jsonc:
2 changes: 2 additions & 0 deletions .github/workflows/build-dist-test.ps1
@@ -114,6 +114,8 @@ if ($LASTEXITCODE -ne 0) {
 }
 
 # Run a simple mlos_viz test.
+# To do that, we need the fixtures from mlos_bench, so make those available too.
+$env:PYTHONPATH = "mlos_bench"
 conda run -n mlos-dist-test python -m pytest mlos_viz/mlos_viz/tests/test_dabl_plot.py
 if ($LASTEXITCODE -ne 0) {
     Write-Error "Failed to run mlos_viz tests."
3 changes: 2 additions & 1 deletion Makefile
@@ -335,7 +335,8 @@ build/dist-test.$(PYTHON_VERSION).build-stamp: $(PYTHON_FILES) build/dist-test-e
 	# Run a simple test that uses the mlos_bench wheel (full tests can be checked with `make test`).
 	conda run -n mlos-dist-test-$(PYTHON_VERSION) python3 -m pytest mlos_bench/mlos_bench/tests/environments/mock_env_test.py
 	# Run a simple test that uses the mlos_viz wheel (full tests can be checked with `make test`).
-	conda run -n mlos-dist-test-$(PYTHON_VERSION) python3 -m pytest mlos_viz/mlos_viz/tests/test_dabl_plot.py
+	# To do that, we need the fixtures from mlos_bench, so make those available too.
+	PYTHONPATH=mlos_bench conda run -n mlos-dist-test-$(PYTHON_VERSION) python3 -m pytest mlos_viz/mlos_viz/tests/test_dabl_plot.py
 	touch $@
 
 dist-test-clean: dist-test-env-clean
2 changes: 1 addition & 1 deletion doc/source/conf.py
@@ -36,7 +36,7 @@
 author = 'GSL'
 
 # The full version, including alpha/beta/rc tags
-release = '0.4.0'
+release = '0.4.1'
 
 try:
     from setuptools_scm import get_version
2 changes: 1 addition & 1 deletion mlos_bench/_version.py
@@ -7,4 +7,4 @@
"""

# NOTE: This should be managed by bumpversion.
_VERSION = '0.4.0'
_VERSION = '0.4.1'
4 changes: 2 additions & 2 deletions mlos_bench/mlos_bench/storage/base_experiment_data.py
@@ -8,7 +8,7 @@

 from abc import ABCMeta, abstractmethod
 from distutils.util import strtobool  # pylint: disable=deprecated-module
-from typing import Dict, Optional, Tuple, TYPE_CHECKING
+from typing import Dict, Literal, Optional, Tuple, TYPE_CHECKING
 
 import pandas
 
@@ -73,7 +73,7 @@ def __repr__(self) -> str:

     @property
     @abstractmethod
-    def objectives(self) -> Dict[str, str]:
+    def objectives(self) -> Dict[str, Literal["min", "max"]]:
         """
         Retrieve the experiment's objectives data from the storage.
9 changes: 6 additions & 3 deletions mlos_bench/mlos_bench/storage/sql/experiment_data.py
@@ -5,7 +5,7 @@
"""
An interface to access the experiment benchmark data stored in SQL DB.
"""
from typing import Dict, Optional
from typing import Dict, Literal, Optional

import logging

@@ -51,15 +51,16 @@ def __init__(self, *,
         self._schema = schema
 
     @property
-    def objectives(self) -> Dict[str, str]:
-        objectives: Dict[str, str] = {}
+    def objectives(self) -> Dict[str, Literal["min", "max"]]:
+        objectives: Dict[str, Literal["min", "max"]] = {}
         # First try to lookup the objectives from the experiment metadata in the storage layer.
         if hasattr(self._schema, "objectives"):
             with self._engine.connect() as conn:
                 objectives_db_data = conn.execute(
                     self._schema.objectives.select().where(
                         self._schema.objectives.c.exp_id == self._experiment_id,
                     ).order_by(
+                        # TODO: return weight as well
                         self._schema.objectives.c.weight.desc(),
                         self._schema.objectives.c.optimization_target.asc(),
                     )
Expand Down Expand Up @@ -98,6 +99,8 @@ def objectives(self) -> Dict[str, str]:
             elif opt_direction != objectives[opt_target]:
                 _LOG.warning("Experiment %s has multiple trial optimization directions for optimization_target %s=%s",
                              self, opt_target, objectives[opt_target])
+        for opt_tgt, opt_dir in objectives.items():
+            assert opt_dir in {None, "min", "max"}, f"Unexpected opt_dir {opt_dir} for opt_tgt {opt_tgt}."
         return objectives
 
     # TODO: provide a way to get individual data to avoid repeated bulk fetches where only small amounts of data is accessed.
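
For reference, a minimal sketch of the mapping this property now returns; the "score" metric name below is a hypothetical example, and entries are ordered by descending weight:

    from typing import Dict, Literal

    # Objective name -> optimization direction.
    objectives: Dict[str, Literal["min", "max"]] = {"score": "min"}
    assert all(direction in ("min", "max") for direction in objectives.values())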
@@ -52,7 +52,8 @@ def _get_tunable_config_trial_group_id(self) -> int:
         with self._engine.connect() as conn:
             tunable_config_trial_group = conn.execute(
                 self._schema.trial.select().with_only_columns(
-                    func.min(self._schema.trial.c.trial_id).cast(Integer).label('tunable_config_trial_group_id'),
+                    func.min(self._schema.trial.c.trial_id).cast(Integer).label(  # pylint: disable=not-callable
+                        'tunable_config_trial_group_id'),
                 ).where(
                     self._schema.trial.c.exp_id == self._experiment_id,
                     self._schema.trial.c.config_id == self._tunable_config_id,
2 changes: 1 addition & 1 deletion mlos_bench/mlos_bench/tests/storage/sql/__init__.py
@@ -3,5 +3,5 @@
 # Licensed under the MIT License.
 #
 """
-Test for mlos_bench sql storage.
+Tests for mlos_bench sql storage.
 """
15 changes: 10 additions & 5 deletions mlos_bench/mlos_bench/tests/storage/sql/fixtures.py
@@ -63,10 +63,14 @@ def exp_storage_with_trials(exp_storage: SqlStorage.Experiment) -> SqlStorage.Ex
"""
# Add some trials to that experiment.
# Note: we're just fabricating some made up function for the ML libraries to try and learn.
base_score = 5.0
base_score = 10.0
tunable_name = "kernel_sched_latency_ns"
tunable_default = exp_storage.tunables.get_tunable(tunable_name)[0].default
tunable = exp_storage.tunables.get_tunable(tunable_name)[0]
tunable_default = tunable.default
assert isinstance(tunable_default, int)
tunable_min = tunable.range[0]
tunable_max = tunable.range[1]
tunable_range = tunable_max - tunable_min
seed = 42
rand_seed(seed)
opt = MockOptimizer(tunables=exp_storage.tunables, config={
@@ -85,14 +89,15 @@
"trial_number": config_i * CONFIG_TRIAL_REPEAT_COUNT + repeat_j + 1,
})
assert trial.tunable_config_id == config_i + 1
tunable_value = float(tunables.get_tunable(tunable_name)[0].numerical_value)
tunable_value_norm = base_score * (tunable_value - tunable_min) / tunable_range
trial.update_telemetry(status=Status.RUNNING, metrics=[
(datetime.utcnow(), "some-metric", base_score + random() / 10),
(datetime.utcnow(), "some-metric", tunable_value_norm + random() / 100),
])
tunable_value = float(tunables.get_tunable(tunable_name)[0].numerical_value)
trial.update(Status.SUCCEEDED, datetime.utcnow(), metrics={
# Give some variance on the score.
# And some influence from the tunable value.
"score": base_score + 10 * ((tunable_value / tunable_default) - 1) + random() / 10,
"score": tunable_value_norm + random() / 100
})
return exp_storage
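
The net effect of these fixture changes: the synthetic score is a linear rescaling of the tunable's value onto [0, base_score] plus a little noise, so repeated trials of the same config score slightly differently, which is exactly what the new variance error bars visualize. A standalone sketch of that arithmetic, with a hypothetical tunable range:

    from random import random

    # Sketch of the fixture's synthetic scoring function. The range below is
    # hypothetical; the real bounds come from the kernel_sched_latency_ns tunable.
    base_score = 10.0
    tunable_min, tunable_max = 0.0, 1_000_000.0
    tunable_range = tunable_max - tunable_min

    def synthetic_score(tunable_value: float) -> float:
        # Rescale the tunable value linearly onto [0, base_score] ...
        tunable_value_norm = base_score * (tunable_value - tunable_min) / tunable_range
        # ... and add a little noise so repeat trials of the same config vary.
        return tunable_value_norm + random() / 100

    # A config at the bottom of the range scores ~0, at the top ~10.
    assert 0.0 <= synthetic_score(tunable_min) < 0.02
    assert 10.0 <= synthetic_score(tunable_max) < 10.02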

2 changes: 1 addition & 1 deletion mlos_bench/mlos_bench/tests/storage/trial_data_test.py
@@ -26,7 +26,7 @@ def test_exp_trial_data(exp_data: ExperimentData) -> None:
     assert trial.status == Status.SUCCEEDED
     assert trial.metadata_dict["trial_number"] == trial_id
     assert list(trial.results_dict.keys()) == ["score"]
-    assert trial.results_dict["score"] == pytest.approx(5.0, rel=0.1)
+    assert trial.results_dict["score"] == pytest.approx(0.0, abs=0.1)
     assert isinstance(trial.ts_start, datetime)
     assert isinstance(trial.ts_end, datetime)
     # Note: tests for telemetry are in test_update_telemetry()
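
The switch from rel= to abs= matters because the expected score is now roughly 0.0: a relative tolerance scales with the expected value, so around zero it collapses to (nearly) nothing, while an absolute tolerance gives a fixed window. A quick illustration:

    import pytest

    # Near zero, rel= is useless: the window is max(rel * |expected|, 1e-12),
    # i.e. ~1e-12 when the expected value is 0.0; abs= gives a fixed window.
    assert 0.05 == pytest.approx(0.0, abs=0.1)
    assert 0.05 != pytest.approx(0.0, rel=0.1)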
2 changes: 1 addition & 1 deletion mlos_core/_version.py
@@ -7,4 +7,4 @@
"""

# NOTE: This should be managed by bumpversion.
_VERSION = '0.4.0'
_VERSION = '0.4.1'
69 changes: 24 additions & 45 deletions mlos_viz/mlos_viz/__init__.py
@@ -8,55 +8,22 @@
"""

from enum import Enum
from typing import Any, Dict, Literal, Optional

import warnings

from matplotlib import pyplot as plt
import seaborn as sns
import pandas

from mlos_bench.storage.base_experiment_data import ExperimentData
from mlos_viz import base
from mlos_viz.util import expand_results_data_args


class MlosVizMethod(Enum):
"""
What method to use for visualizing the experiment results.
"""

AUTO = "dabl" # use dabl as the current default
DABL = "dabl"


def _plot_optimizer_trends(exp_data: ExperimentData) -> None:
"""
Plots the optimizer trends for the Experiment.
Intended to be used from a Jupyter notebook.
Parameters
----------
exp_data: ExperimentData
The experiment data to plot.
"""
for objective in exp_data.objectives:
objective_column = ExperimentData.RESULT_COLUMN_PREFIX + objective
results_df = exp_data.results_df
plt.rcParams["figure.figsize"] = (10, 4)

sns.scatterplot(
x=results_df.trial_id, y=results_df[objective_column],
alpha=0.7, label="Trial") # Result of each trial
sns.lineplot(
x=results_df.trial_id, y=results_df[objective_column].cummin(),
label="Incumbent") # the best result so far (cummin)

plt.yscale('log')

plt.xlabel("Trial number")
plt.ylabel(objective)

plt.title("Optimizer Trends for Experiment: " + exp_data.experiment_id)
plt.grid()
plt.show() # type: ignore[no-untyped-call]
AUTO = DABL # use dabl as the current default


def ignore_plotter_warnings(plotter_method: MlosVizMethod = MlosVizMethod.AUTO) -> None:
@@ -69,18 +36,20 @@ def ignore_plotter_warnings(plotter_method: MlosVizMethod = MlosVizMethod.AUTO)
     plotter_method: MlosVizMethod
         The method to use for visualizing the experiment results.
     """
-    warnings.filterwarnings("ignore", category=FutureWarning)
 
+    base.ignore_plotter_warnings()
     if plotter_method == MlosVizMethod.DABL:
         import mlos_viz.dabl  # pylint: disable=import-outside-toplevel
         mlos_viz.dabl.ignore_plotter_warnings()
     else:
         raise NotImplementedError(f"Unhandled method: {plotter_method}")
 
 
-def plot(exp_data: ExperimentData,
+def plot(exp_data: Optional[ExperimentData] = None, *,
+         results_df: Optional[pandas.DataFrame] = None,
+         objectives: Optional[Dict[str, Literal["min", "max"]]] = None,
          plotter_method: MlosVizMethod = MlosVizMethod.AUTO,
-         filter_warnings: bool = True) -> None:
+         filter_warnings: bool = True,
+         **kwargs: Any) -> None:
     """
     Plots the results of the experiment.
@@ -90,18 +59,28 @@ def plot(exp_data: ExperimentData,
     ----------
     exp_data: ExperimentData
         The experiment data to plot.
+    results_df : Optional["pandas.DataFrame"]
+        Optional results_df to plot.
+        If not provided, defaults to exp_data.results_df property.
+    objectives : Optional[Dict[str, Literal["min", "max"]]]
+        Optional objectives to plot.
+        If not provided, defaults to exp_data.objectives property.
     plotter_method: MlosVizMethod
         The method to use for visualizing the experiment results.
     filter_warnings: bool
         Whether or not to filter some warnings from the plotter.
+    kwargs : dict
+        Remaining keyword arguments are passed along to the underlying plotter(s).
     """
-    _plot_optimizer_trends(exp_data)
-
     if filter_warnings:
         ignore_plotter_warnings(plotter_method)
+    (results_df, _obj_cols) = expand_results_data_args(exp_data, results_df, objectives)
+
+    base.plot_optimizer_trends(exp_data, results_df=results_df, objectives=objectives)
+    base.plot_top_n_configs(exp_data, results_df=results_df, objectives=objectives, **kwargs)
 
     if MlosVizMethod.DABL:
         import mlos_viz.dabl  # pylint: disable=import-outside-toplevel
-        mlos_viz.dabl.plot(exp_data)
+        mlos_viz.dabl.plot(exp_data, results_df=results_df, objectives=objectives)
     else:
         raise NotImplementedError(f"Unhandled method: {plotter_method}")
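
Putting the new entry point together, a hedged usage sketch (not part of the commit): the storage-loading lines and the top_n_configs keyword are assumptions, since kwargs are merely forwarded to base.plot_top_n_configs(), whose parameter names this diff does not show.

    from mlos_bench.storage import from_config
    from mlos_viz import MlosVizMethod, plot

    # Hypothetical: load experiment data from an mlos_bench storage config.
    storage = from_config(config_file="storage/sqlite.jsonc")
    exp_data = storage.experiments["my-experiment-id"]

    # AUTO and DABL are now aliases of the same enum member (AUTO = DABL).
    assert MlosVizMethod.AUTO is MlosVizMethod.DABL

    # Plot optimizer trends, top-N configs (with variance error bars for
    # repeated trials of the same config), and the dabl summary plots.
    plot(exp_data,
         plotter_method=MlosVizMethod.AUTO,
         filter_warnings=True,
         top_n_configs=5)  # hypothetical kwarg, forwarded via **kwargs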