Improve mlos-viz for multiple repeats of a config and add tests (#633)
- Mark `mlos_viz` as `typed` for `mypy`
- Bump version
- Mock calls to matplotlib/dabl for testing
- Add plotting of top-N configs
- Improve plots for handling repeat config trials via variance error bars (see the usage sketch after the `mlos_viz/__init__.py` diff below)

---------

Co-authored-by: Sergiy Matusevych <[email protected]>
bpkroth and motus authored Jan 29, 2024
1 parent 3a36797 commit a45f97d
Showing 24 changed files with 738 additions and 72 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.4.0
+current_version = 0.4.1
 commit = True
 tag = True

4 changes: 4 additions & 0 deletions .cspell.json
@@ -38,6 +38,7 @@
"jupyterlab",
"keepalive",
"kwargs",
"kword",
"libmamba",
"linalg",
"llamatune",
@@ -57,6 +58,7 @@
"pylint",
"pyplot",
"pytest",
"quantile",
"Quickstart",
"refcnt",
"rexec",
@@ -82,6 +84,8 @@
"workerinput",
"xdist",
"xlabel",
"xlabels",
"xticks",
"ylabel"
]
// vim: set ft=jsonc:
2 changes: 2 additions & 0 deletions .github/workflows/build-dist-test.ps1
@@ -114,6 +114,8 @@ if ($LASTEXITCODE -ne 0) {
 }
 
 # Run a simple mlos_viz test.
+# To do that, we need the fixtures from mlos_bench, so make those available too.
+$env:PYTHONPATH = "mlos_bench"
 conda run -n mlos-dist-test python -m pytest mlos_viz/mlos_viz/tests/test_dabl_plot.py
 if ($LASTEXITCODE -ne 0) {
     Write-Error "Failed to run mlos_viz tests."
3 changes: 2 additions & 1 deletion Makefile
@@ -335,7 +335,8 @@ build/dist-test.$(PYTHON_VERSION).build-stamp: $(PYTHON_FILES) build/dist-test-e
 	# Run a simple test that uses the mlos_bench wheel (full tests can be checked with `make test`).
 	conda run -n mlos-dist-test-$(PYTHON_VERSION) python3 -m pytest mlos_bench/mlos_bench/tests/environments/mock_env_test.py
 	# Run a simple test that uses the mlos_viz wheel (full tests can be checked with `make test`).
-	conda run -n mlos-dist-test-$(PYTHON_VERSION) python3 -m pytest mlos_viz/mlos_viz/tests/test_dabl_plot.py
+	# To do that, we need the fixtures from mlos_bench, so make those available too.
+	PYTHONPATH=mlos_bench conda run -n mlos-dist-test-$(PYTHON_VERSION) python3 -m pytest mlos_viz/mlos_viz/tests/test_dabl_plot.py
 	touch $@
 
 dist-test-clean: dist-test-env-clean
2 changes: 1 addition & 1 deletion doc/source/conf.py
@@ -36,7 +36,7 @@
 author = 'GSL'
 
 # The full version, including alpha/beta/rc tags
-release = '0.4.0'
+release = '0.4.1'
 
 try:
     from setuptools_scm import get_version
2 changes: 1 addition & 1 deletion mlos_bench/_version.py
@@ -7,4 +7,4 @@
"""

# NOTE: This should be managed by bumpversion.
_VERSION = '0.4.0'
_VERSION = '0.4.1'
4 changes: 2 additions & 2 deletions mlos_bench/mlos_bench/storage/base_experiment_data.py
@@ -8,7 +8,7 @@

 from abc import ABCMeta, abstractmethod
 from distutils.util import strtobool  # pylint: disable=deprecated-module
-from typing import Dict, Optional, Tuple, TYPE_CHECKING
+from typing import Dict, Literal, Optional, Tuple, TYPE_CHECKING
 
 import pandas
 
@@ -73,7 +73,7 @@ def __repr__(self) -> str:

     @property
     @abstractmethod
-    def objectives(self) -> Dict[str, str]:
+    def objectives(self) -> Dict[str, Literal["min", "max"]]:
         """
         Retrieve the experiment's objectives data from the storage.
9 changes: 6 additions & 3 deletions mlos_bench/mlos_bench/storage/sql/experiment_data.py
@@ -5,7 +5,7 @@
"""
An interface to access the experiment benchmark data stored in SQL DB.
"""
from typing import Dict, Optional
from typing import Dict, Literal, Optional

import logging

@@ -51,15 +51,16 @@ def __init__(self, *,
         self._schema = schema
 
     @property
-    def objectives(self) -> Dict[str, str]:
-        objectives: Dict[str, str] = {}
+    def objectives(self) -> Dict[str, Literal["min", "max"]]:
+        objectives: Dict[str, Literal["min", "max"]] = {}
         # First try to lookup the objectives from the experiment metadata in the storage layer.
         if hasattr(self._schema, "objectives"):
             with self._engine.connect() as conn:
                 objectives_db_data = conn.execute(
                     self._schema.objectives.select().where(
                         self._schema.objectives.c.exp_id == self._experiment_id,
                     ).order_by(
+                        # TODO: return weight as well
                         self._schema.objectives.c.weight.desc(),
                         self._schema.objectives.c.optimization_target.asc(),
                     )
Expand Down Expand Up @@ -98,6 +99,8 @@ def objectives(self) -> Dict[str, str]:
             elif opt_direction != objectives[opt_target]:
                 _LOG.warning("Experiment %s has multiple trial optimization directions for optimization_target %s=%s",
                              self, opt_target, objectives[opt_target])
+        for opt_tgt, opt_dir in objectives.items():
+            assert opt_dir in {None, "min", "max"}, f"Unexpected opt_dir {opt_dir} for opt_tgt {opt_tgt}."
         return objectives
 
     # TODO: provide a way to get individual data to avoid repeated bulk fetches where only small amounts of data is accessed.
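
For reference, a minimal sketch of the mapping this property now returns; the "score" metric name below is a hypothetical example, and entries are ordered by descending weight:

    from typing import Dict, Literal

    # Objective name -> optimization direction.
    objectives: Dict[str, Literal["min", "max"]] = {"score": "min"}
    assert all(direction in ("min", "max") for direction in objectives.values())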
@@ -52,7 +52,8 @@ def _get_tunable_config_trial_group_id(self) -> int:
         with self._engine.connect() as conn:
             tunable_config_trial_group = conn.execute(
                 self._schema.trial.select().with_only_columns(
-                    func.min(self._schema.trial.c.trial_id).cast(Integer).label('tunable_config_trial_group_id'),
+                    func.min(self._schema.trial.c.trial_id).cast(Integer).label(  # pylint: disable=not-callable
+                        'tunable_config_trial_group_id'),
                 ).where(
                     self._schema.trial.c.exp_id == self._experiment_id,
                     self._schema.trial.c.config_id == self._tunable_config_id,
2 changes: 1 addition & 1 deletion mlos_bench/mlos_bench/tests/storage/sql/__init__.py
@@ -3,5 +3,5 @@
 # Licensed under the MIT License.
 #
 """
-Test for mlos_bench sql storage.
+Tests for mlos_bench sql storage.
 """
15 changes: 10 additions & 5 deletions mlos_bench/mlos_bench/tests/storage/sql/fixtures.py
@@ -63,10 +63,14 @@ def exp_storage_with_trials(exp_storage: SqlStorage.Experiment) -> SqlStorage.Ex
"""
# Add some trials to that experiment.
# Note: we're just fabricating some made up function for the ML libraries to try and learn.
base_score = 5.0
base_score = 10.0
tunable_name = "kernel_sched_latency_ns"
tunable_default = exp_storage.tunables.get_tunable(tunable_name)[0].default
tunable = exp_storage.tunables.get_tunable(tunable_name)[0]
tunable_default = tunable.default
assert isinstance(tunable_default, int)
tunable_min = tunable.range[0]
tunable_max = tunable.range[1]
tunable_range = tunable_max - tunable_min
seed = 42
rand_seed(seed)
opt = MockOptimizer(tunables=exp_storage.tunables, config={
@@ -85,14 +89,15 @@
"trial_number": config_i * CONFIG_TRIAL_REPEAT_COUNT + repeat_j + 1,
})
assert trial.tunable_config_id == config_i + 1
tunable_value = float(tunables.get_tunable(tunable_name)[0].numerical_value)
tunable_value_norm = base_score * (tunable_value - tunable_min) / tunable_range
trial.update_telemetry(status=Status.RUNNING, metrics=[
(datetime.utcnow(), "some-metric", base_score + random() / 10),
(datetime.utcnow(), "some-metric", tunable_value_norm + random() / 100),
])
tunable_value = float(tunables.get_tunable(tunable_name)[0].numerical_value)
trial.update(Status.SUCCEEDED, datetime.utcnow(), metrics={
# Give some variance on the score.
# And some influence from the tunable value.
"score": base_score + 10 * ((tunable_value / tunable_default) - 1) + random() / 10,
"score": tunable_value_norm + random() / 100
})
return exp_storage
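
The net effect of these fixture changes: the synthetic score is a linear rescaling of the tunable's value onto [0, base_score] plus a little noise, so repeated trials of the same config score slightly differently, which is exactly what the new variance error bars visualize. A standalone sketch of that arithmetic, with a hypothetical tunable range:

    from random import random

    # Sketch of the fixture's synthetic scoring function. The range below is
    # hypothetical; the real bounds come from the kernel_sched_latency_ns tunable.
    base_score = 10.0
    tunable_min, tunable_max = 0.0, 1_000_000.0
    tunable_range = tunable_max - tunable_min

    def synthetic_score(tunable_value: float) -> float:
        # Rescale the tunable value linearly onto [0, base_score] ...
        tunable_value_norm = base_score * (tunable_value - tunable_min) / tunable_range
        # ... and add a little noise so repeat trials of the same config vary.
        return tunable_value_norm + random() / 100

    # A config at the bottom of the range scores ~0, at the top ~10.
    assert 0.0 <= synthetic_score(tunable_min) < 0.02
    assert 10.0 <= synthetic_score(tunable_max) < 10.02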

2 changes: 1 addition & 1 deletion mlos_bench/mlos_bench/tests/storage/trial_data_test.py
@@ -26,7 +26,7 @@ def test_exp_trial_data(exp_data: ExperimentData) -> None:
     assert trial.status == Status.SUCCEEDED
     assert trial.metadata_dict["trial_number"] == trial_id
     assert list(trial.results_dict.keys()) == ["score"]
-    assert trial.results_dict["score"] == pytest.approx(5.0, rel=0.1)
+    assert trial.results_dict["score"] == pytest.approx(0.0, abs=0.1)
     assert isinstance(trial.ts_start, datetime)
     assert isinstance(trial.ts_end, datetime)
     # Note: tests for telemetry are in test_update_telemetry()
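
The switch from rel= to abs= matters because the expected score is now roughly 0.0: a relative tolerance scales with the expected value, so around zero it collapses to (nearly) nothing, while an absolute tolerance gives a fixed window. A quick illustration:

    import pytest

    # Near zero, rel= is useless: the window is max(rel * |expected|, 1e-12),
    # i.e. ~1e-12 when the expected value is 0.0; abs= gives a fixed window.
    assert 0.05 == pytest.approx(0.0, abs=0.1)
    assert 0.05 != pytest.approx(0.0, rel=0.1)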
2 changes: 1 addition & 1 deletion mlos_core/_version.py
@@ -7,4 +7,4 @@
"""

# NOTE: This should be managed by bumpversion.
_VERSION = '0.4.0'
_VERSION = '0.4.1'
69 changes: 24 additions & 45 deletions mlos_viz/mlos_viz/__init__.py
@@ -8,55 +8,22 @@
"""

from enum import Enum
from typing import Any, Dict, Literal, Optional

import warnings

from matplotlib import pyplot as plt
import seaborn as sns
import pandas

from mlos_bench.storage.base_experiment_data import ExperimentData
from mlos_viz import base
from mlos_viz.util import expand_results_data_args


class MlosVizMethod(Enum):
"""
What method to use for visualizing the experiment results.
"""

AUTO = "dabl" # use dabl as the current default
DABL = "dabl"


def _plot_optimizer_trends(exp_data: ExperimentData) -> None:
"""
Plots the optimizer trends for the Experiment.
Intended to be used from a Jupyter notebook.
Parameters
----------
exp_data: ExperimentData
The experiment data to plot.
"""
for objective in exp_data.objectives:
objective_column = ExperimentData.RESULT_COLUMN_PREFIX + objective
results_df = exp_data.results_df
plt.rcParams["figure.figsize"] = (10, 4)

sns.scatterplot(
x=results_df.trial_id, y=results_df[objective_column],
alpha=0.7, label="Trial") # Result of each trial
sns.lineplot(
x=results_df.trial_id, y=results_df[objective_column].cummin(),
label="Incumbent") # the best result so far (cummin)

plt.yscale('log')

plt.xlabel("Trial number")
plt.ylabel(objective)

plt.title("Optimizer Trends for Experiment: " + exp_data.experiment_id)
plt.grid()
plt.show() # type: ignore[no-untyped-call]
AUTO = DABL # use dabl as the current default


def ignore_plotter_warnings(plotter_method: MlosVizMethod = MlosVizMethod.AUTO) -> None:
@@ -69,18 +36,20 @@ def ignore_plotter_warnings(plotter_method: MlosVizMethod = MlosVizMethod.AUTO)
     plotter_method: MlosVizMethod
         The method to use for visualizing the experiment results.
     """
-    warnings.filterwarnings("ignore", category=FutureWarning)
 
+    base.ignore_plotter_warnings()
     if plotter_method == MlosVizMethod.DABL:
         import mlos_viz.dabl  # pylint: disable=import-outside-toplevel
         mlos_viz.dabl.ignore_plotter_warnings()
     else:
         raise NotImplementedError(f"Unhandled method: {plotter_method}")
 
 
-def plot(exp_data: ExperimentData,
+def plot(exp_data: Optional[ExperimentData] = None, *,
+         results_df: Optional[pandas.DataFrame] = None,
+         objectives: Optional[Dict[str, Literal["min", "max"]]] = None,
          plotter_method: MlosVizMethod = MlosVizMethod.AUTO,
-         filter_warnings: bool = True) -> None:
+         filter_warnings: bool = True,
+         **kwargs: Any) -> None:
     """
     Plots the results of the experiment.
@@ -90,18 +59,28 @@ def plot(exp_data: ExperimentData,
     ----------
     exp_data: ExperimentData
         The experiment data to plot.
+    results_df : Optional["pandas.DataFrame"]
+        Optional results_df to plot.
+        If not provided, defaults to exp_data.results_df property.
+    objectives : Optional[Dict[str, Literal["min", "max"]]]
+        Optional objectives to plot.
+        If not provided, defaults to exp_data.objectives property.
     plotter_method: MlosVizMethod
         The method to use for visualizing the experiment results.
     filter_warnings: bool
         Whether or not to filter some warnings from the plotter.
+    kwargs : dict
+        Remaining keyword arguments are passed along to the underlying plotter(s).
     """
-    _plot_optimizer_trends(exp_data)
-
     if filter_warnings:
         ignore_plotter_warnings(plotter_method)
+    (results_df, _obj_cols) = expand_results_data_args(exp_data, results_df, objectives)
+
+    base.plot_optimizer_trends(exp_data, results_df=results_df, objectives=objectives)
+    base.plot_top_n_configs(exp_data, results_df=results_df, objectives=objectives, **kwargs)
 
     if MlosVizMethod.DABL:
         import mlos_viz.dabl  # pylint: disable=import-outside-toplevel
-        mlos_viz.dabl.plot(exp_data)
+        mlos_viz.dabl.plot(exp_data, results_df=results_df, objectives=objectives)
     else:
         raise NotImplementedError(f"Unhandled method: {plotter_method}")
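
Putting the new entry point together, a hedged usage sketch (not part of the commit): the storage-loading lines and the top_n_configs keyword are assumptions, since kwargs are merely forwarded to base.plot_top_n_configs(), whose parameter names this diff does not show.

    from mlos_bench.storage import from_config
    from mlos_viz import MlosVizMethod, plot

    # Hypothetical: load experiment data from an mlos_bench storage config.
    storage = from_config(config_file="storage/sqlite.jsonc")
    exp_data = storage.experiments["my-experiment-id"]

    # AUTO and DABL are now aliases of the same enum member (AUTO = DABL).
    assert MlosVizMethod.AUTO is MlosVizMethod.DABL

    # Plot optimizer trends, top-N configs (with variance error bars for
    # repeated trials of the same config), and the dabl summary plots.
    plot(exp_data,
         plotter_method=MlosVizMethod.AUTO,
         filter_warnings=True,
         top_n_configs=5)  # hypothetical kwarg, forwarded via **kwargs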