From 6e439d29c8fc69e2bc24ca3add20040dd1bc4232 Mon Sep 17 00:00:00 2001 From: Johannes Freischuetz Date: Wed, 10 Jul 2024 19:55:32 +0000 Subject: [PATCH 1/3] add support for hierarchical knobs --- .cspell.json | 1 + .../bayesian_optimizers/smac_optimizer.py | 2 +- mlos_core/mlos_core/optimizers/optimizer.py | 4 +- .../tests/optimizers/optimizer_test.py | 71 +++++++++++++++++++ 4 files changed, 75 insertions(+), 3 deletions(-) diff --git a/.cspell.json b/.cspell.json index 2cd9280fc8d..f4bc99063c2 100644 --- a/.cspell.json +++ b/.cspell.json @@ -21,6 +21,7 @@ "discretization", "discretize", "drivername", + "dropna", "dstpath", "dtype", "duckdb", diff --git a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py index 8a433218fa5..f3681ce8d04 100644 --- a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py +++ b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py @@ -296,7 +296,7 @@ def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: self.optimizer_parameter_space.check_configuration(trial.config) assert trial.config.config_space == self.optimizer_parameter_space self.trial_info_map[trial.config] = trial - config_df = pd.DataFrame([trial.config], columns=list(self.optimizer_parameter_space.keys())) + config_df = pd.DataFrame([trial.config], columns=list(self.optimizer_parameter_space.keys())).dropna(axis=1) return config_df def register_pending(self, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: diff --git a/mlos_core/mlos_core/optimizers/optimizer.py b/mlos_core/mlos_core/optimizers/optimizer.py index f1cedb85dcc..d361c85579e 100644 --- a/mlos_core/mlos_core/optimizers/optimizer.py +++ b/mlos_core/mlos_core/optimizers/optimizer.py @@ -92,14 +92,14 @@ def register(self, configurations: pd.DataFrame, scores: pd.DataFrame, if context is not None: assert len(configurations) == len(context), \ "Mismatched number of configurations and context." - assert configurations.shape[1] == len(self.parameter_space.values()), \ + assert configurations.shape[1] <= len(self.parameter_space.values()), \ "Mismatched configuration shape." self._observations.append((configurations, scores, context)) self._has_context = context is not None if self._space_adapter: configurations = self._space_adapter.inverse_transform(configurations) - assert configurations.shape[1] == len(self.optimizer_parameter_space.values()), \ + assert configurations.shape[1] <= len(self.optimizer_parameter_space.values()), \ "Mismatched configuration shape after inverse transform." return self._register(configurations, scores, context) diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py index 67c7eddf3b2..fe28ef2f045 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py +++ b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py @@ -399,3 +399,74 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: assert isinstance(all_configs, pd.DataFrame) assert isinstance(all_scores, pd.DataFrame) assert all_contexts is None + + +@pytest.mark.parametrize(("optimizer_type", "kwargs"), [ + # Default optimizer + (None, {}), + # Enumerate all supported Optimizers + *[(member, {}) for member in OptimizerType], + # Optimizer with non-empty kwargs argument +]) +def test_hierarchical_input_space(optimizer_type: Optional[OptimizerType], kwargs: Optional[dict]) -> None: + """ + Toy problem to test the optimizers with hierarchical types to ensure that the returned types are properly handled + """ + max_iterations = 10 + if kwargs is None: + kwargs = {} + + def objective(point: pd.DataFrame) -> pd.DataFrame: + # mix of hyperparameters, optimal is to select the highest possible + + return pd.DataFrame({"score": point["a"] + point["c"] if point["switch"].iloc[0] == "a" else 2 * point["b"] + point["c"]}) + + input_space = CS.ConfigurationSpace(seed=SEED) + # add a mix of numeric datatypes + input_space.add_hyperparameter(CS.CategoricalHyperparameter(name="switch", choices=["a", "b"])) + input_space.add_hyperparameter(CS.UniformFloatHyperparameter(name="a", lower=0.0, upper=5.0)) + input_space.add_hyperparameter(CS.UniformFloatHyperparameter(name="b", lower=0.0, upper=5.0)) + input_space.add_hyperparameter(CS.UniformFloatHyperparameter(name="c", lower=0.0, upper=5.0)) + input_space.add_condition(CS.EqualsCondition(input_space["a"], input_space["switch"], "a")) + input_space.add_condition(CS.EqualsCondition(input_space["b"], input_space["switch"], "b")) + + if optimizer_type is None: + optimizer = OptimizerFactory.create( + parameter_space=input_space, + optimization_targets=['score'], + optimizer_kwargs=kwargs, + ) + else: + optimizer = OptimizerFactory.create( + parameter_space=input_space, + optimization_targets=['score'], + optimizer_type=optimizer_type, + optimizer_kwargs=kwargs, + ) + + for _ in range(max_iterations): + suggestion = optimizer.suggest() + + # Check that suggestion is returning valid column combinations + assert isinstance(suggestion, pd.DataFrame) + assert {'switch', 'c'}.issubset(suggestion.columns) + assert {'a'}.issubset(suggestion.columns) ^ {'b'}.issubset(suggestion.columns) + + # Check suggestion values are the expected dtype + assert suggestion["switch"].iloc[0] == "a" or suggestion["switch"].iloc[0] == "b" + if suggestion["switch"].iloc[0] == "a": + assert isinstance(suggestion['a'].iloc[0], np.floating) + else: + assert isinstance(suggestion['b'].iloc[0], np.floating) + assert isinstance(suggestion['c'].iloc[0], np.floating) + + # Check that suggestion is in the space + test_configuration = CS.Configuration(optimizer.parameter_space, suggestion.astype('O').iloc[0].to_dict()) + # Raises an error if outside of configuration space + test_configuration.is_valid_configuration() + + # Test registering the suggested configuration with a score. + observation = objective(suggestion) + assert isinstance(observation, pd.DataFrame) + optimizer.register(suggestion, observation) + From 7a6b87f84dce4abacedc5d7c3e004d3aeb28c31f Mon Sep 17 00:00:00 2001 From: Johannes Freischuetz Date: Wed, 10 Jul 2024 20:19:10 +0000 Subject: [PATCH 2/3] remove blank line --- mlos_core/mlos_core/tests/optimizers/optimizer_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py index a38f65f8d06..ed73760e7fd 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py +++ b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py @@ -470,4 +470,3 @@ def objective(point: pd.DataFrame) -> pd.DataFrame: observation = objective(suggestion) assert isinstance(observation, pd.DataFrame) optimizer.register(configs=suggestion, scores=observation, metadata=metadata) - From e2751d3430fe4ec5eea289523c14ccf8999fc8f5 Mon Sep 17 00:00:00 2001 From: Johannes Freischuetz Date: Wed, 10 Jul 2024 20:33:34 +0000 Subject: [PATCH 3/3] merge --- mlos_core/mlos_core/tests/optimizers/optimizer_test.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py index ed73760e7fd..5c1a8e9ea7e 100644 --- a/mlos_core/mlos_core/tests/optimizers/optimizer_test.py +++ b/mlos_core/mlos_core/tests/optimizers/optimizer_test.py @@ -418,12 +418,14 @@ def test_hierarchical_input_space(optimizer_type: Optional[OptimizerType], kwarg kwargs = {} def objective(point: pd.DataFrame) -> pd.DataFrame: - # mix of hyperparameters, optimal is to select the highest possible - - return pd.DataFrame({"score": point["a"] + point["c"] if point["switch"].iloc[0] == "a" else 2 * point["b"] + point["c"]}) + # Two different functions based on the switch + if point["switch"].iloc[0] == "a": + return pd.DataFrame({"score": point["a"] + point["c"]}) + else: + return pd.DataFrame({"score": 2 * point["b"] + point["c"]}) + # Initialize a hierarchical configuration space input_space = CS.ConfigurationSpace(seed=SEED) - # add a mix of numeric datatypes input_space.add_hyperparameter(CS.CategoricalHyperparameter(name="switch", choices=["a", "b"])) input_space.add_hyperparameter(CS.UniformFloatHyperparameter(name="a", lower=0.0, upper=5.0)) input_space.add_hyperparameter(CS.UniformFloatHyperparameter(name="b", lower=0.0, upper=5.0))