Support for special tunable values outside of the range #617

Merged
merged 53 commits into from
Jan 17, 2024
Changes from 50 commits
Commits
478828f
allow special values outside of the range
motus Dec 1, 2023
8de456c
add unit tests to check the assignment of the special value
motus Dec 1, 2023
93ee122
more unit tests for tunable values assignment
motus Dec 1, 2023
f6ba2eb
minor type issues fixed
motus Dec 1, 2023
9a77514
making ConfigSpace tests break
motus Dec 1, 2023
bc59e0b
Merge branch 'main' of github.com:microsoft/MLOS into sergiym/tunable…
motus Dec 1, 2023
6513119
create special ConfigSpace hyperparameters for tunables with special …
motus Dec 5, 2023
ccfab53
make configspace unit tests typecheck
motus Dec 5, 2023
d7de082
working on TunableGroups to ConfigurationSpace conversion unit tests
motus Dec 5, 2023
465a0f5
make tg to cs tests pass
motus Dec 5, 2023
7771eb3
all unit tests pass
motus Dec 7, 2023
f69ec0a
make a bit more complex setup for special values of the tunables
motus Dec 7, 2023
2136011
fix a few more unit tests
motus Dec 7, 2023
950f956
handling tunables with special values in the optimizer
motus Dec 8, 2023
6186828
fixed df generation; mlos_core does not handle the new data well
motus Dec 9, 2023
003f50d
bugfix: make int columns nullable if they have special values
motus Jan 8, 2024
9b74ef1
Merge branch 'main' of github.com:microsoft/MLOS into sergiym/tunable…
motus Jan 8, 2024
d2ef569
minor fix: special can be an empty list instead of null
motus Jan 9, 2024
d21964c
fix the configspace_data_to_tunable_values() to deal with the specials
motus Jan 9, 2024
3adff57
make special/regular weights uniform to make FLAML work
motus Jan 9, 2024
b892899
fix the unit test
motus Jan 9, 2024
fe4e6df
fix values in unit tests
motus Jan 9, 2024
538b917
make SMAC optimizer work with special values
motus Jan 9, 2024
851c7c7
make FLAML optimizer work with special values and conditionals
motus Jan 9, 2024
3f2d3f4
make launcher_run_test work again after FLAML updates
motus Jan 9, 2024
7f45fa2
remove irrelevant comment from smac opt
motus Jan 9, 2024
fbf4001
move config normalization to the base class
motus Jan 9, 2024
13ff42c
move normalize_config to mlos_core.util; use it in LlamaTune. All unit
motus Jan 10, 2024
c81707c
Merge branch 'main' of github.com:microsoft/MLOS into sergiym/tunable…
motus Jan 10, 2024
d09efdb
Merge branch 'main' of github.com:microsoft/MLOS into sergiym/tunable…
motus Jan 10, 2024
50bcc51
bugfix: use the right config space in FLAML
motus Jan 10, 2024
5fed055
be a bit more verbose in parameter naming in unit tests
motus Jan 10, 2024
7a68ce1
a stricter check for llamatune configuration parameters
motus Jan 10, 2024
0cc11e8
roll back the config check
motus Jan 10, 2024
b737cb8
Merge branch 'main' into sergiym/tunable/special
motus Jan 11, 2024
e1350f8
Merge branch 'main' into sergiym/tunable/special
motus Jan 16, 2024
75af02f
Bump version: 0.3.0 → 0.3.1
motus Jan 16, 2024
d0304c4
Update mlos_bench/mlos_bench/optimizers/convert_configspace.py
motus Jan 16, 2024
288c528
Update mlos_bench/mlos_bench/optimizers/convert_configspace.py
motus Jan 16, 2024
cecc738
Update mlos_bench/mlos_bench/optimizers/convert_configspace.py
motus Jan 16, 2024
981141c
sort the imports
motus Jan 16, 2024
ab268f2
Merge branch 'sergiym/tunable/special' of github.com:motus/MLOS into …
motus Jan 16, 2024
6bf88ac
fix the wording in some assert messages
motus Jan 16, 2024
574db2d
explicitly check for `*!type` parameters instead of relying on the __
motus Jan 16, 2024
e74b0bc
make sure that tunables cannot have a `!` in the name
motus Jan 16, 2024
dd04b21
use enum for special and range values of the config space conditionals
motus Jan 16, 2024
938d09b
Merge branch 'main' into sergiym/tunable/special
motus Jan 16, 2024
1ee4bd0
Update mlos_bench/mlos_bench/optimizers/convert_configspace.py
bpkroth Jan 17, 2024
86e323c
Update mlos_bench/mlos_bench/tests/tunables/tunable_definition_test.py
bpkroth Jan 17, 2024
f6ee6a8
Merge branch 'main' into sergiym/tunable/special
bpkroth Jan 17, 2024
c39f097
Update mlos_bench/mlos_bench/optimizers/convert_configspace.py
motus Jan 17, 2024
b56bca6
add a note to docstring regarding `!` in tunable names
motus Jan 17, 2024
b2e5ec3
implement `Tunable.is_special` and use it in unit tests
motus Jan 17, 2024
2 changes: 1 addition & 1 deletion .bumpversion.cfg
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.3.0
current_version = 0.3.1
commit = True
tag = True

2 changes: 1 addition & 1 deletion doc/source/conf.py
@@ -36,7 +36,7 @@
author = 'GSL'

# The full version, including alpha/beta/rc tags
release = '0.3.0'
release = '0.3.1'

try:
from setuptools_scm import get_version
2 changes: 1 addition & 1 deletion mlos_bench/_version.py
@@ -7,4 +7,4 @@
"""

# NOTE: This should be managed by bumpversion.
_VERSION = '0.3.0'
_VERSION = '0.3.1'
@@ -7,7 +7,7 @@
"type": "int",
"meta": {"name_prefix": "/proc/sys/kernel/"},
"default": 500000,
"range": [-1, 1000000],
"range": [0, 1000000],
"special": [-1]
},
"sched_latency_ns": {
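
For reference, a minimal sketch of a tunable definition in the new style, written as a Python dict that mirrors the JSON config format (the name and values are illustrative, not taken from the PR):

# Hypothetical tunable: the numeric range stays contiguous and the
# out-of-range sentinel is listed separately under "special".
sched_migration_cost_ns = {
    "type": "int",
    "default": 500000,
    "range": [0, 1000000],   # regular values the optimizer samples from
    "special": [-1],         # sentinel outside the range (e.g., "use the kernel default")
}
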
192 changes: 162 additions & 30 deletions mlos_bench/mlos_bench/optimizers/convert_configspace.py
@@ -8,24 +8,38 @@

import logging

from typing import Optional

from ConfigSpace.hyperparameters import Hyperparameter
from ConfigSpace import UniformIntegerHyperparameter
from ConfigSpace import UniformFloatHyperparameter
from ConfigSpace import CategoricalHyperparameter
from ConfigSpace import ConfigurationSpace, Configuration

from mlos_bench.tunables.tunable import Tunable
from typing import Dict, Optional, Tuple

from ConfigSpace import (
CategoricalHyperparameter,
Configuration,
ConfigurationSpace,
EqualsCondition,
UniformFloatHyperparameter,
UniformIntegerHyperparameter,
)
from mlos_bench.tunables.tunable import Tunable, TunableValue
from mlos_bench.tunables.tunable_groups import TunableGroups

_LOG = logging.getLogger(__name__)


def _tunable_to_hyperparameter(
tunable: Tunable, group_name: Optional[str] = None, cost: int = 0) -> Hyperparameter:
class TunableValueKind:
"""
Enum for the kind of the tunable value (special or not).
It is not a true enum because ConfigSpace wants string values.
"""
Convert a single Tunable to an equivalent ConfigSpace Hyperparameter object.

SPECIAL = "special"
RANGE = "range"


def _tunable_to_configspace(
tunable: Tunable, group_name: Optional[str] = None, cost: int = 0) -> ConfigurationSpace:
"""
Convert a single Tunable to an equivalent set of ConfigSpace Hyperparameter objects,
wrapped in a ConfigurationSpace for composability.
Note: this may be more than one Hyperparameter in the case of special value handling.

Parameters
----------
@@ -38,25 +38,56 @@ def _tunable_to_hyperparameter(

Returns
-------
hyperparameter : Hyperparameter
A ConfigSpace Hyperparameter object that corresponds to the Tunable.
cs : ConfigurationSpace
A ConfigurationSpace object that corresponds to the Tunable.
"""
meta = {"group": group_name, "cost": cost} # {"lower": "", "upper": "", "scaling": ""}
meta = {"group": group_name, "cost": cost} # {"scaling": ""}

if tunable.type == "categorical":
return CategoricalHyperparameter(
tunable.name, choices=tunable.categories,
default_value=tunable.default, meta=meta)
elif tunable.type == "int":
return UniformIntegerHyperparameter(
tunable.name, lower=tunable.range[0], upper=tunable.range[1],
default_value=tunable.default, meta=meta)
return ConfigurationSpace({
tunable.name: CategoricalHyperparameter(
name=tunable.name, choices=tunable.categories,
default_value=tunable.default, meta=meta)
})

if tunable.type == "int":
hp_type = UniformIntegerHyperparameter
elif tunable.type == "float":
return UniformFloatHyperparameter(
tunable.name, lower=tunable.range[0], upper=tunable.range[1],
default_value=tunable.default, meta=meta)
hp_type = UniformFloatHyperparameter
else:
raise TypeError(f"Undefined Parameter Type: {tunable.type}")

if not tunable.special:
return ConfigurationSpace({
tunable.name: hp_type(
name=tunable.name, lower=tunable.range[0], upper=tunable.range[1],
default_value=tunable.default if tunable.in_range(tunable.default) else None,
meta=meta)
})

# Create three hyperparameters: one for regular values,
# one for special values, and one to choose between the two.
(special_name, type_name) = special_param_names(tunable.name)
cs = ConfigurationSpace({
tunable.name: hp_type(
name=tunable.name, lower=tunable.range[0], upper=tunable.range[1],
default_value=tunable.default if tunable.in_range(tunable.default) else None,
meta=meta),
special_name: CategoricalHyperparameter(
name=special_name, choices=tunable.special,
default_value=tunable.default if tunable.default in tunable.special else None,
meta=meta),
type_name: CategoricalHyperparameter(
name=type_name,
choices=[TunableValueKind.SPECIAL, TunableValueKind.RANGE],
default_value=TunableValueKind.SPECIAL,
weights=[0.5, 0.5]), # TODO: Make weights configurable; FLAML requires uniform weights.
})
cs.add_condition(EqualsCondition(cs[special_name], cs[type_name], TunableValueKind.SPECIAL))
cs.add_condition(EqualsCondition(cs[tunable.name], cs[type_name], TunableValueKind.RANGE))

return cs
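
To make the encoding concrete, here is a rough standalone sketch of the kind of ConfigurationSpace this produces for a hypothetical int tunable "cost" with range [0, 1000000] and special values [-1, 0] (names, defaults, and weights are illustrative; the meta/cost bookkeeping is omitted):

from ConfigSpace import (
    CategoricalHyperparameter,
    ConfigurationSpace,
    EqualsCondition,
    UniformIntegerHyperparameter,
)

cs = ConfigurationSpace({
    # Regular values: active only when "cost!type" == "range".
    "cost": UniformIntegerHyperparameter(
        name="cost", lower=0, upper=1000000, default_value=500000),
    # Special values: active only when "cost!type" == "special".
    "cost!special": CategoricalHyperparameter(
        name="cost!special", choices=[-1, 0], default_value=-1),
    # Selector between the two branches (uniform weights, as FLAML requires).
    "cost!type": CategoricalHyperparameter(
        name="cost!type", choices=["special", "range"],
        default_value="special", weights=[0.5, 0.5]),
})
cs.add_condition(EqualsCondition(cs["cost!special"], cs["cost!type"], "special"))
cs.add_condition(EqualsCondition(cs["cost"], cs["cost!type"], "range"))

Only one of "cost" / "cost!special" is active in any sampled configuration, which is what allows the special sentinels to live outside the numeric range.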


def tunable_groups_to_configspace(tunables: TunableGroups, seed: Optional[int] = None) -> ConfigurationSpace:
"""
@@ -76,10 +121,11 @@ def tunable_groups_to_configspace(tunables: TunableGroups, seed: Optional[int] =
A new ConfigurationSpace instance that corresponds to the input TunableGroups.
"""
space = ConfigurationSpace(seed=seed)
space.add_hyperparameters([
_tunable_to_hyperparameter(tunable, group.name, group.get_current_cost())
for (tunable, group) in tunables
])
for (tunable, group) in tunables:
space.add_configuration_space(
prefix="", delimiter="",
configuration_space=_tunable_to_configspace(
tunable, group.name, group.get_current_cost()))
return space


@@ -97,5 +143,91 @@ def tunable_values_to_configuration(tunables: TunableGroups) -> Configuration:
Configuration
A ConfigSpace Configuration.
"""
values: Dict[str, TunableValue] = {}
for (tunable, _group) in tunables:
if tunable.special:
(special_name, type_name) = special_param_names(tunable.name)
if tunable.value in tunable.special:
values[type_name] = TunableValueKind.SPECIAL
values[special_name] = tunable.value
else:
values[type_name] = TunableValueKind.RANGE
values[tunable.name] = tunable.value
else:
values[tunable.name] = tunable.value
configspace = tunable_groups_to_configspace(tunables)
return Configuration(configspace, values={tunable.name: tunable.value for (tunable, _group) in tunables})
return Configuration(configspace, values=values)


def configspace_data_to_tunable_values(data: dict) -> dict:
"""
Remove the fields that correspond to special values in ConfigSpace.
In particular, remove the keys with suffixes added by `special_param_names`.
"""
data = data.copy()
specials = [
special_param_name_strip(k)
for k in data.keys() if special_param_name_is_temp(k)
]
for k in specials:
(special_name, type_name) = special_param_names(k)
if data[type_name] == TunableValueKind.SPECIAL:
data[k] = data[special_name]
if special_name in data:
del data[special_name]
del data[type_name]
return data
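
A made-up example of the round trip (only the "!special"/"!type" suffix convention comes from the code above):

row = {"cost": None, "cost!special": -1, "cost!type": "special", "latency": 2000}
configspace_data_to_tunable_values(row)
# -> {"cost": -1, "latency": 2000}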


def special_param_names(name: str) -> Tuple[str, str]:
"""
Generate the names of the auxiliary hyperparameters that correspond
to a tunable that can have special values.

Parameters
----------
name : str
The name of the tunable parameter.

Returns
-------
special_name : str
The name of the hyperparameter that corresponds to the special value.
type_name : str
The name of the hyperparameter that chooses between the regular and the special values.
"""
return (name + "!special", name + "!type")
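
For instance, with a hypothetical tunable name:

special_param_names("kernel_sched_migration_cost_ns")
# -> ("kernel_sched_migration_cost_ns!special", "kernel_sched_migration_cost_ns!type")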


def special_param_name_is_temp(name: str) -> bool:
"""
Check if name corresponds to a temporary ConfigSpace parameter.

Parameters
----------
name : str
The name of the hyperparameter.

Returns
-------
is_special : bool
True if the name corresponds to a temporary ConfigSpace hyperparameter.
"""
return name.endswith("!type")


def special_param_name_strip(name: str) -> str:
"""
Remove the temporary suffix from a special parameter name.

Parameters
----------
name : str
The name of the hyperparameter.

Returns
-------
stripped_name : str
The name of the hyperparameter without the temporary suffix.
"""
return name.split("!", 1)[0]
43 changes: 31 additions & 12 deletions mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py
@@ -10,7 +10,7 @@
import os

from types import TracebackType
from typing import Optional, Sequence, Tuple, Type, Union
from typing import Dict, Optional, Sequence, Tuple, Type, Union
from typing_extensions import Literal

import pandas as pd
@@ -20,10 +20,17 @@
)

from mlos_bench.environments.status import Status
from mlos_bench.services.base_service import Service
from mlos_bench.tunables.tunable import TunableValue
from mlos_bench.tunables.tunable_groups import TunableGroups
from mlos_bench.optimizers.base_optimizer import Optimizer
from mlos_bench.optimizers.convert_configspace import tunable_groups_to_configspace
from mlos_bench.services.base_service import Service

from mlos_bench.optimizers.convert_configspace import (
TunableValueKind,
configspace_data_to_tunable_values,
special_param_names,
tunable_groups_to_configspace,
)

_LOG = logging.getLogger(__name__)

@@ -103,16 +110,13 @@ def bulk_register(self, configs: Sequence[dict], scores: Sequence[Optional[float
df_status_completed = df_status.apply(Status.is_completed)
df_configs = df_configs[df_status_completed]
df_scores = df_scores[df_status_completed]
# External data can have incorrect types (e.g., all strings).
for (tunable, _group) in self._tunables:
df_configs[tunable.name] = df_configs[tunable.name].astype(tunable.dtype)
self._opt.register(df_configs, df_scores)
if _LOG.isEnabledFor(logging.DEBUG):
(score, _) = self.get_best_observation()
_LOG.debug("Warm-up end: %s = %s", self.target, score)
return True

def _to_df(self, configs: Sequence[dict]) -> pd.DataFrame:
def _to_df(self, configs: Sequence[Dict[str, TunableValue]]) -> pd.DataFrame:
"""
Select from past trials only the columns required in this experiment and
impute default values for the tunables that are missing in the dataframe.
@@ -128,13 +132,28 @@ def _to_df(self, configs: Sequence[dict]) -> pd.DataFrame:
A dataframe with past trials data, with missing values imputed.
"""
df_configs = pd.DataFrame(configs)
tunables_names = self._tunables.get_param_values().keys()
tunables_names = list(self._tunables.get_param_values().keys())
missing_cols = set(tunables_names).difference(df_configs.columns)
for (tunable, _group) in self._tunables:
if tunable.name in missing_cols:
df_configs[tunable.name] = tunable.default
else:
df_configs[tunable.name].fillna(tunable.default, inplace=True)
# External data can have incorrect types (e.g., all strings).
df_configs[tunable.name] = df_configs[tunable.name].astype(tunable.dtype)
# Add columns for tunables with special values.
if tunable.special:
(special_name, type_name) = special_param_names(tunable.name)
tunables_names += [special_name, type_name]
is_special = df_configs[tunable.name].apply(tunable.special.__contains__)
df_configs[type_name] = TunableValueKind.RANGE
df_configs.loc[is_special, type_name] = TunableValueKind.SPECIAL
if tunable.type == "int":
# Make int column NULLABLE:
df_configs[tunable.name] = df_configs[tunable.name].astype("Int64")
df_configs[special_name] = df_configs[tunable.name]
df_configs.loc[~is_special, special_name] = None
df_configs.loc[is_special, tunable.name] = None
# By default, hyperparameters in ConfigurationSpace are sorted by name:
df_configs = df_configs[sorted(tunables_names)]
_LOG.debug("Loaded configs:\n%s", df_configs)
@@ -146,14 +165,14 @@ def suggest(self) -> TunableGroups:
df_config = self._opt.suggest(defaults=self._start_with_defaults)
self._start_with_defaults = False
_LOG.info("Iteration %d :: Suggest:\n%s", self._iter, df_config)
return self._tunables.copy().assign(df_config.loc[0].to_dict())
return self._tunables.copy().assign(
configspace_data_to_tunable_values(df_config.loc[0].to_dict()))

def register(self, tunables: TunableGroups, status: Status,
score: Optional[Union[float, dict]] = None) -> Optional[float]:
score = super().register(tunables, status, score) # With _opt_sign applied
if status.is_completed():
# By default, hyperparameters in ConfigurationSpace are sorted by name:
df_config = pd.DataFrame(dict(sorted(tunables.get_param_values().items())), index=[0])
df_config = self._to_df([tunables.get_param_values()])
_LOG.debug("Score: %s Dataframe:\n%s", score, df_config)
self._opt.register(df_config, pd.Series([score], dtype=float))
self._iter += 1
@@ -163,7 +182,7 @@ def get_best_observation(self) -> Union[Tuple[float, TunableGroups], Tuple[None,
df_config = self._opt.get_best_observation()
if len(df_config) == 0:
return (None, None)
params = df_config.iloc[0].to_dict()
params = configspace_data_to_tunable_values(df_config.iloc[0].to_dict())
_LOG.debug("Best observation: %s", params)
score = params.pop("score") * self._opt_sign # mlos_core always uses the `score` column
return (score, self._tunables.copy().assign(params))
4 changes: 2 additions & 2 deletions mlos_bench/mlos_bench/tests/conftest.py
@@ -59,8 +59,8 @@
"description": "Cost of migrating the thread to another core",
"type": "int",
"default": -1,
"range": [-1, 500000],
"special": [-1]
"range": [0, 500000],
"special": [-1, 0]
},
"kernel_sched_latency_ns": {
"description": "Initial value for the scheduler period",
4 changes: 2 additions & 2 deletions mlos_bench/mlos_bench/tests/launcher_run_test.py
@@ -97,7 +97,7 @@ def test_launch_main_app_opt(root_path: str, local_exec_service: LocalExecServic
[
# Iteration 1: Expect first value to be the baseline
f"^{_RE_DATE} mlos_core_optimizer\\.py:\\d+ " +
r"register DEBUG Score: 65\.67\d+ Dataframe:\s*$",
r"register DEBUG Score: 64\.88\d+ Dataframe:\s*$",
# Iteration 2: The result may not always be deterministic
f"^{_RE_DATE} mlos_core_optimizer\\.py:\\d+ " +
r"register DEBUG Score: \d+\.\d+ Dataframe:\s*$",
@@ -106,6 +106,6 @@
r"register DEBUG Score: \d+\.\d+ Dataframe:\s*$",
# Final result: baseline is the optimum for the mock environment
f"^{_RE_DATE} run\\.py:\\d+ " +
r"_optimize INFO Env: Mock environment best score: 65\.67\d+\s*$",
r"_optimize INFO Env: Mock environment best score: 64\.88\d+\s*$",
]
)