From 022ca9c1212679f7218ec589eac59aa5c7caba2c Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 29 Nov 2024 13:15:44 +0200 Subject: [PATCH 01/12] Introduce TaskConstraint as dataclass --- amlb/benchmark.py | 6 ++++-- amlb/frameworks/definitions.py | 10 ++++++++++ amlb/resources.py | 12 +++++++----- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/amlb/benchmark.py b/amlb/benchmark.py index 90a010053..097200b6a 100644 --- a/amlb/benchmark.py +++ b/amlb/benchmark.py @@ -124,8 +124,10 @@ def __init__( self.framework_def, self.framework_name = framework, framework.name log.debug("Using framework definition: %s.", self.framework_def) - self.constraint_def, self.constraint_name = rget().constraint_definition( - constraint_name + task_constraint = rget().constraint_definition(constraint_name) + self.constraint_def, self.constraint_name = ( + task_constraint, + task_constraint.name, ) log.debug("Using constraint definition: %s.", self.constraint_def) diff --git a/amlb/frameworks/definitions.py b/amlb/frameworks/definitions.py index bc3dcb446..9e55cd48b 100644 --- a/amlb/frameworks/definitions.py +++ b/amlb/frameworks/definitions.py @@ -281,3 +281,13 @@ def load_framework_definition( framework_name, tag = framework_name.split(":", 1) definition_ns, name = configuration.framework_definition(framework_name, tag) return Framework(**Namespace.dict(definition_ns)) + + +@dataclass +class TaskConstraint: + name: str + folds: int + max_runtime_seconds: int + cores: int + min_vol_size_mb: int | None = None + ec2_volume_type: str | None = None diff --git a/amlb/resources.py b/amlb/resources.py index 0e37b240d..62541b86a 100644 --- a/amlb/resources.py +++ b/amlb/resources.py @@ -6,6 +6,7 @@ from __future__ import annotations import copy +import dataclasses import logging import os import random @@ -14,6 +15,7 @@ from amlb.benchmarks.parser import benchmark_load from amlb.frameworks import default_tag, load_framework_definitions +from .frameworks.definitions import TaskConstraint from .utils import ( Namespace, lazy_property, @@ -172,7 +174,7 @@ def _frameworks(self): return load_framework_definitions(frameworks_file, self.config) @memoize - def constraint_definition(self, name): + def constraint_definition(self, name: str) -> TaskConstraint: """ :param name: name of the benchmark constraint definition as defined in the constraints file :return: a Namespace object with the constraint config (folds, cores, max_runtime_seconds, ...) for the current benchmamk run. @@ -184,7 +186,7 @@ def constraint_definition(self, name): name, self.config.benchmarks.constraints_file ) ) - return constraint, constraint.name + return TaskConstraint(**Namespace.dict(constraint)) @lazy_property def _constraints(self): @@ -206,8 +208,7 @@ def _constraints(self): constraints_lookup[name.lower()] = c return constraints_lookup - # @memoize - def benchmark_definition(self, name, defaults=None): + def benchmark_definition(self, name: str, defaults: TaskConstraint | None = None): """ :param name: name of the benchmark as defined by resources/benchmarks/{name}.yaml, the path to a user-defined benchmark description file or a study id. 
:param defaults: defaults used as a base config for each task in the benchmark definition @@ -216,7 +217,8 @@ def benchmark_definition(self, name, defaults=None): hard_defaults, tasks, benchmark_path, benchmark_name = benchmark_load( name, self.config.benchmarks.definition_dir ) - + if defaults is not None: + defaults = Namespace(**dataclasses.asdict(defaults)) defaults = Namespace.merge( defaults, hard_defaults, Namespace(name="__defaults__") ) From fa77b974afd432b6a646a6dbc532831bb23b2629 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 29 Nov 2024 13:59:00 +0200 Subject: [PATCH 02/12] Rename hard_defaults to file_defaults to better indicate meaning --- amlb/benchmarks/parser.py | 6 +++--- amlb/resources.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/amlb/benchmarks/parser.py b/amlb/benchmarks/parser.py index ca5997fb6..239742831 100644 --- a/amlb/benchmarks/parser.py +++ b/amlb/benchmarks/parser.py @@ -26,8 +26,8 @@ def benchmark_load( name, benchmark_definition_dirs ) - hard_defaults = next((task for task in tasks if task.name == "__defaults__"), None) - tasks = [task for task in tasks if task is not hard_defaults] + file_defaults = next((task for task in tasks if task.name == "__defaults__"), None) + tasks = [task for task in tasks if task is not file_defaults] for t in tasks: t.name = str_sanitize(t.name) - return hard_defaults, tasks, benchmark_path, str_sanitize(benchmark_name) + return file_defaults, tasks, benchmark_path, str_sanitize(benchmark_name) diff --git a/amlb/resources.py b/amlb/resources.py index 62541b86a..4270f51a5 100644 --- a/amlb/resources.py +++ b/amlb/resources.py @@ -214,13 +214,13 @@ def benchmark_definition(self, name: str, defaults: TaskConstraint | None = None :param defaults: defaults used as a base config for each task in the benchmark definition :return: """ - hard_defaults, tasks, benchmark_path, benchmark_name = benchmark_load( + file_defaults, tasks, benchmark_path, benchmark_name = benchmark_load( name, self.config.benchmarks.definition_dir ) if defaults is not None: defaults = Namespace(**dataclasses.asdict(defaults)) defaults = Namespace.merge( - defaults, hard_defaults, Namespace(name="__defaults__") + defaults, file_defaults, Namespace(name="__defaults__") ) for task in tasks: task |= defaults # add missing keys from hard defaults + defaults From 8985d46045394ba8865c50fdd77bf15a63c9afc9 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 29 Nov 2024 14:19:25 +0200 Subject: [PATCH 03/12] Start Task defintion --- amlb/frameworks/definitions.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/amlb/frameworks/definitions.py b/amlb/frameworks/definitions.py index 9e55cd48b..d7ffccb5c 100644 --- a/amlb/frameworks/definitions.py +++ b/amlb/frameworks/definitions.py @@ -291,3 +291,14 @@ class TaskConstraint: cores: int min_vol_size_mb: int | None = None ec2_volume_type: str | None = None + + +@dataclass +class Task(TaskConstraint): + dataset: Namespace | None = None # TODO: Specify file dataset description + enabled: bool = True + description: str = "" + openml_task_id: int | None = None + metric: str | list[str] | None = None + # Specific to time series + quantile_levels: list[float] | None = None From 666ca3e09df109e846bc5cbaf2964208631465ae Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 29 Nov 2024 14:24:20 +0200 Subject: [PATCH 04/12] Setup for testing benchmark loading --- amlb/resources.py | 29 +++-- .../resources/test_benchmark_definition.py | 120 ++++++++++++++++++ 2 files changed, 137 
insertions(+), 12 deletions(-) create mode 100644 tests/unit/amlb/resources/test_benchmark_definition.py diff --git a/amlb/resources.py b/amlb/resources.py index 4270f51a5..17aa18e10 100644 --- a/amlb/resources.py +++ b/amlb/resources.py @@ -209,13 +209,18 @@ def _constraints(self): return constraints_lookup def benchmark_definition(self, name: str, defaults: TaskConstraint | None = None): + return self._benchmark_definition(name, self.config, defaults) + + def _benchmark_definition( + self, name: str, config_: Namespace, defaults: TaskConstraint | None = None + ): """ :param name: name of the benchmark as defined by resources/benchmarks/{name}.yaml, the path to a user-defined benchmark description file or a study id. :param defaults: defaults used as a base config for each task in the benchmark definition :return: """ file_defaults, tasks, benchmark_path, benchmark_name = benchmark_load( - name, self.config.benchmarks.definition_dir + name, config_.benchmarks.definition_dir ) if defaults is not None: defaults = Namespace(**dataclasses.asdict(defaults)) @@ -224,15 +229,16 @@ def benchmark_definition(self, name: str, defaults: TaskConstraint | None = None ) for task in tasks: task |= defaults # add missing keys from hard defaults + defaults - self._validate_task(task) + Resources._validate_task(task, config_) - self._validate_task(defaults, lenient=True) + Resources._validate_task(defaults, config_, lenient=True) defaults.enabled = False tasks.append(defaults) log.debug("Available task definitions:\n%s", tasks) return tasks, benchmark_name, benchmark_path - def _validate_task(self, task, lenient=False): + @staticmethod + def _validate_task(task: Namespace, config_: Namespace, lenient: bool = False): missing = [] for conf in ["name"]: if task[conf] is None: @@ -253,13 +259,16 @@ def _validate_task(self, task, lenient=False): "quantile_levels", ]: if task[conf] is None: - task[conf] = self.config.benchmarks.defaults[conf] + task[conf] = config_.benchmarks.defaults[conf] log.debug( "Config `{config}` not set for task {name}, using default `{value}`.".format( config=conf, name=task.name, value=task[conf] ) ) + if task["metric"] is None: + task["metric"] = None + conf = "id" if task[conf] is None: task[conf] = ( @@ -287,14 +296,10 @@ def _validate_task(self, task, lenient=False): "but task definition is {task}".format(task=str(task)) ) - conf = "metric" - if task[conf] is None: - task[conf] = None - conf = "ec2_instance_type" if task[conf] is None: - i_series = self.config.aws.ec2.instance_type.series - i_map = self.config.aws.ec2.instance_type.map + i_series = config_.aws.ec2.instance_type.series + i_map = config_.aws.ec2.instance_type.map if str(task.cores) in i_map: i_size = i_map[str(task.cores)] elif task.cores > 0: @@ -315,7 +320,7 @@ def _validate_task(self, task, lenient=False): conf = "ec2_volume_type" if task[conf] is None: - task[conf] = self.config.aws.ec2.volume_type + task[conf] = config_.aws.ec2.volume_type log.debug( "Config `{config}` not set for task {name}, using default `{value}`.".format( config=conf, name=task.name, value=task[conf] diff --git a/tests/unit/amlb/resources/test_benchmark_definition.py b/tests/unit/amlb/resources/test_benchmark_definition.py new file mode 100644 index 000000000..a1841b87d --- /dev/null +++ b/tests/unit/amlb/resources/test_benchmark_definition.py @@ -0,0 +1,120 @@ +from functools import partial + +import pytest + +from amlb import Resources +from amlb.utils import Namespace + + +@pytest.fixture +def amlb_dummy_configuration() -> Namespace: + 
defaults = { + "max_runtime_seconds": 0, + "cores": 1, + "folds": 2, + "max_mem_size_mb": 3, + "min_vol_size_mb": 4, + "quantile_levels": 5, + } + + aws_defaults = { + "ec2": { + "volume_type": "gp3", + "instance_type": { + "series": "m5", + "map": {"4": "small", "default": "large"}, + }, + } + } + return Namespace( + aws=Namespace.from_dict(aws_defaults), + benchmarks=Namespace(defaults=Namespace.from_dict(defaults)), + ) + + +def test_validate_task_strict_requires_name(): + with pytest.raises(ValueError) as excinfo: + Resources._validate_task( + task=Namespace(), + config_=Namespace(), + lenient=False, + ) + assert "mandatory properties as missing" in excinfo.value.args[0] + + +def test_validate_task_strict_requires_id(amlb_dummy_configuration: Namespace): + strict_validate = partial( + Resources._validate_task, config_=amlb_dummy_configuration, lenient=False + ) + with pytest.raises(ValueError) as excinfo: + strict_validate(task=Namespace(name="foo")) + assert "must contain an ID or one property" in excinfo.value.args[0] + + +@pytest.mark.parametrize( + ("properties", "expected"), + [ + (Namespace(id="bar"), "bar"), + (Namespace(openml_task_id=42), "openml.org/t/42"), + (Namespace(openml_dataset_id=42), "openml.org/d/42"), + (Namespace(dataset="bar"), "bar"), + (Namespace(dataset=Namespace(id="bar")), "bar"), + ], +) +def test_validate_task_id_formatting( + properties: Namespace, expected: str, amlb_dummy_configuration: Namespace +): + task = Namespace(name="foo") | properties + Resources._validate_task(task=task, config_=amlb_dummy_configuration) + assert task["id"] == expected + + +def test_validate_task_adds_benchmark_defaults(amlb_dummy_configuration: Namespace): + task = Namespace(name=None) + Resources._validate_task(task, amlb_dummy_configuration, lenient=True) + + config = Namespace.dict(amlb_dummy_configuration, deep=True) + for setting, default in config["benchmarks"]["defaults"].items(): + assert task[setting] == default + assert task["ec2_volume_type"] == amlb_dummy_configuration.aws.ec2.volume_type + + +def test_validate_task_does_not_overwrite(amlb_dummy_configuration: Namespace): + task = Namespace(name=None, cores=42) + Resources._validate_task(task, amlb_dummy_configuration, lenient=True) + + config = Namespace.dict(amlb_dummy_configuration, deep=True) + assert task.cores == 42 + for setting, default in config["benchmarks"]["defaults"].items(): + if setting != "cores": + assert task[setting] == default + + +def test_validate_task_looks_up_instance_type(amlb_dummy_configuration: Namespace): + instance_type = amlb_dummy_configuration.aws.ec2.instance_type + reverse_size_map = {v: k for k, v in Namespace.dict(instance_type.map).items()} + n_cores_for_small = int(reverse_size_map["small"]) + + task = Namespace(name="foo", cores=n_cores_for_small) + Resources._validate_task(task, amlb_dummy_configuration, lenient=True) + assert ( + task["ec2_instance_type"] == "m5.small" + ), "Should resolve to the instance type with the exact amount of cores" + + task = Namespace(name="foo", cores=n_cores_for_small - 1) + Resources._validate_task(task, amlb_dummy_configuration, lenient=True) + assert ( + task["ec2_instance_type"] == "m5.small" + ), "If exact amount of cores are not available, should resolve to next biggest" + + task = Namespace(name="foo", cores=n_cores_for_small + 1) + Resources._validate_task(task, amlb_dummy_configuration, lenient=True) + assert ( + task["ec2_instance_type"] == "m5.large" + ), "If bigger than largest in map, should revert to default" + + task = 
Namespace(name="foo", ec2_instance_type="bar") + Resources._validate_task(task, amlb_dummy_configuration, lenient=True) + assert ( + task["ec2_instance_type"] == "bar" + ), "Should not overwrite explicit configuration" From 6ef3c55c36f137729a739f21d1ea5bb11b03d8fa Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 29 Nov 2024 20:46:30 +0200 Subject: [PATCH 05/12] Refactor task name check --- amlb/resources.py | 10 ++-------- tests/unit/amlb/resources/test_benchmark_definition.py | 2 +- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/amlb/resources.py b/amlb/resources.py index 17aa18e10..87fe4e26d 100644 --- a/amlb/resources.py +++ b/amlb/resources.py @@ -239,15 +239,9 @@ def _benchmark_definition( @staticmethod def _validate_task(task: Namespace, config_: Namespace, lenient: bool = False): - missing = [] - for conf in ["name"]: - if task[conf] is None: - missing.append(conf) - if not lenient and len(missing) > 0: + if not lenient and task["name"] is None: raise ValueError( - "{missing} mandatory properties as missing in task definition {taskdef}.".format( - missing=missing, taskdef=task - ) + f"`name` is mandatory but missing in task definition {task}." ) for conf in [ diff --git a/tests/unit/amlb/resources/test_benchmark_definition.py b/tests/unit/amlb/resources/test_benchmark_definition.py index a1841b87d..e8874cfaa 100644 --- a/tests/unit/amlb/resources/test_benchmark_definition.py +++ b/tests/unit/amlb/resources/test_benchmark_definition.py @@ -39,7 +39,7 @@ def test_validate_task_strict_requires_name(): config_=Namespace(), lenient=False, ) - assert "mandatory properties as missing" in excinfo.value.args[0] + assert "mandatory but missing" in excinfo.value.args[0] def test_validate_task_strict_requires_id(amlb_dummy_configuration: Namespace): From cd5c9120533598ae9745488139466f37f366cd0d Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 29 Nov 2024 20:56:10 +0200 Subject: [PATCH 06/12] Refactor task id resolution --- amlb/resources.py | 52 +++++++++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/amlb/resources.py b/amlb/resources.py index 87fe4e26d..1fcdad7be 100644 --- a/amlb/resources.py +++ b/amlb/resources.py @@ -244,6 +244,15 @@ def _validate_task(task: Namespace, config_: Namespace, lenient: bool = False): f"`name` is mandatory but missing in task definition {task}." 
) + if task["id"] is None: + task["id"] = Resources.generate_task_identifier(task) + if not lenient and task["id"] is None: + raise ValueError( + "task definition must contain an ID or one property " + "among ['openml_task_id', 'dataset'] to create an ID, " + "but task definition is {task}".format(task=str(task)) + ) + for conf in [ "max_runtime_seconds", "cores", @@ -259,37 +268,8 @@ def _validate_task(task: Namespace, config_: Namespace, lenient: bool = False): config=conf, name=task.name, value=task[conf] ) ) - if task["metric"] is None: task["metric"] = None - - conf = "id" - if task[conf] is None: - task[conf] = ( - "openml.org/t/{}".format(task.openml_task_id) - if task["openml_task_id"] is not None - else "openml.org/d/{}".format(task.openml_dataset_id) - if task["openml_dataset_id"] is not None - else ( - ( - task.dataset["id"] - if isinstance(task.dataset, (dict, Namespace)) - else task.dataset - if isinstance(task.dataset, str) - else None - ) - or task.name - ) - if task["dataset"] is not None - else None - ) - if not lenient and task[conf] is None: - raise ValueError( - "task definition must contain an ID or one property " - "among ['openml_task_id', 'dataset'] to create an ID, " - "but task definition is {task}".format(task=str(task)) - ) - conf = "ec2_instance_type" if task[conf] is None: i_series = config_.aws.ec2.instance_type.series @@ -321,6 +301,20 @@ def _validate_task(task: Namespace, config_: Namespace, lenient: bool = False): ) ) + @staticmethod + def generate_task_identifier(task: Namespace) -> str | None: + if task["openml_task_id"] is not None: + return f"openml.org/t/{task.openml_task_id}" + if task["openml_dataset_id"] is not None: + return f"openml.org/d/{task.openml_dataset_id}" + if task["dataset"] is None: + return None + if isinstance(task.dataset, (dict, Namespace)): + return task.dataset["id"] + if isinstance(task.dataset, str): + return task.dataset + return task.name + __INSTANCE__: Resources | None = None From 57adc9569090b2903f99d8d95bbfbf4b31a3dffe Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Sat, 30 Nov 2024 09:07:28 +0200 Subject: [PATCH 07/12] Break up task validation and setting defaults --- amlb/resources.py | 87 +++++++++++-------- .../resources/test_benchmark_definition.py | 39 ++++----- 2 files changed, 65 insertions(+), 61 deletions(-) diff --git a/amlb/resources.py b/amlb/resources.py index 1fcdad7be..8444b934a 100644 --- a/amlb/resources.py +++ b/amlb/resources.py @@ -229,30 +229,19 @@ def _benchmark_definition( ) for task in tasks: task |= defaults # add missing keys from hard defaults + defaults - Resources._validate_task(task, config_) + Resources._validate_task(task) + Resources._add_task_defaults(task, config_) - Resources._validate_task(defaults, config_, lenient=True) + Resources._add_task_defaults(defaults, config_) defaults.enabled = False tasks.append(defaults) log.debug("Available task definitions:\n%s", tasks) return tasks, benchmark_name, benchmark_path @staticmethod - def _validate_task(task: Namespace, config_: Namespace, lenient: bool = False): - if not lenient and task["name"] is None: - raise ValueError( - f"`name` is mandatory but missing in task definition {task}." 
- ) - + def _add_task_defaults(task: Namespace, config_: Namespace): if task["id"] is None: task["id"] = Resources.generate_task_identifier(task) - if not lenient and task["id"] is None: - raise ValueError( - "task definition must contain an ID or one property " - "among ['openml_task_id', 'dataset'] to create an ID, " - "but task definition is {task}".format(task=str(task)) - ) - for conf in [ "max_runtime_seconds", "cores", @@ -265,42 +254,66 @@ def _validate_task(task: Namespace, config_: Namespace, lenient: bool = False): task[conf] = config_.benchmarks.defaults[conf] log.debug( "Config `{config}` not set for task {name}, using default `{value}`.".format( - config=conf, name=task.name, value=task[conf] + config=conf, name=task["name"], value=task[conf] ) ) + if task["metric"] is None: task["metric"] = None - conf = "ec2_instance_type" - if task[conf] is None: - i_series = config_.aws.ec2.instance_type.series - i_map = config_.aws.ec2.instance_type.map - if str(task.cores) in i_map: - i_size = i_map[str(task.cores)] - elif task.cores > 0: - supported_cores = list( - map(int, Namespace.dict(i_map).keys() - {"default"}) - ) - supported_cores.sort() - cores = next((c for c in supported_cores if c >= task.cores), "default") - i_size = i_map[str(cores)] - else: - i_size = i_map.default - task[conf] = ".".join([i_series, i_size]) + + + if task["ec2_instance_type"] is None: + task["ec2_instance_type"] = Resources.lookup_ec2_instance_type( + config_, task.cores + ) log.debug( "Config `{config}` not set for task {name}, using default selection `{value}`.".format( - config=conf, name=task.name, value=task[conf] + config=conf, name=task["name"], value=task["ec2_instance_type"] ) ) - conf = "ec2_volume_type" - if task[conf] is None: - task[conf] = config_.aws.ec2.volume_type + if task["ec2_volume_type"] is None: + task["ec2_volume_type"] = config_.aws.ec2.volume_type log.debug( "Config `{config}` not set for task {name}, using default `{value}`.".format( - config=conf, name=task.name, value=task[conf] + config=conf, name=task["name"], value=task["ec2_volume_type"] ) ) + @staticmethod + def _validate_task(task: Namespace) -> None: + """Raises ValueError if task does not have a name and a way to generate an identifier.""" + if task["name"] is None: + raise ValueError( + f"`name` is mandatory but missing in task definition {task}." 
+ ) + task_id = Namespace.get(task, "id", Resources.generate_task_identifier(task)) + if task_id is None: + raise ValueError( + "task definition must contain an ID or one property " + "among ['openml_task_id', 'dataset'] to create an ID, " + "but task definition is {task}".format(task=str(task)) + ) + + @staticmethod + def lookup_ec2_instance_type(config_: Namespace, cores: int) -> str: + i_series = config_.aws.ec2.instance_type.series + i_map = config_.aws.ec2.instance_type.map + i_size = Resources.lookup_suitable_instance_size(i_map, cores) + return f"{i_series}.{i_size}" + + @staticmethod + def lookup_suitable_instance_size(cores_to_size: Namespace, cores: int) -> str: + if str(cores) in cores_to_size: + return cores_to_size[str(cores)] + + supported_cores = list(map(int, set(dir(cores_to_size)) - {"default"})) + if cores <= 0 or cores > max(supported_cores): + return cores_to_size.default + + cores = next((c for c in sorted(supported_cores) if c >= cores), "default") + return cores_to_size[str(cores)] + @staticmethod def generate_task_identifier(task: Namespace) -> str | None: if task["openml_task_id"] is not None: diff --git a/tests/unit/amlb/resources/test_benchmark_definition.py b/tests/unit/amlb/resources/test_benchmark_definition.py index e8874cfaa..4b452ce02 100644 --- a/tests/unit/amlb/resources/test_benchmark_definition.py +++ b/tests/unit/amlb/resources/test_benchmark_definition.py @@ -1,5 +1,3 @@ -from functools import partial - import pytest from amlb import Resources @@ -34,20 +32,13 @@ def amlb_dummy_configuration() -> Namespace: def test_validate_task_strict_requires_name(): with pytest.raises(ValueError) as excinfo: - Resources._validate_task( - task=Namespace(), - config_=Namespace(), - lenient=False, - ) + Resources._validate_task(task=Namespace()) assert "mandatory but missing" in excinfo.value.args[0] def test_validate_task_strict_requires_id(amlb_dummy_configuration: Namespace): - strict_validate = partial( - Resources._validate_task, config_=amlb_dummy_configuration, lenient=False - ) with pytest.raises(ValueError) as excinfo: - strict_validate(task=Namespace(name="foo")) + Resources._validate_task(task=Namespace(name="foo")) assert "must contain an ID or one property" in excinfo.value.args[0] @@ -61,17 +52,17 @@ def test_validate_task_strict_requires_id(amlb_dummy_configuration: Namespace): (Namespace(dataset=Namespace(id="bar")), "bar"), ], ) -def test_validate_task_id_formatting( +def test_add_task_defaults_formatting( properties: Namespace, expected: str, amlb_dummy_configuration: Namespace ): task = Namespace(name="foo") | properties - Resources._validate_task(task=task, config_=amlb_dummy_configuration) + Resources._add_task_defaults(task=task, config_=amlb_dummy_configuration) assert task["id"] == expected -def test_validate_task_adds_benchmark_defaults(amlb_dummy_configuration: Namespace): - task = Namespace(name=None) - Resources._validate_task(task, amlb_dummy_configuration, lenient=True) +def test_add_task_defaults_sets_benchmark_defaults(amlb_dummy_configuration: Namespace): + task = Namespace() + Resources._add_task_defaults(task, amlb_dummy_configuration) config = Namespace.dict(amlb_dummy_configuration, deep=True) for setting, default in config["benchmarks"]["defaults"].items(): @@ -79,9 +70,9 @@ def test_validate_task_adds_benchmark_defaults(amlb_dummy_configuration: Namespa assert task["ec2_volume_type"] == amlb_dummy_configuration.aws.ec2.volume_type -def test_validate_task_does_not_overwrite(amlb_dummy_configuration: Namespace): - task = 
Namespace(name=None, cores=42) - Resources._validate_task(task, amlb_dummy_configuration, lenient=True) +def test_add_task_defaults_does_not_overwrite(amlb_dummy_configuration: Namespace): + task = Namespace(cores=42) + Resources._add_task_defaults(task, amlb_dummy_configuration) config = Namespace.dict(amlb_dummy_configuration, deep=True) assert task.cores == 42 @@ -90,31 +81,31 @@ def test_validate_task_does_not_overwrite(amlb_dummy_configuration: Namespace): assert task[setting] == default -def test_validate_task_looks_up_instance_type(amlb_dummy_configuration: Namespace): +def test_add_task_defaults_looks_up_instance_type(amlb_dummy_configuration: Namespace): instance_type = amlb_dummy_configuration.aws.ec2.instance_type reverse_size_map = {v: k for k, v in Namespace.dict(instance_type.map).items()} n_cores_for_small = int(reverse_size_map["small"]) task = Namespace(name="foo", cores=n_cores_for_small) - Resources._validate_task(task, amlb_dummy_configuration, lenient=True) + Resources._add_task_defaults(task, amlb_dummy_configuration) assert ( task["ec2_instance_type"] == "m5.small" ), "Should resolve to the instance type with the exact amount of cores" task = Namespace(name="foo", cores=n_cores_for_small - 1) - Resources._validate_task(task, amlb_dummy_configuration, lenient=True) + Resources._add_task_defaults(task, amlb_dummy_configuration) assert ( task["ec2_instance_type"] == "m5.small" ), "If exact amount of cores are not available, should resolve to next biggest" task = Namespace(name="foo", cores=n_cores_for_small + 1) - Resources._validate_task(task, amlb_dummy_configuration, lenient=True) + Resources._add_task_defaults(task, amlb_dummy_configuration) assert ( task["ec2_instance_type"] == "m5.large" ), "If bigger than largest in map, should revert to default" task = Namespace(name="foo", ec2_instance_type="bar") - Resources._validate_task(task, amlb_dummy_configuration, lenient=True) + Resources._add_task_defaults(task, amlb_dummy_configuration) assert ( task["ec2_instance_type"] == "bar" ), "Should not overwrite explicit configuration" From 80306209d3743f6872bd25b37e8ac9ffbb19b2de Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Sat, 30 Nov 2024 11:50:11 +0200 Subject: [PATCH 08/12] Initialize BenchmarkTask in test --- tests/conftest.py | 2 +- .../resources/test_benchmark_definition.py | 19 ++++++++++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 04d7cf67b..12bab15e1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -29,7 +29,7 @@ def load_default_resources(tmp_path): ) config_args = Namespace({k: v for k, v in config_args if v is not None}) # merging all configuration files and saving to the global variable - resources.from_configs( + return resources.from_configs( config_default, config_default_dirs, config_user, config_args ) diff --git a/tests/unit/amlb/resources/test_benchmark_definition.py b/tests/unit/amlb/resources/test_benchmark_definition.py index 4b452ce02..c61d01e25 100644 --- a/tests/unit/amlb/resources/test_benchmark_definition.py +++ b/tests/unit/amlb/resources/test_benchmark_definition.py @@ -1,6 +1,7 @@ import pytest -from amlb import Resources +from amlb import Resources, Benchmark +from amlb.benchmark import BenchmarkTask from amlb.utils import Namespace @@ -109,3 +110,19 @@ def test_add_task_defaults_looks_up_instance_type(amlb_dummy_configuration: Name assert ( task["ec2_instance_type"] == "bar" ), "Should not overwrite explicit configuration" + + +def 
test_benchmark_task(load_default_resources: Resources): + benchmark = Benchmark( + framework_name="constantpredictor", + benchmark_name="test", + constraint_name="test", + job_history=None, + ) + task = Namespace(name="foo") + Resources._add_task_defaults(task, load_default_resources.config) + benchmark_task = BenchmarkTask( + benchmark=benchmark, + task_def=task, + fold=0, + ) From 3b7faead50f4914da40f1ce5af4f190f0ce85079 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Sun, 1 Dec 2024 14:36:12 +0200 Subject: [PATCH 09/12] Start adding more tests for benchmark task --- .../resources/test_benchmark_definition.py | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/tests/unit/amlb/resources/test_benchmark_definition.py b/tests/unit/amlb/resources/test_benchmark_definition.py index c61d01e25..0cf913169 100644 --- a/tests/unit/amlb/resources/test_benchmark_definition.py +++ b/tests/unit/amlb/resources/test_benchmark_definition.py @@ -112,17 +112,33 @@ def test_add_task_defaults_looks_up_instance_type(amlb_dummy_configuration: Name ), "Should not overwrite explicit configuration" -def test_benchmark_task(load_default_resources: Resources): +def create_benchmark_task(resources: Resources, task: Namespace): benchmark = Benchmark( framework_name="constantpredictor", benchmark_name="test", constraint_name="test", job_history=None, ) - task = Namespace(name="foo") - Resources._add_task_defaults(task, load_default_resources.config) - benchmark_task = BenchmarkTask( + Resources._add_task_defaults(task, resources.config) + return BenchmarkTask( benchmark=benchmark, task_def=task, fold=0, ) + + +def test_benchmark_task_load_data_raises_if_no_dataset(load_default_resources): + task = Namespace(name="foo") + benchmark_task = create_benchmark_task(load_default_resources, task) + + with pytest.raises(ValueError) as excinfo: + benchmark_task.load_data() + assert "should have one property" in excinfo.value.args[0] + + +def test_benchmark_task_load_data(load_default_resources, mocker): + task = Namespace(name="foo", openml_task_id=42) + benchmark_task = create_benchmark_task(load_default_resources, task) + + mocker.patch("amlb.benchmark.Benchmark.data_loader.load", return_value={}) + benchmark_task.load_data() From 95048209bacf4e18f427dae9ba27c003dc09efe7 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Sun, 8 Dec 2024 11:13:44 +0200 Subject: [PATCH 10/12] Formatting changes --- amlb/benchmark.py | 1 - amlb/resources.py | 17 +++++--- frameworks/FEDOT/__init__.py | 8 ++-- frameworks/FEDOT/exec.py | 24 ++++++----- frameworks/FEDOT/exec_ts.py | 83 +++++++++++++++++++++--------------- 5 files changed, 77 insertions(+), 56 deletions(-) diff --git a/amlb/benchmark.py b/amlb/benchmark.py index 097200b6a..3c7653fc2 100644 --- a/amlb/benchmark.py +++ b/amlb/benchmark.py @@ -656,7 +656,6 @@ def handle_unfulfilled(message, on_auto="warn"): class BenchmarkTask: - def __init__(self, benchmark: Benchmark, task_def, fold): """ diff --git a/amlb/resources.py b/amlb/resources.py index 8444b934a..f18245ae7 100644 --- a/amlb/resources.py +++ b/amlb/resources.py @@ -212,7 +212,10 @@ def benchmark_definition(self, name: str, defaults: TaskConstraint | None = None return self._benchmark_definition(name, self.config, defaults) def _benchmark_definition( - self, name: str, config_: Namespace, defaults: TaskConstraint | None = None + self, + name: str, + config_: Namespace, + defaults_for_task: TaskConstraint | None = None, ): """ :param name: name of the benchmark as defined by 
resources/benchmarks/{name}.yaml, the path to a user-defined benchmark description file or a study id. @@ -222,8 +225,9 @@ def _benchmark_definition( file_defaults, tasks, benchmark_path, benchmark_name = benchmark_load( name, config_.benchmarks.definition_dir ) - if defaults is not None: - defaults = Namespace(**dataclasses.asdict(defaults)) + defaults = None + if defaults_for_task is not None: + defaults = Namespace(**dataclasses.asdict(defaults_for_task)) defaults = Namespace.merge( defaults, file_defaults, Namespace(name="__defaults__") ) @@ -261,7 +265,6 @@ def _add_task_defaults(task: Namespace, config_: Namespace): if task["metric"] is None: task["metric"] = None - if task["ec2_instance_type"] is None: task["ec2_instance_type"] = Resources.lookup_ec2_instance_type( config_, task.cores @@ -311,8 +314,10 @@ def lookup_suitable_instance_size(cores_to_size: Namespace, cores: int) -> str: if cores <= 0 or cores > max(supported_cores): return cores_to_size.default - cores = next((c for c in sorted(supported_cores) if c >= cores), "default") - return cores_to_size[str(cores)] + best_match = next( + (str(c) for c in sorted(supported_cores) if c >= cores), "default" + ) + return cores_to_size[best_match] @staticmethod def generate_task_identifier(task: Namespace) -> str | None: diff --git a/frameworks/FEDOT/__init__.py b/frameworks/FEDOT/__init__.py index e0bb00f94..49c13b700 100644 --- a/frameworks/FEDOT/__init__.py +++ b/frameworks/FEDOT/__init__.py @@ -26,8 +26,10 @@ def run_fedot_tabular(dataset: Dataset, config: TaskConfig): __file__, "exec.py", input_data=data, dataset=dataset, config=config ) + def run_fedot_timeseries(dataset: Dataset, config: TaskConfig): from frameworks.shared.caller import run_in_venv + dataset = deepcopy(dataset) data = dict( @@ -43,6 +45,6 @@ def run_fedot_timeseries(dataset: Dataset, config: TaskConfig): repeated_item_id=dataset.repeated_item_id, ) - return run_in_venv(__file__, "exec_ts.py", - input_data=data, dataset=dataset, config=config) - + return run_in_venv( + __file__, "exec_ts.py", input_data=data, dataset=dataset, config=config + ) diff --git a/frameworks/FEDOT/exec.py b/frameworks/FEDOT/exec.py index ffc73ccbc..34d7d4f53 100644 --- a/frameworks/FEDOT/exec.py +++ b/frameworks/FEDOT/exec.py @@ -13,11 +13,13 @@ def run(dataset, config): log.info("\n**** FEDOT ****\n") - is_classification = config.type == 'classification' + is_classification = config.type == "classification" scoring_metric = get_fedot_metrics(config) training_params = {"preset": "best_quality", "n_jobs": config.cores} - training_params.update({k: v for k, v in config.framework_params.items() if not k.startswith('_')}) + training_params.update( + {k: v for k, v in config.framework_params.items() if not k.startswith("_")} + ) n_jobs = training_params["n_jobs"] log.info(f"Running FEDOT with a maximum time of {config.max_runtime_seconds}s on {n_jobs} cores, \ @@ -62,15 +64,15 @@ def run(dataset, config): def get_fedot_metrics(config): metrics_mapping = dict( - acc='accuracy', - auc='roc_auc', - f1='f1', - logloss='neg_log_loss', - mae='mae', - mse='mse', - msle='msle', - r2='r2', - rmse='rmse', + acc="accuracy", + auc="roc_auc", + f1="f1", + logloss="neg_log_loss", + mae="mae", + mse="mse", + msle="msle", + r2="r2", + rmse="rmse", ) scoring_metric = metrics_mapping.get(config.metric, None) diff --git a/frameworks/FEDOT/exec_ts.py b/frameworks/FEDOT/exec_ts.py index f2f11ca92..5261dc9ae 100644 --- a/frameworks/FEDOT/exec_ts.py +++ b/frameworks/FEDOT/exec_ts.py @@ -22,7 +22,9 @@ def 
run(dataset, config): scoring_metric = get_fedot_metrics(config) training_params = {"preset": "best_quality", "n_jobs": config.cores} - training_params.update({k: v for k, v in config.framework_params.items() if not k.startswith('_')}) + training_params.update( + {k: v for k, v in config.framework_params.items() if not k.startswith("_")} + ) n_jobs = training_params["n_jobs"] log.info(f"Running FEDOT with a maximum time of {config.max_runtime_seconds}s on {n_jobs} cores, \ @@ -30,14 +32,18 @@ def run(dataset, config): task = Task( TaskTypesEnum.ts_forecasting, - TsForecastingParams(forecast_length=dataset.forecast_horizon_in_steps) + TsForecastingParams(forecast_length=dataset.forecast_horizon_in_steps), ) train_df, test_df = load_timeseries_dataset(dataset) id_column = dataset.id_column - max_runtime_minutes_per_ts = config.max_runtime_seconds / 60 / train_df[id_column].nunique() - log.info(f'Fitting FEDOT with a maximum time of {max_runtime_minutes_per_ts}min per series') + max_runtime_minutes_per_ts = ( + config.max_runtime_seconds / 60 / train_df[id_column].nunique() + ) + log.info( + f"Fitting FEDOT with a maximum time of {max_runtime_minutes_per_ts}min per series" + ) training_duration, predict_duration = 0, 0 models_count = 0 @@ -51,10 +57,12 @@ def run(dataset, config): features=train_series, target=train_series, task=task, - data_type=DataTypesEnum.ts + data_type=DataTypesEnum.ts, ) - test_sub_df = test_df[test_df[id_column] == label].drop(columns=[id_column], axis=1) + test_sub_df = test_df[test_df[id_column] == label].drop( + columns=[id_column], axis=1 + ) horizon = len(test_sub_df[dataset.target]) fedot = Fedot( @@ -63,8 +71,9 @@ def run(dataset, config): timeout=max_runtime_minutes_per_ts, metric=scoring_metric, seed=config.seed, - max_pipeline_fit_time=max_runtime_minutes_per_ts / 5, # fit at least 5 pipelines - **training_params + max_pipeline_fit_time=max_runtime_minutes_per_ts + / 5, # fit at least 5 pipelines + **training_params, ) with Timer() as training: @@ -75,7 +84,7 @@ def run(dataset, config): try: prediction = fedot.forecast(train_input, horizon=horizon) except Exception as e: - log.info(f'Pipeline crashed due to {e}. Using no-op forecasting') + log.info(f"Pipeline crashed due to {e}. 
Using no-op forecasting") prediction = np.full(horizon, train_series[-1]) predict_duration += predict.duration @@ -92,25 +101,27 @@ def run(dataset, config): optional_columns[str(quantile)] = all_series_predictions save_artifacts(fedot, config) - return result(output_file=config.output_predictions_file, - predictions=all_series_predictions, - truth=truth_only, - target_is_encoded=False, - models_count=models_count, - training_duration=training_duration, - predict_duration=predict_duration, - optional_columns=pd.DataFrame(optional_columns)) + return result( + output_file=config.output_predictions_file, + predictions=all_series_predictions, + truth=truth_only, + target_is_encoded=False, + models_count=models_count, + training_duration=training_duration, + predict_duration=predict_duration, + optional_columns=pd.DataFrame(optional_columns), + ) def get_fedot_metrics(config): metrics_mapping = dict( - mape='mape', - smape='smape', - mase='mase', - mse='mse', - rmse='rmse', - mae='mae', - r2='r2', + mape="mape", + smape="smape", + mase="mase", + mse="mse", + rmse="rmse", + mae="mae", + r2="r2", ) scoring_metric = metrics_mapping.get(config.metric, None) @@ -121,27 +132,29 @@ def get_fedot_metrics(config): def save_artifacts(automl, config): - - artifacts = config.framework_params.get('_save_artifacts', []) - if 'models' in artifacts: + artifacts = config.framework_params.get("_save_artifacts", []) + if "models" in artifacts: try: - models_dir = output_subdir('models', config) - models_file = os.path.join(models_dir, 'model.json') + models_dir = output_subdir("models", config) + models_file = os.path.join(models_dir, "model.json") automl.current_pipeline.save(models_file) except Exception as e: log.info(f"Error when saving 'models': {e}.", exc_info=True) - if 'info' in artifacts: + if "info" in artifacts: try: info_dir = output_subdir("info", config) if automl.history: - automl.history.save(os.path.join(info_dir, 'history.json')) + automl.history.save(os.path.join(info_dir, "history.json")) else: - log.info(f"There is no optimization history info to save.") + log.info("There is no optimization history info to save.") except Exception as e: - log.info(f"Error when saving info about optimisation history: {e}.", exc_info=True) + log.info( + f"Error when saving info about optimisation history: {e}.", + exc_info=True, + ) - if 'leaderboard' in artifacts: + if "leaderboard" in artifacts: try: leaderboard_dir = output_subdir("leaderboard", config) if automl.history: @@ -151,5 +164,5 @@ def save_artifacts(automl, config): log.info(f"Error when saving 'leaderboard': {e}.", exc_info=True) -if __name__ == '__main__': +if __name__ == "__main__": call_run(run) From db9c4c488683136647fd088edcda5ba2772fe5cd Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 13 Dec 2024 23:17:51 +0200 Subject: [PATCH 11/12] todo list --- tests/unit/amlb/resources/test_benchmark_definition.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/unit/amlb/resources/test_benchmark_definition.py b/tests/unit/amlb/resources/test_benchmark_definition.py index 0cf913169..137bd9e5b 100644 --- a/tests/unit/amlb/resources/test_benchmark_definition.py +++ b/tests/unit/amlb/resources/test_benchmark_definition.py @@ -142,3 +142,11 @@ def test_benchmark_task_load_data(load_default_resources, mocker): mocker.patch("amlb.benchmark.Benchmark.data_loader.load", return_value={}) benchmark_task.load_data() + + +# def test_task_config_estimate_params +# then can separate into methods +# dont know if taskconfig is really needed.. 
except it is passed to integration scripts +# benchmarkingtask overrides taskconfig can be moved to task config +# creating a job doesn't need to live on the task.. probably. It binds `setup` though.. +# and used extensively in Run.. would it make sense for a job to run multiple task config? \ No newline at end of file From 0f48d98861b640fa2145e1f32ef4c108222d1737 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Tue, 24 Dec 2024 15:17:11 +0200 Subject: [PATCH 12/12] Add test for resource constraint checks --- amlb/benchmark.py | 43 ++++++++++--------- .../resources/test_benchmark_definition.py | 37 +++++++++++++--- 2 files changed, 54 insertions(+), 26 deletions(-) diff --git a/amlb/benchmark.py b/amlb/benchmark.py index 3c7653fc2..0aba70f35 100644 --- a/amlb/benchmark.py +++ b/amlb/benchmark.py @@ -616,33 +616,32 @@ def handle_unfulfilled(message, on_auto="warn"): os_recommended_mem = ns.get( rconfig(), f"{mode}.os_mem_size_mb", rconfig().benchmarks.os_mem_size_mb ) - left_for_app_mem = int(sys_mem.available - os_recommended_mem) - assigned_mem = round( - self.max_mem_size_mb - if self.max_mem_size_mb > 0 - else left_for_app_mem - if left_for_app_mem > 0 - else sys_mem.available - ) + + if self.max_mem_size_mb <= 0: + left_for_app_mem = int(sys_mem.available - os_recommended_mem) + self.max_mem_size_mb = ( + left_for_app_mem if left_for_app_mem > 0 else sys_mem.available + ) + self.max_mem_size_mb = round(self.max_mem_size_mb) + + if self.max_mem_size_mb > sys_mem.total: + raise JobError( + f"Total system memory {sys_mem.total} MB does not meet requirements (max_mem_size_mb={self.max_mem_size_mb} MB)!.", + ) + log.info( "Assigning %.f MB (total=%.f MB) for new %s task.", - assigned_mem, + self.max_mem_size_mb, sys_mem.total, self.name, ) - self.max_mem_size_mb = assigned_mem - if assigned_mem > sys_mem.total: - handle_unfulfilled( - f"Total system memory {sys_mem.total} MB does not meet requirements ({assigned_mem} MB)!.", - on_auto="fail", - ) - elif assigned_mem > sys_mem.available: + if self.max_mem_size_mb > sys_mem.available: handle_unfulfilled( - f"Assigned memory ({assigned_mem} MB) exceeds system available memory ({sys_mem.available} MB / total={sys_mem.total} MB)!" + f"Assigned memory ({self.max_mem_size_mb} MB) exceeds system available memory ({sys_mem.available} MB / total={sys_mem.total} MB)!" ) - elif assigned_mem > sys_mem.total - os_recommended_mem: + elif self.max_mem_size_mb > sys_mem.total - os_recommended_mem: handle_unfulfilled( - f"Assigned memory ({assigned_mem} MB) is within {sys_mem.available} MB of system total memory {sys_mem.total} MB): " + f"Assigned memory ({self.max_mem_size_mb} MB) is within {sys_mem.available} MB of system total memory {sys_mem.total} MB): " f"We recommend a {os_recommended_mem} MB buffer, otherwise OS memory usage might interfere with the benchmark task." ) @@ -651,7 +650,11 @@ def handle_unfulfilled(message, on_auto="warn"): os_recommended_vol = rconfig().benchmarks.os_vol_size_mb if self.min_vol_size_mb > sys_vol.free: handle_unfulfilled( - f"Available storage ({sys_vol.free} MB / total={sys_vol.total} MB) does not meet requirements ({self.min_vol_size_mb+os_recommended_vol} MB)!" + f"Available storage ({sys_vol.free} MB / total={sys_vol.total} MB) does not meet requirements (min_vol_size_mb={self.min_vol_size_mb} MB)!" 
+ ) + elif self.min_vol_size_mb > sys_vol.free + os_recommended_vol: + handle_unfulfilled( + f"Required storage min_vol_size_mb ({self.min_vol_size_mb}MB) together with recommended storage for OS ({os_recommended_vol} MB exceeds available storage ({sys_vol.free} MB)." ) diff --git a/tests/unit/amlb/resources/test_benchmark_definition.py b/tests/unit/amlb/resources/test_benchmark_definition.py index 137bd9e5b..1f8781a0a 100644 --- a/tests/unit/amlb/resources/test_benchmark_definition.py +++ b/tests/unit/amlb/resources/test_benchmark_definition.py @@ -2,6 +2,7 @@ from amlb import Resources, Benchmark from amlb.benchmark import BenchmarkTask +from amlb.job import JobError from amlb.utils import Namespace @@ -144,9 +145,33 @@ def test_benchmark_task_load_data(load_default_resources, mocker): benchmark_task.load_data() -# def test_task_config_estimate_params -# then can separate into methods -# dont know if taskconfig is really needed.. except it is passed to integration scripts -# benchmarkingtask overrides taskconfig can be moved to task config -# creating a job doesn't need to live on the task.. probably. It binds `setup` though.. -# and used extensively in Run.. would it make sense for a job to run multiple task config? \ No newline at end of file +def test_task_config_estimate_params(load_default_resources): + task = Namespace(name="foo", openml_task_id=42) + benchmark_task = create_benchmark_task(load_default_resources, task) + task_config = benchmark_task.task_config + + task_config.estimate_system_params() + + +@pytest.mark.parametrize( + "resource", + ["cores", "min_vol_size_mb", "max_mem_size_mb"], +) +def test_task_config_estimate_params_errors_on_insufficient_resources( + load_default_resources, resource +): + task = Namespace(name="foo", openml_task_id=42) + load_default_resources.config.benchmarks.defaults[resource] = 2**40 + load_default_resources.config.benchmarks.on_unfulfilled_constraint = "fail" + benchmark_task = create_benchmark_task(load_default_resources, task) + + with pytest.raises(JobError) as excinfo: + benchmark_task.task_config.estimate_system_params() + + (reason,) = excinfo.value.args + assert resource in reason + assert "does not meet requirement" in reason + # dont know if taskconfig is really needed.. except it is passed to integration scripts + # benchmarkingtask overrides taskconfig can be moved to task config + # creating a job doesn't need to live on the task.. probably. It binds `setup` though.. + # and used extensively in Run.. would it make sense for a job to run multiple task config?
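
A minimal usage sketch (not part of the patches above), assuming the patched amlb package is importable: it illustrates how the TaskConstraint dataclass introduced in PATCH 01 is meant to be constructed and handed to Resources.benchmark_definition via its new `defaults` parameter. The constraint values below are invented for illustration; only the field names and the method signatures come from the diffs.

from amlb.frameworks.definitions import TaskConstraint

# The four required fields mirror the dataclass added in amlb/frameworks/definitions.py;
# min_vol_size_mb and ec2_volume_type remain optional.
constraint = TaskConstraint(
    name="example_constraint",  # hypothetical constraint name
    folds=10,
    max_runtime_seconds=3600,
    cores=8,
)

# After PATCH 01, Resources.constraint_definition(name) returns a TaskConstraint
# (previously a Namespace plus its name), and benchmark_definition accepts one as
# its base config for every task in the benchmark, e.g.:
# tasks, benchmark_name, benchmark_path = resources.benchmark_definition(
#     "test", defaults=constraint
# )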