From 022ca9c1212679f7218ec589eac59aa5c7caba2c Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Fri, 29 Nov 2024 13:15:44 +0200
Subject: [PATCH 01/12] Introduce TaskConstraint as a dataclass
---
amlb/benchmark.py | 6 ++++--
amlb/frameworks/definitions.py | 10 ++++++++++
amlb/resources.py | 12 +++++++-----
3 files changed, 21 insertions(+), 7 deletions(-)
diff --git a/amlb/benchmark.py b/amlb/benchmark.py
index 90a010053..097200b6a 100644
--- a/amlb/benchmark.py
+++ b/amlb/benchmark.py
@@ -124,8 +124,10 @@ def __init__(
self.framework_def, self.framework_name = framework, framework.name
log.debug("Using framework definition: %s.", self.framework_def)
- self.constraint_def, self.constraint_name = rget().constraint_definition(
- constraint_name
+ task_constraint = rget().constraint_definition(constraint_name)
+ self.constraint_def, self.constraint_name = (
+ task_constraint,
+ task_constraint.name,
)
log.debug("Using constraint definition: %s.", self.constraint_def)
diff --git a/amlb/frameworks/definitions.py b/amlb/frameworks/definitions.py
index bc3dcb446..9e55cd48b 100644
--- a/amlb/frameworks/definitions.py
+++ b/amlb/frameworks/definitions.py
@@ -281,3 +281,13 @@ def load_framework_definition(
framework_name, tag = framework_name.split(":", 1)
definition_ns, name = configuration.framework_definition(framework_name, tag)
return Framework(**Namespace.dict(definition_ns))
+
+
+@dataclass
+class TaskConstraint:
+ name: str
+ folds: int
+ max_runtime_seconds: int
+ cores: int
+ min_vol_size_mb: int | None = None
+ ec2_volume_type: str | None = None
diff --git a/amlb/resources.py b/amlb/resources.py
index 0e37b240d..62541b86a 100644
--- a/amlb/resources.py
+++ b/amlb/resources.py
@@ -6,6 +6,7 @@
from __future__ import annotations
import copy
+import dataclasses
import logging
import os
import random
@@ -14,6 +15,7 @@
from amlb.benchmarks.parser import benchmark_load
from amlb.frameworks import default_tag, load_framework_definitions
+from .frameworks.definitions import TaskConstraint
from .utils import (
Namespace,
lazy_property,
@@ -172,7 +174,7 @@ def _frameworks(self):
return load_framework_definitions(frameworks_file, self.config)
@memoize
- def constraint_definition(self, name):
+ def constraint_definition(self, name: str) -> TaskConstraint:
"""
:param name: name of the benchmark constraint definition as defined in the constraints file
    :return: a Namespace object with the constraint config (folds, cores, max_runtime_seconds, ...) for the current benchmark run.
@@ -184,7 +186,7 @@ def constraint_definition(self, name):
name, self.config.benchmarks.constraints_file
)
)
- return constraint, constraint.name
+ return TaskConstraint(**Namespace.dict(constraint))
@lazy_property
def _constraints(self):
@@ -206,8 +208,7 @@ def _constraints(self):
constraints_lookup[name.lower()] = c
return constraints_lookup
- # @memoize
- def benchmark_definition(self, name, defaults=None):
+ def benchmark_definition(self, name: str, defaults: TaskConstraint | None = None):
"""
:param name: name of the benchmark as defined by resources/benchmarks/{name}.yaml, the path to a user-defined benchmark description file or a study id.
:param defaults: defaults used as a base config for each task in the benchmark definition
@@ -216,7 +217,8 @@ def benchmark_definition(self, name, defaults=None):
hard_defaults, tasks, benchmark_path, benchmark_name = benchmark_load(
name, self.config.benchmarks.definition_dir
)
-
+ if defaults is not None:
+ defaults = Namespace(**dataclasses.asdict(defaults))
defaults = Namespace.merge(
defaults, hard_defaults, Namespace(name="__defaults__")
)
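As a quick orientation for reviewers, here is a minimal sketch of the new return type in use; the constraint name and values are invented, while TaskConstraint and Namespace.dict are used exactly as in the hunks above.

    from amlb.frameworks.definitions import TaskConstraint
    from amlb.utils import Namespace

    # Illustrative constraints-file entry (values made up); constraint_definition
    # now unpacks such a Namespace into the dataclass instead of returning a
    # (Namespace, name) tuple.
    constraint_ns = Namespace(name="1h8c", folds=10, max_runtime_seconds=3600, cores=8)
    constraint = TaskConstraint(**Namespace.dict(constraint_ns))
    print(constraint.name, constraint.max_runtime_seconds)  # 1h8c 3600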
From fa77b974afd432b6a646a6dbc532831bb23b2629 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Fri, 29 Nov 2024 13:59:00 +0200
Subject: [PATCH 02/12] Rename hard_defaults to file_defaults to better
indicate meaning
---
amlb/benchmarks/parser.py | 6 +++---
amlb/resources.py | 4 ++--
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/amlb/benchmarks/parser.py b/amlb/benchmarks/parser.py
index ca5997fb6..239742831 100644
--- a/amlb/benchmarks/parser.py
+++ b/amlb/benchmarks/parser.py
@@ -26,8 +26,8 @@ def benchmark_load(
name, benchmark_definition_dirs
)
- hard_defaults = next((task for task in tasks if task.name == "__defaults__"), None)
- tasks = [task for task in tasks if task is not hard_defaults]
+ file_defaults = next((task for task in tasks if task.name == "__defaults__"), None)
+ tasks = [task for task in tasks if task is not file_defaults]
for t in tasks:
t.name = str_sanitize(t.name)
- return hard_defaults, tasks, benchmark_path, str_sanitize(benchmark_name)
+ return file_defaults, tasks, benchmark_path, str_sanitize(benchmark_name)
diff --git a/amlb/resources.py b/amlb/resources.py
index 62541b86a..4270f51a5 100644
--- a/amlb/resources.py
+++ b/amlb/resources.py
@@ -214,13 +214,13 @@ def benchmark_definition(self, name: str, defaults: TaskConstraint | None = None
:param defaults: defaults used as a base config for each task in the benchmark definition
:return:
"""
- hard_defaults, tasks, benchmark_path, benchmark_name = benchmark_load(
+ file_defaults, tasks, benchmark_path, benchmark_name = benchmark_load(
name, self.config.benchmarks.definition_dir
)
if defaults is not None:
defaults = Namespace(**dataclasses.asdict(defaults))
defaults = Namespace.merge(
- defaults, hard_defaults, Namespace(name="__defaults__")
+ defaults, file_defaults, Namespace(name="__defaults__")
)
for task in tasks:
task |= defaults # add missing keys from hard defaults + defaults
From 8985d46045394ba8865c50fdd77bf15a63c9afc9 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Fri, 29 Nov 2024 14:19:25 +0200
Subject: [PATCH 03/12] Start Task definition
---
amlb/frameworks/definitions.py | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/amlb/frameworks/definitions.py b/amlb/frameworks/definitions.py
index 9e55cd48b..d7ffccb5c 100644
--- a/amlb/frameworks/definitions.py
+++ b/amlb/frameworks/definitions.py
@@ -291,3 +291,14 @@ class TaskConstraint:
cores: int
min_vol_size_mb: int | None = None
ec2_volume_type: str | None = None
+
+
+@dataclass
+class Task(TaskConstraint):
+ dataset: Namespace | None = None # TODO: Specify file dataset description
+ enabled: bool = True
+ description: str = ""
+ openml_task_id: int | None = None
+ metric: str | list[str] | None = None
+ # Specific to time series
+ quantile_levels: list[float] | None = None
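A rough, illustrative construction of the new Task dataclass (not part of the patch); only the four fields inherited from TaskConstraint are required, and every field Task adds has a default.

    from amlb.frameworks.definitions import Task

    task = Task(
        name="iris",              # invented example values
        folds=10,
        max_runtime_seconds=600,
        cores=4,
        openml_task_id=59,        # illustrative OpenML task id
        metric="acc",
    )
    assert task.enabled           # tasks are enabled unless switched off
    assert task.dataset is None   # no file-based dataset description supplied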
From 666ca3e09df109e846bc5cbaf2964208631465ae Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Fri, 29 Nov 2024 14:24:20 +0200
Subject: [PATCH 04/12] Setup for testing benchmark loading
---
amlb/resources.py | 29 +++--
.../resources/test_benchmark_definition.py | 120 ++++++++++++++++++
2 files changed, 137 insertions(+), 12 deletions(-)
create mode 100644 tests/unit/amlb/resources/test_benchmark_definition.py
diff --git a/amlb/resources.py b/amlb/resources.py
index 4270f51a5..17aa18e10 100644
--- a/amlb/resources.py
+++ b/amlb/resources.py
@@ -209,13 +209,18 @@ def _constraints(self):
return constraints_lookup
def benchmark_definition(self, name: str, defaults: TaskConstraint | None = None):
+ return self._benchmark_definition(name, self.config, defaults)
+
+ def _benchmark_definition(
+ self, name: str, config_: Namespace, defaults: TaskConstraint | None = None
+ ):
"""
:param name: name of the benchmark as defined by resources/benchmarks/{name}.yaml, the path to a user-defined benchmark description file or a study id.
:param defaults: defaults used as a base config for each task in the benchmark definition
:return:
"""
file_defaults, tasks, benchmark_path, benchmark_name = benchmark_load(
- name, self.config.benchmarks.definition_dir
+ name, config_.benchmarks.definition_dir
)
if defaults is not None:
defaults = Namespace(**dataclasses.asdict(defaults))
@@ -224,15 +229,16 @@ def benchmark_definition(self, name: str, defaults: TaskConstraint | None = None
)
for task in tasks:
task |= defaults # add missing keys from hard defaults + defaults
- self._validate_task(task)
+ Resources._validate_task(task, config_)
- self._validate_task(defaults, lenient=True)
+ Resources._validate_task(defaults, config_, lenient=True)
defaults.enabled = False
tasks.append(defaults)
log.debug("Available task definitions:\n%s", tasks)
return tasks, benchmark_name, benchmark_path
- def _validate_task(self, task, lenient=False):
+ @staticmethod
+ def _validate_task(task: Namespace, config_: Namespace, lenient: bool = False):
missing = []
for conf in ["name"]:
if task[conf] is None:
@@ -253,13 +259,16 @@ def _validate_task(self, task, lenient=False):
"quantile_levels",
]:
if task[conf] is None:
- task[conf] = self.config.benchmarks.defaults[conf]
+ task[conf] = config_.benchmarks.defaults[conf]
log.debug(
"Config `{config}` not set for task {name}, using default `{value}`.".format(
config=conf, name=task.name, value=task[conf]
)
)
+ if task["metric"] is None:
+ task["metric"] = None
+
conf = "id"
if task[conf] is None:
task[conf] = (
@@ -287,14 +296,10 @@ def _validate_task(self, task, lenient=False):
"but task definition is {task}".format(task=str(task))
)
- conf = "metric"
- if task[conf] is None:
- task[conf] = None
-
conf = "ec2_instance_type"
if task[conf] is None:
- i_series = self.config.aws.ec2.instance_type.series
- i_map = self.config.aws.ec2.instance_type.map
+ i_series = config_.aws.ec2.instance_type.series
+ i_map = config_.aws.ec2.instance_type.map
if str(task.cores) in i_map:
i_size = i_map[str(task.cores)]
elif task.cores > 0:
@@ -315,7 +320,7 @@ def _validate_task(self, task, lenient=False):
conf = "ec2_volume_type"
if task[conf] is None:
- task[conf] = self.config.aws.ec2.volume_type
+ task[conf] = config_.aws.ec2.volume_type
log.debug(
"Config `{config}` not set for task {name}, using default `{value}`.".format(
config=conf, name=task.name, value=task[conf]
diff --git a/tests/unit/amlb/resources/test_benchmark_definition.py b/tests/unit/amlb/resources/test_benchmark_definition.py
new file mode 100644
index 000000000..a1841b87d
--- /dev/null
+++ b/tests/unit/amlb/resources/test_benchmark_definition.py
@@ -0,0 +1,120 @@
+from functools import partial
+
+import pytest
+
+from amlb import Resources
+from amlb.utils import Namespace
+
+
+@pytest.fixture
+def amlb_dummy_configuration() -> Namespace:
+ defaults = {
+ "max_runtime_seconds": 0,
+ "cores": 1,
+ "folds": 2,
+ "max_mem_size_mb": 3,
+ "min_vol_size_mb": 4,
+ "quantile_levels": 5,
+ }
+
+ aws_defaults = {
+ "ec2": {
+ "volume_type": "gp3",
+ "instance_type": {
+ "series": "m5",
+ "map": {"4": "small", "default": "large"},
+ },
+ }
+ }
+ return Namespace(
+ aws=Namespace.from_dict(aws_defaults),
+ benchmarks=Namespace(defaults=Namespace.from_dict(defaults)),
+ )
+
+
+def test_validate_task_strict_requires_name():
+ with pytest.raises(ValueError) as excinfo:
+ Resources._validate_task(
+ task=Namespace(),
+ config_=Namespace(),
+ lenient=False,
+ )
+ assert "mandatory properties as missing" in excinfo.value.args[0]
+
+
+def test_validate_task_strict_requires_id(amlb_dummy_configuration: Namespace):
+ strict_validate = partial(
+ Resources._validate_task, config_=amlb_dummy_configuration, lenient=False
+ )
+ with pytest.raises(ValueError) as excinfo:
+ strict_validate(task=Namespace(name="foo"))
+ assert "must contain an ID or one property" in excinfo.value.args[0]
+
+
+@pytest.mark.parametrize(
+ ("properties", "expected"),
+ [
+ (Namespace(id="bar"), "bar"),
+ (Namespace(openml_task_id=42), "openml.org/t/42"),
+ (Namespace(openml_dataset_id=42), "openml.org/d/42"),
+ (Namespace(dataset="bar"), "bar"),
+ (Namespace(dataset=Namespace(id="bar")), "bar"),
+ ],
+)
+def test_validate_task_id_formatting(
+ properties: Namespace, expected: str, amlb_dummy_configuration: Namespace
+):
+ task = Namespace(name="foo") | properties
+ Resources._validate_task(task=task, config_=amlb_dummy_configuration)
+ assert task["id"] == expected
+
+
+def test_validate_task_adds_benchmark_defaults(amlb_dummy_configuration: Namespace):
+ task = Namespace(name=None)
+ Resources._validate_task(task, amlb_dummy_configuration, lenient=True)
+
+ config = Namespace.dict(amlb_dummy_configuration, deep=True)
+ for setting, default in config["benchmarks"]["defaults"].items():
+ assert task[setting] == default
+ assert task["ec2_volume_type"] == amlb_dummy_configuration.aws.ec2.volume_type
+
+
+def test_validate_task_does_not_overwrite(amlb_dummy_configuration: Namespace):
+ task = Namespace(name=None, cores=42)
+ Resources._validate_task(task, amlb_dummy_configuration, lenient=True)
+
+ config = Namespace.dict(amlb_dummy_configuration, deep=True)
+ assert task.cores == 42
+ for setting, default in config["benchmarks"]["defaults"].items():
+ if setting != "cores":
+ assert task[setting] == default
+
+
+def test_validate_task_looks_up_instance_type(amlb_dummy_configuration: Namespace):
+ instance_type = amlb_dummy_configuration.aws.ec2.instance_type
+ reverse_size_map = {v: k for k, v in Namespace.dict(instance_type.map).items()}
+ n_cores_for_small = int(reverse_size_map["small"])
+
+ task = Namespace(name="foo", cores=n_cores_for_small)
+ Resources._validate_task(task, amlb_dummy_configuration, lenient=True)
+ assert (
+ task["ec2_instance_type"] == "m5.small"
+ ), "Should resolve to the instance type with the exact amount of cores"
+
+ task = Namespace(name="foo", cores=n_cores_for_small - 1)
+ Resources._validate_task(task, amlb_dummy_configuration, lenient=True)
+ assert (
+ task["ec2_instance_type"] == "m5.small"
+ ), "If exact amount of cores are not available, should resolve to next biggest"
+
+ task = Namespace(name="foo", cores=n_cores_for_small + 1)
+ Resources._validate_task(task, amlb_dummy_configuration, lenient=True)
+ assert (
+ task["ec2_instance_type"] == "m5.large"
+ ), "If bigger than largest in map, should revert to default"
+
+ task = Namespace(name="foo", ec2_instance_type="bar")
+ Resources._validate_task(task, amlb_dummy_configuration, lenient=True)
+ assert (
+ task["ec2_instance_type"] == "bar"
+ ), "Should not overwrite explicit configuration"
From 6ef3c55c36f137729a739f21d1ea5bb11b03d8fa Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Fri, 29 Nov 2024 20:46:30 +0200
Subject: [PATCH 05/12] Refactor task name check
---
amlb/resources.py | 10 ++--------
tests/unit/amlb/resources/test_benchmark_definition.py | 2 +-
2 files changed, 3 insertions(+), 9 deletions(-)
diff --git a/amlb/resources.py b/amlb/resources.py
index 17aa18e10..87fe4e26d 100644
--- a/amlb/resources.py
+++ b/amlb/resources.py
@@ -239,15 +239,9 @@ def _benchmark_definition(
@staticmethod
def _validate_task(task: Namespace, config_: Namespace, lenient: bool = False):
- missing = []
- for conf in ["name"]:
- if task[conf] is None:
- missing.append(conf)
- if not lenient and len(missing) > 0:
+ if not lenient and task["name"] is None:
raise ValueError(
- "{missing} mandatory properties as missing in task definition {taskdef}.".format(
- missing=missing, taskdef=task
- )
+ f"`name` is mandatory but missing in task definition {task}."
)
for conf in [
diff --git a/tests/unit/amlb/resources/test_benchmark_definition.py b/tests/unit/amlb/resources/test_benchmark_definition.py
index a1841b87d..e8874cfaa 100644
--- a/tests/unit/amlb/resources/test_benchmark_definition.py
+++ b/tests/unit/amlb/resources/test_benchmark_definition.py
@@ -39,7 +39,7 @@ def test_validate_task_strict_requires_name():
config_=Namespace(),
lenient=False,
)
- assert "mandatory properties as missing" in excinfo.value.args[0]
+ assert "mandatory but missing" in excinfo.value.args[0]
def test_validate_task_strict_requires_id(amlb_dummy_configuration: Namespace):
From cd5c9120533598ae9745488139466f37f366cd0d Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Fri, 29 Nov 2024 20:56:10 +0200
Subject: [PATCH 06/12] Refactor task id resolution
---
amlb/resources.py | 52 +++++++++++++++++++++--------------------------
1 file changed, 23 insertions(+), 29 deletions(-)
diff --git a/amlb/resources.py b/amlb/resources.py
index 87fe4e26d..1fcdad7be 100644
--- a/amlb/resources.py
+++ b/amlb/resources.py
@@ -244,6 +244,15 @@ def _validate_task(task: Namespace, config_: Namespace, lenient: bool = False):
f"`name` is mandatory but missing in task definition {task}."
)
+ if task["id"] is None:
+ task["id"] = Resources.generate_task_identifier(task)
+ if not lenient and task["id"] is None:
+ raise ValueError(
+ "task definition must contain an ID or one property "
+ "among ['openml_task_id', 'dataset'] to create an ID, "
+ "but task definition is {task}".format(task=str(task))
+ )
+
for conf in [
"max_runtime_seconds",
"cores",
@@ -259,37 +268,8 @@ def _validate_task(task: Namespace, config_: Namespace, lenient: bool = False):
config=conf, name=task.name, value=task[conf]
)
)
-
if task["metric"] is None:
task["metric"] = None
-
- conf = "id"
- if task[conf] is None:
- task[conf] = (
- "openml.org/t/{}".format(task.openml_task_id)
- if task["openml_task_id"] is not None
- else "openml.org/d/{}".format(task.openml_dataset_id)
- if task["openml_dataset_id"] is not None
- else (
- (
- task.dataset["id"]
- if isinstance(task.dataset, (dict, Namespace))
- else task.dataset
- if isinstance(task.dataset, str)
- else None
- )
- or task.name
- )
- if task["dataset"] is not None
- else None
- )
- if not lenient and task[conf] is None:
- raise ValueError(
- "task definition must contain an ID or one property "
- "among ['openml_task_id', 'dataset'] to create an ID, "
- "but task definition is {task}".format(task=str(task))
- )
-
conf = "ec2_instance_type"
if task[conf] is None:
i_series = config_.aws.ec2.instance_type.series
@@ -321,6 +301,20 @@ def _validate_task(task: Namespace, config_: Namespace, lenient: bool = False):
)
)
+ @staticmethod
+ def generate_task_identifier(task: Namespace) -> str | None:
+ if task["openml_task_id"] is not None:
+ return f"openml.org/t/{task.openml_task_id}"
+ if task["openml_dataset_id"] is not None:
+ return f"openml.org/d/{task.openml_dataset_id}"
+ if task["dataset"] is None:
+ return None
+ if isinstance(task.dataset, (dict, Namespace)):
+ return task.dataset["id"]
+ if isinstance(task.dataset, str):
+ return task.dataset
+ return task.name
+
__INSTANCE__: Resources | None = None
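For reference, a small sketch of the extracted helper and the resolution order it preserves: openml_task_id, then openml_dataset_id, then the dataset property, with None left for the caller to reject. The argument values are invented.

    from amlb import Resources
    from amlb.utils import Namespace

    print(Resources.generate_task_identifier(Namespace(name="t", openml_task_id=7)))
    # openml.org/t/7
    print(Resources.generate_task_identifier(Namespace(name="t", dataset="data/train.csv")))
    # data/train.csv (illustrative path)
    print(Resources.generate_task_identifier(Namespace(name="t")))
    # None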
From 57adc9569090b2903f99d8d95bbfbf4b31a3dffe Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Sat, 30 Nov 2024 09:07:28 +0200
Subject: [PATCH 07/12] Separate task validation from setting defaults
---
amlb/resources.py | 87 +++++++++++--------
.../resources/test_benchmark_definition.py | 39 ++++-----
2 files changed, 65 insertions(+), 61 deletions(-)
diff --git a/amlb/resources.py b/amlb/resources.py
index 1fcdad7be..8444b934a 100644
--- a/amlb/resources.py
+++ b/amlb/resources.py
@@ -229,30 +229,19 @@ def _benchmark_definition(
)
for task in tasks:
task |= defaults # add missing keys from hard defaults + defaults
- Resources._validate_task(task, config_)
+ Resources._validate_task(task)
+ Resources._add_task_defaults(task, config_)
- Resources._validate_task(defaults, config_, lenient=True)
+ Resources._add_task_defaults(defaults, config_)
defaults.enabled = False
tasks.append(defaults)
log.debug("Available task definitions:\n%s", tasks)
return tasks, benchmark_name, benchmark_path
@staticmethod
- def _validate_task(task: Namespace, config_: Namespace, lenient: bool = False):
- if not lenient and task["name"] is None:
- raise ValueError(
- f"`name` is mandatory but missing in task definition {task}."
- )
-
+ def _add_task_defaults(task: Namespace, config_: Namespace):
if task["id"] is None:
task["id"] = Resources.generate_task_identifier(task)
- if not lenient and task["id"] is None:
- raise ValueError(
- "task definition must contain an ID or one property "
- "among ['openml_task_id', 'dataset'] to create an ID, "
- "but task definition is {task}".format(task=str(task))
- )
-
for conf in [
"max_runtime_seconds",
"cores",
@@ -265,42 +254,66 @@ def _validate_task(task: Namespace, config_: Namespace, lenient: bool = False):
task[conf] = config_.benchmarks.defaults[conf]
log.debug(
"Config `{config}` not set for task {name}, using default `{value}`.".format(
- config=conf, name=task.name, value=task[conf]
+ config=conf, name=task["name"], value=task[conf]
)
)
+
if task["metric"] is None:
task["metric"] = None
- conf = "ec2_instance_type"
- if task[conf] is None:
- i_series = config_.aws.ec2.instance_type.series
- i_map = config_.aws.ec2.instance_type.map
- if str(task.cores) in i_map:
- i_size = i_map[str(task.cores)]
- elif task.cores > 0:
- supported_cores = list(
- map(int, Namespace.dict(i_map).keys() - {"default"})
- )
- supported_cores.sort()
- cores = next((c for c in supported_cores if c >= task.cores), "default")
- i_size = i_map[str(cores)]
- else:
- i_size = i_map.default
- task[conf] = ".".join([i_series, i_size])
+
+
+ if task["ec2_instance_type"] is None:
+ task["ec2_instance_type"] = Resources.lookup_ec2_instance_type(
+ config_, task.cores
+ )
log.debug(
"Config `{config}` not set for task {name}, using default selection `{value}`.".format(
- config=conf, name=task.name, value=task[conf]
+                config="ec2_instance_type", name=task["name"], value=task["ec2_instance_type"]
)
)
- conf = "ec2_volume_type"
- if task[conf] is None:
- task[conf] = config_.aws.ec2.volume_type
+ if task["ec2_volume_type"] is None:
+ task["ec2_volume_type"] = config_.aws.ec2.volume_type
log.debug(
"Config `{config}` not set for task {name}, using default `{value}`.".format(
- config=conf, name=task.name, value=task[conf]
+                config="ec2_volume_type", name=task["name"], value=task["ec2_volume_type"]
)
)
+ @staticmethod
+ def _validate_task(task: Namespace) -> None:
+ """Raises ValueError if task does not have a name and a way to generate an identifier."""
+ if task["name"] is None:
+ raise ValueError(
+ f"`name` is mandatory but missing in task definition {task}."
+ )
+ task_id = Namespace.get(task, "id", Resources.generate_task_identifier(task))
+ if task_id is None:
+ raise ValueError(
+ "task definition must contain an ID or one property "
+ "among ['openml_task_id', 'dataset'] to create an ID, "
+ "but task definition is {task}".format(task=str(task))
+ )
+
+ @staticmethod
+ def lookup_ec2_instance_type(config_: Namespace, cores: int) -> str:
+ i_series = config_.aws.ec2.instance_type.series
+ i_map = config_.aws.ec2.instance_type.map
+ i_size = Resources.lookup_suitable_instance_size(i_map, cores)
+ return f"{i_series}.{i_size}"
+
+ @staticmethod
+ def lookup_suitable_instance_size(cores_to_size: Namespace, cores: int) -> str:
+ if str(cores) in cores_to_size:
+ return cores_to_size[str(cores)]
+
+ supported_cores = list(map(int, set(dir(cores_to_size)) - {"default"}))
+ if cores <= 0 or cores > max(supported_cores):
+ return cores_to_size.default
+
+ cores = next((c for c in sorted(supported_cores) if c >= cores), "default")
+ return cores_to_size[str(cores)]
+
@staticmethod
def generate_task_identifier(task: Namespace) -> str | None:
if task["openml_task_id"] is not None:
diff --git a/tests/unit/amlb/resources/test_benchmark_definition.py b/tests/unit/amlb/resources/test_benchmark_definition.py
index e8874cfaa..4b452ce02 100644
--- a/tests/unit/amlb/resources/test_benchmark_definition.py
+++ b/tests/unit/amlb/resources/test_benchmark_definition.py
@@ -1,5 +1,3 @@
-from functools import partial
-
import pytest
from amlb import Resources
@@ -34,20 +32,13 @@ def amlb_dummy_configuration() -> Namespace:
def test_validate_task_strict_requires_name():
with pytest.raises(ValueError) as excinfo:
- Resources._validate_task(
- task=Namespace(),
- config_=Namespace(),
- lenient=False,
- )
+ Resources._validate_task(task=Namespace())
assert "mandatory but missing" in excinfo.value.args[0]
def test_validate_task_strict_requires_id(amlb_dummy_configuration: Namespace):
- strict_validate = partial(
- Resources._validate_task, config_=amlb_dummy_configuration, lenient=False
- )
with pytest.raises(ValueError) as excinfo:
- strict_validate(task=Namespace(name="foo"))
+ Resources._validate_task(task=Namespace(name="foo"))
assert "must contain an ID or one property" in excinfo.value.args[0]
@@ -61,17 +52,17 @@ def test_validate_task_strict_requires_id(amlb_dummy_configuration: Namespace):
(Namespace(dataset=Namespace(id="bar")), "bar"),
],
)
-def test_validate_task_id_formatting(
+def test_add_task_defaults_formatting(
properties: Namespace, expected: str, amlb_dummy_configuration: Namespace
):
task = Namespace(name="foo") | properties
- Resources._validate_task(task=task, config_=amlb_dummy_configuration)
+ Resources._add_task_defaults(task=task, config_=amlb_dummy_configuration)
assert task["id"] == expected
-def test_validate_task_adds_benchmark_defaults(amlb_dummy_configuration: Namespace):
- task = Namespace(name=None)
- Resources._validate_task(task, amlb_dummy_configuration, lenient=True)
+def test_add_task_defaults_sets_benchmark_defaults(amlb_dummy_configuration: Namespace):
+ task = Namespace()
+ Resources._add_task_defaults(task, amlb_dummy_configuration)
config = Namespace.dict(amlb_dummy_configuration, deep=True)
for setting, default in config["benchmarks"]["defaults"].items():
@@ -79,9 +70,9 @@ def test_validate_task_adds_benchmark_defaults(amlb_dummy_configuration: Namespa
assert task["ec2_volume_type"] == amlb_dummy_configuration.aws.ec2.volume_type
-def test_validate_task_does_not_overwrite(amlb_dummy_configuration: Namespace):
- task = Namespace(name=None, cores=42)
- Resources._validate_task(task, amlb_dummy_configuration, lenient=True)
+def test_add_task_defaults_does_not_overwrite(amlb_dummy_configuration: Namespace):
+ task = Namespace(cores=42)
+ Resources._add_task_defaults(task, amlb_dummy_configuration)
config = Namespace.dict(amlb_dummy_configuration, deep=True)
assert task.cores == 42
@@ -90,31 +81,31 @@ def test_validate_task_does_not_overwrite(amlb_dummy_configuration: Namespace):
assert task[setting] == default
-def test_validate_task_looks_up_instance_type(amlb_dummy_configuration: Namespace):
+def test_add_task_defaults_looks_up_instance_type(amlb_dummy_configuration: Namespace):
instance_type = amlb_dummy_configuration.aws.ec2.instance_type
reverse_size_map = {v: k for k, v in Namespace.dict(instance_type.map).items()}
n_cores_for_small = int(reverse_size_map["small"])
task = Namespace(name="foo", cores=n_cores_for_small)
- Resources._validate_task(task, amlb_dummy_configuration, lenient=True)
+ Resources._add_task_defaults(task, amlb_dummy_configuration)
assert (
task["ec2_instance_type"] == "m5.small"
), "Should resolve to the instance type with the exact amount of cores"
task = Namespace(name="foo", cores=n_cores_for_small - 1)
- Resources._validate_task(task, amlb_dummy_configuration, lenient=True)
+ Resources._add_task_defaults(task, amlb_dummy_configuration)
assert (
task["ec2_instance_type"] == "m5.small"
), "If exact amount of cores are not available, should resolve to next biggest"
task = Namespace(name="foo", cores=n_cores_for_small + 1)
- Resources._validate_task(task, amlb_dummy_configuration, lenient=True)
+ Resources._add_task_defaults(task, amlb_dummy_configuration)
assert (
task["ec2_instance_type"] == "m5.large"
), "If bigger than largest in map, should revert to default"
task = Namespace(name="foo", ec2_instance_type="bar")
- Resources._validate_task(task, amlb_dummy_configuration, lenient=True)
+ Resources._add_task_defaults(task, amlb_dummy_configuration)
assert (
task["ec2_instance_type"] == "bar"
), "Should not overwrite explicit configuration"
From 80306209d3743f6872bd25b37e8ac9ffbb19b2de Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Sat, 30 Nov 2024 11:50:11 +0200
Subject: [PATCH 08/12] Initialize BenchmarkTask in test
---
tests/conftest.py | 2 +-
.../resources/test_benchmark_definition.py | 19 ++++++++++++++++++-
2 files changed, 19 insertions(+), 2 deletions(-)
diff --git a/tests/conftest.py b/tests/conftest.py
index 04d7cf67b..12bab15e1 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -29,7 +29,7 @@ def load_default_resources(tmp_path):
)
config_args = Namespace({k: v for k, v in config_args if v is not None})
# merging all configuration files and saving to the global variable
- resources.from_configs(
+ return resources.from_configs(
config_default, config_default_dirs, config_user, config_args
)
diff --git a/tests/unit/amlb/resources/test_benchmark_definition.py b/tests/unit/amlb/resources/test_benchmark_definition.py
index 4b452ce02..c61d01e25 100644
--- a/tests/unit/amlb/resources/test_benchmark_definition.py
+++ b/tests/unit/amlb/resources/test_benchmark_definition.py
@@ -1,6 +1,7 @@
import pytest
-from amlb import Resources
+from amlb import Resources, Benchmark
+from amlb.benchmark import BenchmarkTask
from amlb.utils import Namespace
@@ -109,3 +110,19 @@ def test_add_task_defaults_looks_up_instance_type(amlb_dummy_configuration: Name
assert (
task["ec2_instance_type"] == "bar"
), "Should not overwrite explicit configuration"
+
+
+def test_benchmark_task(load_default_resources: Resources):
+ benchmark = Benchmark(
+ framework_name="constantpredictor",
+ benchmark_name="test",
+ constraint_name="test",
+ job_history=None,
+ )
+ task = Namespace(name="foo")
+ Resources._add_task_defaults(task, load_default_resources.config)
+ benchmark_task = BenchmarkTask(
+ benchmark=benchmark,
+ task_def=task,
+ fold=0,
+ )
From 3b7faead50f4914da40f1ce5af4f190f0ce85079 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Sun, 1 Dec 2024 14:36:12 +0200
Subject: [PATCH 09/12] Start adding more tests for benchmark task
---
.../resources/test_benchmark_definition.py | 24 +++++++++++++++----
1 file changed, 20 insertions(+), 4 deletions(-)
diff --git a/tests/unit/amlb/resources/test_benchmark_definition.py b/tests/unit/amlb/resources/test_benchmark_definition.py
index c61d01e25..0cf913169 100644
--- a/tests/unit/amlb/resources/test_benchmark_definition.py
+++ b/tests/unit/amlb/resources/test_benchmark_definition.py
@@ -112,17 +112,33 @@ def test_add_task_defaults_looks_up_instance_type(amlb_dummy_configuration: Name
), "Should not overwrite explicit configuration"
-def test_benchmark_task(load_default_resources: Resources):
+def create_benchmark_task(resources: Resources, task: Namespace):
benchmark = Benchmark(
framework_name="constantpredictor",
benchmark_name="test",
constraint_name="test",
job_history=None,
)
- task = Namespace(name="foo")
- Resources._add_task_defaults(task, load_default_resources.config)
- benchmark_task = BenchmarkTask(
+ Resources._add_task_defaults(task, resources.config)
+ return BenchmarkTask(
benchmark=benchmark,
task_def=task,
fold=0,
)
+
+
+def test_benchmark_task_load_data_raises_if_no_dataset(load_default_resources):
+ task = Namespace(name="foo")
+ benchmark_task = create_benchmark_task(load_default_resources, task)
+
+ with pytest.raises(ValueError) as excinfo:
+ benchmark_task.load_data()
+ assert "should have one property" in excinfo.value.args[0]
+
+
+def test_benchmark_task_load_data(load_default_resources, mocker):
+ task = Namespace(name="foo", openml_task_id=42)
+ benchmark_task = create_benchmark_task(load_default_resources, task)
+
+ mocker.patch("amlb.benchmark.Benchmark.data_loader.load", return_value={})
+ benchmark_task.load_data()
From 95048209bacf4e18f427dae9ba27c003dc09efe7 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Sun, 8 Dec 2024 11:13:44 +0200
Subject: [PATCH 10/12] Formatting changes
---
amlb/benchmark.py | 1 -
amlb/resources.py | 17 +++++---
frameworks/FEDOT/__init__.py | 8 ++--
frameworks/FEDOT/exec.py | 24 ++++++-----
frameworks/FEDOT/exec_ts.py | 83 +++++++++++++++++++++---------------
5 files changed, 77 insertions(+), 56 deletions(-)
diff --git a/amlb/benchmark.py b/amlb/benchmark.py
index 097200b6a..3c7653fc2 100644
--- a/amlb/benchmark.py
+++ b/amlb/benchmark.py
@@ -656,7 +656,6 @@ def handle_unfulfilled(message, on_auto="warn"):
class BenchmarkTask:
-
def __init__(self, benchmark: Benchmark, task_def, fold):
"""
diff --git a/amlb/resources.py b/amlb/resources.py
index 8444b934a..f18245ae7 100644
--- a/amlb/resources.py
+++ b/amlb/resources.py
@@ -212,7 +212,10 @@ def benchmark_definition(self, name: str, defaults: TaskConstraint | None = None
return self._benchmark_definition(name, self.config, defaults)
def _benchmark_definition(
- self, name: str, config_: Namespace, defaults: TaskConstraint | None = None
+ self,
+ name: str,
+ config_: Namespace,
+ defaults_for_task: TaskConstraint | None = None,
):
"""
:param name: name of the benchmark as defined by resources/benchmarks/{name}.yaml, the path to a user-defined benchmark description file or a study id.
@@ -222,8 +225,9 @@ def _benchmark_definition(
file_defaults, tasks, benchmark_path, benchmark_name = benchmark_load(
name, config_.benchmarks.definition_dir
)
- if defaults is not None:
- defaults = Namespace(**dataclasses.asdict(defaults))
+ defaults = None
+ if defaults_for_task is not None:
+ defaults = Namespace(**dataclasses.asdict(defaults_for_task))
defaults = Namespace.merge(
defaults, file_defaults, Namespace(name="__defaults__")
)
@@ -261,7 +265,6 @@ def _add_task_defaults(task: Namespace, config_: Namespace):
if task["metric"] is None:
task["metric"] = None
-
if task["ec2_instance_type"] is None:
task["ec2_instance_type"] = Resources.lookup_ec2_instance_type(
config_, task.cores
@@ -311,8 +314,10 @@ def lookup_suitable_instance_size(cores_to_size: Namespace, cores: int) -> str:
if cores <= 0 or cores > max(supported_cores):
return cores_to_size.default
- cores = next((c for c in sorted(supported_cores) if c >= cores), "default")
- return cores_to_size[str(cores)]
+ best_match = next(
+ (str(c) for c in sorted(supported_cores) if c >= cores), "default"
+ )
+ return cores_to_size[best_match]
@staticmethod
def generate_task_identifier(task: Namespace) -> str | None:
diff --git a/frameworks/FEDOT/__init__.py b/frameworks/FEDOT/__init__.py
index e0bb00f94..49c13b700 100644
--- a/frameworks/FEDOT/__init__.py
+++ b/frameworks/FEDOT/__init__.py
@@ -26,8 +26,10 @@ def run_fedot_tabular(dataset: Dataset, config: TaskConfig):
__file__, "exec.py", input_data=data, dataset=dataset, config=config
)
+
def run_fedot_timeseries(dataset: Dataset, config: TaskConfig):
from frameworks.shared.caller import run_in_venv
+
dataset = deepcopy(dataset)
data = dict(
@@ -43,6 +45,6 @@ def run_fedot_timeseries(dataset: Dataset, config: TaskConfig):
repeated_item_id=dataset.repeated_item_id,
)
- return run_in_venv(__file__, "exec_ts.py",
- input_data=data, dataset=dataset, config=config)
-
+ return run_in_venv(
+ __file__, "exec_ts.py", input_data=data, dataset=dataset, config=config
+ )
diff --git a/frameworks/FEDOT/exec.py b/frameworks/FEDOT/exec.py
index ffc73ccbc..34d7d4f53 100644
--- a/frameworks/FEDOT/exec.py
+++ b/frameworks/FEDOT/exec.py
@@ -13,11 +13,13 @@
def run(dataset, config):
log.info("\n**** FEDOT ****\n")
- is_classification = config.type == 'classification'
+ is_classification = config.type == "classification"
scoring_metric = get_fedot_metrics(config)
training_params = {"preset": "best_quality", "n_jobs": config.cores}
- training_params.update({k: v for k, v in config.framework_params.items() if not k.startswith('_')})
+ training_params.update(
+ {k: v for k, v in config.framework_params.items() if not k.startswith("_")}
+ )
n_jobs = training_params["n_jobs"]
log.info(f"Running FEDOT with a maximum time of {config.max_runtime_seconds}s on {n_jobs} cores, \
@@ -62,15 +64,15 @@ def run(dataset, config):
def get_fedot_metrics(config):
metrics_mapping = dict(
- acc='accuracy',
- auc='roc_auc',
- f1='f1',
- logloss='neg_log_loss',
- mae='mae',
- mse='mse',
- msle='msle',
- r2='r2',
- rmse='rmse',
+ acc="accuracy",
+ auc="roc_auc",
+ f1="f1",
+ logloss="neg_log_loss",
+ mae="mae",
+ mse="mse",
+ msle="msle",
+ r2="r2",
+ rmse="rmse",
)
scoring_metric = metrics_mapping.get(config.metric, None)
diff --git a/frameworks/FEDOT/exec_ts.py b/frameworks/FEDOT/exec_ts.py
index f2f11ca92..5261dc9ae 100644
--- a/frameworks/FEDOT/exec_ts.py
+++ b/frameworks/FEDOT/exec_ts.py
@@ -22,7 +22,9 @@ def run(dataset, config):
scoring_metric = get_fedot_metrics(config)
training_params = {"preset": "best_quality", "n_jobs": config.cores}
- training_params.update({k: v for k, v in config.framework_params.items() if not k.startswith('_')})
+ training_params.update(
+ {k: v for k, v in config.framework_params.items() if not k.startswith("_")}
+ )
n_jobs = training_params["n_jobs"]
log.info(f"Running FEDOT with a maximum time of {config.max_runtime_seconds}s on {n_jobs} cores, \
@@ -30,14 +32,18 @@ def run(dataset, config):
task = Task(
TaskTypesEnum.ts_forecasting,
- TsForecastingParams(forecast_length=dataset.forecast_horizon_in_steps)
+ TsForecastingParams(forecast_length=dataset.forecast_horizon_in_steps),
)
train_df, test_df = load_timeseries_dataset(dataset)
id_column = dataset.id_column
- max_runtime_minutes_per_ts = config.max_runtime_seconds / 60 / train_df[id_column].nunique()
- log.info(f'Fitting FEDOT with a maximum time of {max_runtime_minutes_per_ts}min per series')
+ max_runtime_minutes_per_ts = (
+ config.max_runtime_seconds / 60 / train_df[id_column].nunique()
+ )
+ log.info(
+ f"Fitting FEDOT with a maximum time of {max_runtime_minutes_per_ts}min per series"
+ )
training_duration, predict_duration = 0, 0
models_count = 0
@@ -51,10 +57,12 @@ def run(dataset, config):
features=train_series,
target=train_series,
task=task,
- data_type=DataTypesEnum.ts
+ data_type=DataTypesEnum.ts,
)
- test_sub_df = test_df[test_df[id_column] == label].drop(columns=[id_column], axis=1)
+ test_sub_df = test_df[test_df[id_column] == label].drop(
+ columns=[id_column], axis=1
+ )
horizon = len(test_sub_df[dataset.target])
fedot = Fedot(
@@ -63,8 +71,9 @@ def run(dataset, config):
timeout=max_runtime_minutes_per_ts,
metric=scoring_metric,
seed=config.seed,
- max_pipeline_fit_time=max_runtime_minutes_per_ts / 5, # fit at least 5 pipelines
- **training_params
+ max_pipeline_fit_time=max_runtime_minutes_per_ts
+ / 5, # fit at least 5 pipelines
+ **training_params,
)
with Timer() as training:
@@ -75,7 +84,7 @@ def run(dataset, config):
try:
prediction = fedot.forecast(train_input, horizon=horizon)
except Exception as e:
- log.info(f'Pipeline crashed due to {e}. Using no-op forecasting')
+ log.info(f"Pipeline crashed due to {e}. Using no-op forecasting")
prediction = np.full(horizon, train_series[-1])
predict_duration += predict.duration
@@ -92,25 +101,27 @@ def run(dataset, config):
optional_columns[str(quantile)] = all_series_predictions
save_artifacts(fedot, config)
- return result(output_file=config.output_predictions_file,
- predictions=all_series_predictions,
- truth=truth_only,
- target_is_encoded=False,
- models_count=models_count,
- training_duration=training_duration,
- predict_duration=predict_duration,
- optional_columns=pd.DataFrame(optional_columns))
+ return result(
+ output_file=config.output_predictions_file,
+ predictions=all_series_predictions,
+ truth=truth_only,
+ target_is_encoded=False,
+ models_count=models_count,
+ training_duration=training_duration,
+ predict_duration=predict_duration,
+ optional_columns=pd.DataFrame(optional_columns),
+ )
def get_fedot_metrics(config):
metrics_mapping = dict(
- mape='mape',
- smape='smape',
- mase='mase',
- mse='mse',
- rmse='rmse',
- mae='mae',
- r2='r2',
+ mape="mape",
+ smape="smape",
+ mase="mase",
+ mse="mse",
+ rmse="rmse",
+ mae="mae",
+ r2="r2",
)
scoring_metric = metrics_mapping.get(config.metric, None)
@@ -121,27 +132,29 @@ def get_fedot_metrics(config):
def save_artifacts(automl, config):
-
- artifacts = config.framework_params.get('_save_artifacts', [])
- if 'models' in artifacts:
+ artifacts = config.framework_params.get("_save_artifacts", [])
+ if "models" in artifacts:
try:
- models_dir = output_subdir('models', config)
- models_file = os.path.join(models_dir, 'model.json')
+ models_dir = output_subdir("models", config)
+ models_file = os.path.join(models_dir, "model.json")
automl.current_pipeline.save(models_file)
except Exception as e:
log.info(f"Error when saving 'models': {e}.", exc_info=True)
- if 'info' in artifacts:
+ if "info" in artifacts:
try:
info_dir = output_subdir("info", config)
if automl.history:
- automl.history.save(os.path.join(info_dir, 'history.json'))
+ automl.history.save(os.path.join(info_dir, "history.json"))
else:
- log.info(f"There is no optimization history info to save.")
+ log.info("There is no optimization history info to save.")
except Exception as e:
- log.info(f"Error when saving info about optimisation history: {e}.", exc_info=True)
+ log.info(
+ f"Error when saving info about optimisation history: {e}.",
+ exc_info=True,
+ )
- if 'leaderboard' in artifacts:
+ if "leaderboard" in artifacts:
try:
leaderboard_dir = output_subdir("leaderboard", config)
if automl.history:
@@ -151,5 +164,5 @@ def save_artifacts(automl, config):
log.info(f"Error when saving 'leaderboard': {e}.", exc_info=True)
-if __name__ == '__main__':
+if __name__ == "__main__":
call_run(run)
From db9c4c488683136647fd088edcda5ba2772fe5cd Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Fri, 13 Dec 2024 23:17:51 +0200
Subject: [PATCH 11/12] Add TODO list
---
tests/unit/amlb/resources/test_benchmark_definition.py | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/tests/unit/amlb/resources/test_benchmark_definition.py b/tests/unit/amlb/resources/test_benchmark_definition.py
index 0cf913169..137bd9e5b 100644
--- a/tests/unit/amlb/resources/test_benchmark_definition.py
+++ b/tests/unit/amlb/resources/test_benchmark_definition.py
@@ -142,3 +142,11 @@ def test_benchmark_task_load_data(load_default_resources, mocker):
mocker.patch("amlb.benchmark.Benchmark.data_loader.load", return_value={})
benchmark_task.load_data()
+
+
+# def test_task_config_estimate_params
+# then can separate into methods
+# dont know if taskconfig is really needed.. except it is passed to integration scripts
+# benchmarkingtask overrides taskconfig can be moved to task config
+# creating a job doesn't need to live on the task.. probably. It binds `setup` though..
+# and used extensively in Run.. would it make sense for a job to run multiple task config?
\ No newline at end of file
From 0f48d98861b640fa2145e1f32ef4c108222d1737 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Tue, 24 Dec 2024 15:17:11 +0200
Subject: [PATCH 12/12] Add test for resource constraint checks
---
amlb/benchmark.py | 43 ++++++++++---------
.../resources/test_benchmark_definition.py | 37 +++++++++++++---
2 files changed, 54 insertions(+), 26 deletions(-)
diff --git a/amlb/benchmark.py b/amlb/benchmark.py
index 3c7653fc2..0aba70f35 100644
--- a/amlb/benchmark.py
+++ b/amlb/benchmark.py
@@ -616,33 +616,32 @@ def handle_unfulfilled(message, on_auto="warn"):
os_recommended_mem = ns.get(
rconfig(), f"{mode}.os_mem_size_mb", rconfig().benchmarks.os_mem_size_mb
)
- left_for_app_mem = int(sys_mem.available - os_recommended_mem)
- assigned_mem = round(
- self.max_mem_size_mb
- if self.max_mem_size_mb > 0
- else left_for_app_mem
- if left_for_app_mem > 0
- else sys_mem.available
- )
+
+ if self.max_mem_size_mb <= 0:
+ left_for_app_mem = int(sys_mem.available - os_recommended_mem)
+ self.max_mem_size_mb = (
+ left_for_app_mem if left_for_app_mem > 0 else sys_mem.available
+ )
+ self.max_mem_size_mb = round(self.max_mem_size_mb)
+
+ if self.max_mem_size_mb > sys_mem.total:
+ raise JobError(
+ f"Total system memory {sys_mem.total} MB does not meet requirements (max_mem_size_mb={self.max_mem_size_mb} MB)!.",
+ )
+
log.info(
"Assigning %.f MB (total=%.f MB) for new %s task.",
- assigned_mem,
+ self.max_mem_size_mb,
sys_mem.total,
self.name,
)
- self.max_mem_size_mb = assigned_mem
- if assigned_mem > sys_mem.total:
- handle_unfulfilled(
- f"Total system memory {sys_mem.total} MB does not meet requirements ({assigned_mem} MB)!.",
- on_auto="fail",
- )
- elif assigned_mem > sys_mem.available:
+ if self.max_mem_size_mb > sys_mem.available:
handle_unfulfilled(
- f"Assigned memory ({assigned_mem} MB) exceeds system available memory ({sys_mem.available} MB / total={sys_mem.total} MB)!"
+ f"Assigned memory ({self.max_mem_size_mb} MB) exceeds system available memory ({sys_mem.available} MB / total={sys_mem.total} MB)!"
)
- elif assigned_mem > sys_mem.total - os_recommended_mem:
+ elif self.max_mem_size_mb > sys_mem.total - os_recommended_mem:
handle_unfulfilled(
- f"Assigned memory ({assigned_mem} MB) is within {sys_mem.available} MB of system total memory {sys_mem.total} MB): "
+ f"Assigned memory ({self.max_mem_size_mb} MB) is within {sys_mem.available} MB of system total memory {sys_mem.total} MB): "
f"We recommend a {os_recommended_mem} MB buffer, otherwise OS memory usage might interfere with the benchmark task."
)
@@ -651,7 +650,11 @@ def handle_unfulfilled(message, on_auto="warn"):
os_recommended_vol = rconfig().benchmarks.os_vol_size_mb
if self.min_vol_size_mb > sys_vol.free:
handle_unfulfilled(
- f"Available storage ({sys_vol.free} MB / total={sys_vol.total} MB) does not meet requirements ({self.min_vol_size_mb+os_recommended_vol} MB)!"
+ f"Available storage ({sys_vol.free} MB / total={sys_vol.total} MB) does not meet requirements (min_vol_size_mb={self.min_vol_size_mb} MB)!"
+ )
+            elif self.min_vol_size_mb > sys_vol.free - os_recommended_vol:
+ handle_unfulfilled(
+                f"Required storage min_vol_size_mb ({self.min_vol_size_mb} MB) together with the recommended OS storage ({os_recommended_vol} MB) exceeds available storage ({sys_vol.free} MB)."
)
diff --git a/tests/unit/amlb/resources/test_benchmark_definition.py b/tests/unit/amlb/resources/test_benchmark_definition.py
index 137bd9e5b..1f8781a0a 100644
--- a/tests/unit/amlb/resources/test_benchmark_definition.py
+++ b/tests/unit/amlb/resources/test_benchmark_definition.py
@@ -2,6 +2,7 @@
from amlb import Resources, Benchmark
from amlb.benchmark import BenchmarkTask
+from amlb.job import JobError
from amlb.utils import Namespace
@@ -144,9 +145,33 @@ def test_benchmark_task_load_data(load_default_resources, mocker):
benchmark_task.load_data()
-# def test_task_config_estimate_params
-# then can separate into methods
-# dont know if taskconfig is really needed.. except it is passed to integration scripts
-# benchmarkingtask overrides taskconfig can be moved to task config
-# creating a job doesn't need to live on the task.. probably. It binds `setup` though..
-# and used extensively in Run.. would it make sense for a job to run multiple task config?
\ No newline at end of file
+def test_task_config_estimate_params(load_default_resources):
+ task = Namespace(name="foo", openml_task_id=42)
+ benchmark_task = create_benchmark_task(load_default_resources, task)
+ task_config = benchmark_task.task_config
+
+ task_config.estimate_system_params()
+
+
+@pytest.mark.parametrize(
+ "resource",
+ ["cores", "min_vol_size_mb", "max_mem_size_mb"],
+)
+def test_task_config_estimate_params_errors_on_insufficient_resources(
+ load_default_resources, resource
+):
+ task = Namespace(name="foo", openml_task_id=42)
+ load_default_resources.config.benchmarks.defaults[resource] = 2**40
+ load_default_resources.config.benchmarks.on_unfulfilled_constraint = "fail"
+ benchmark_task = create_benchmark_task(load_default_resources, task)
+
+ with pytest.raises(JobError) as excinfo:
+ benchmark_task.task_config.estimate_system_params()
+
+ (reason,) = excinfo.value.args
+ assert resource in reason
+ assert "does not meet requirement" in reason
+ # dont know if taskconfig is really needed.. except it is passed to integration scripts
+ # benchmarkingtask overrides taskconfig can be moved to task config
+ # creating a job doesn't need to live on the task.. probably. It binds `setup` though..
+ # and used extensively in Run.. would it make sense for a job to run multiple task config?
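For context on the refactored memory check in amlb/benchmark.py above, the assignment rule boils down to this self-contained sketch (numbers invented; a plain RuntimeError stands in for JobError):

    def assign_app_memory(max_mem_size_mb: int, available_mb: int,
                          total_mb: int, os_buffer_mb: int) -> int:
        # Unset (<= 0) means: take what is left after the recommended OS buffer,
        # or all available memory if that would leave nothing.
        if max_mem_size_mb <= 0:
            left_for_app = int(available_mb - os_buffer_mb)
            max_mem_size_mb = left_for_app if left_for_app > 0 else available_mb
        max_mem_size_mb = round(max_mem_size_mb)
        # Requirements beyond total system memory are now a hard error.
        if max_mem_size_mb > total_mb:
            raise RuntimeError("Total system memory does not meet requirements")
        return max_mem_size_mb

    print(assign_app_memory(0, available_mb=12_000, total_mb=16_000, os_buffer_mb=2_000))  # 10000
    print(assign_app_memory(0, available_mb=1_500, total_mb=16_000, os_buffer_mb=2_000))   # 1500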