From 022ca9c1212679f7218ec589eac59aa5c7caba2c Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 29 Nov 2024 13:15:44 +0200 Subject: [PATCH 01/12] Introduce TaskConstraint as dataclass --- amlb/benchmark.py | 6 ++++-- amlb/frameworks/definitions.py | 10 ++++++++++ amlb/resources.py | 12 +++++++----- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/amlb/benchmark.py b/amlb/benchmark.py index 90a010053..097200b6a 100644 --- a/amlb/benchmark.py +++ b/amlb/benchmark.py @@ -124,8 +124,10 @@ def __init__( self.framework_def, self.framework_name = framework, framework.name log.debug("Using framework definition: %s.", self.framework_def) - self.constraint_def, self.constraint_name = rget().constraint_definition( - constraint_name + task_constraint = rget().constraint_definition(constraint_name) + self.constraint_def, self.constraint_name = ( + task_constraint, + task_constraint.name, ) log.debug("Using constraint definition: %s.", self.constraint_def) diff --git a/amlb/frameworks/definitions.py b/amlb/frameworks/definitions.py index bc3dcb446..9e55cd48b 100644 --- a/amlb/frameworks/definitions.py +++ b/amlb/frameworks/definitions.py @@ -281,3 +281,13 @@ def load_framework_definition( framework_name, tag = framework_name.split(":", 1) definition_ns, name = configuration.framework_definition(framework_name, tag) return Framework(**Namespace.dict(definition_ns)) + + +@dataclass +class TaskConstraint: + name: str + folds: int + max_runtime_seconds: int + cores: int + min_vol_size_mb: int | None = None + ec2_volume_type: str | None = None diff --git a/amlb/resources.py b/amlb/resources.py index 0e37b240d..62541b86a 100644 --- a/amlb/resources.py +++ b/amlb/resources.py @@ -6,6 +6,7 @@ from __future__ import annotations import copy +import dataclasses import logging import os import random @@ -14,6 +15,7 @@ from amlb.benchmarks.parser import benchmark_load from amlb.frameworks import default_tag, load_framework_definitions +from .frameworks.definitions import TaskConstraint from .utils import ( Namespace, lazy_property, @@ -172,7 +174,7 @@ def _frameworks(self): return load_framework_definitions(frameworks_file, self.config) @memoize - def constraint_definition(self, name): + def constraint_definition(self, name: str) -> TaskConstraint: """ :param name: name of the benchmark constraint definition as defined in the constraints file :return: a Namespace object with the constraint config (folds, cores, max_runtime_seconds, ...) for the current benchmamk run. @@ -184,7 +186,7 @@ def constraint_definition(self, name): name, self.config.benchmarks.constraints_file ) ) - return constraint, constraint.name + return TaskConstraint(**Namespace.dict(constraint)) @lazy_property def _constraints(self): @@ -206,8 +208,7 @@ def _constraints(self): constraints_lookup[name.lower()] = c return constraints_lookup - # @memoize - def benchmark_definition(self, name, defaults=None): + def benchmark_definition(self, name: str, defaults: TaskConstraint | None = None): """ :param name: name of the benchmark as defined by resources/benchmarks/{name}.yaml, the path to a user-defined benchmark description file or a study id. 
:param defaults: defaults used as a base config for each task in the benchmark definition @@ -216,7 +217,8 @@ def benchmark_definition(self, name, defaults=None): hard_defaults, tasks, benchmark_path, benchmark_name = benchmark_load( name, self.config.benchmarks.definition_dir ) - + if defaults is not None: + defaults = Namespace(**dataclasses.asdict(defaults)) defaults = Namespace.merge( defaults, hard_defaults, Namespace(name="__defaults__") ) From fa77b974afd432b6a646a6dbc532831bb23b2629 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 29 Nov 2024 13:59:00 +0200 Subject: [PATCH 02/12] Rename hard_defaults to file_defaults to better indicate meaning --- amlb/benchmarks/parser.py | 6 +++--- amlb/resources.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/amlb/benchmarks/parser.py b/amlb/benchmarks/parser.py index ca5997fb6..239742831 100644 --- a/amlb/benchmarks/parser.py +++ b/amlb/benchmarks/parser.py @@ -26,8 +26,8 @@ def benchmark_load( name, benchmark_definition_dirs ) - hard_defaults = next((task for task in tasks if task.name == "__defaults__"), None) - tasks = [task for task in tasks if task is not hard_defaults] + file_defaults = next((task for task in tasks if task.name == "__defaults__"), None) + tasks = [task for task in tasks if task is not file_defaults] for t in tasks: t.name = str_sanitize(t.name) - return hard_defaults, tasks, benchmark_path, str_sanitize(benchmark_name) + return file_defaults, tasks, benchmark_path, str_sanitize(benchmark_name) diff --git a/amlb/resources.py b/amlb/resources.py index 62541b86a..4270f51a5 100644 --- a/amlb/resources.py +++ b/amlb/resources.py @@ -214,13 +214,13 @@ def benchmark_definition(self, name: str, defaults: TaskConstraint | None = None :param defaults: defaults used as a base config for each task in the benchmark definition :return: """ - hard_defaults, tasks, benchmark_path, benchmark_name = benchmark_load( + file_defaults, tasks, benchmark_path, benchmark_name = benchmark_load( name, self.config.benchmarks.definition_dir ) if defaults is not None: defaults = Namespace(**dataclasses.asdict(defaults)) defaults = Namespace.merge( - defaults, hard_defaults, Namespace(name="__defaults__") + defaults, file_defaults, Namespace(name="__defaults__") ) for task in tasks: task |= defaults # add missing keys from hard defaults + defaults From 8985d46045394ba8865c50fdd77bf15a63c9afc9 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 29 Nov 2024 14:19:25 +0200 Subject: [PATCH 03/12] Start Task defintion --- amlb/frameworks/definitions.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/amlb/frameworks/definitions.py b/amlb/frameworks/definitions.py index 9e55cd48b..d7ffccb5c 100644 --- a/amlb/frameworks/definitions.py +++ b/amlb/frameworks/definitions.py @@ -291,3 +291,14 @@ class TaskConstraint: cores: int min_vol_size_mb: int | None = None ec2_volume_type: str | None = None + + +@dataclass +class Task(TaskConstraint): + dataset: Namespace | None = None # TODO: Specify file dataset description + enabled: bool = True + description: str = "" + openml_task_id: int | None = None + metric: str | list[str] | None = None + # Specific to time series + quantile_levels: list[float] | None = None From 666ca3e09df109e846bc5cbaf2964208631465ae Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 29 Nov 2024 14:24:20 +0200 Subject: [PATCH 04/12] Setup for testing benchmark loading --- amlb/resources.py | 29 +++-- .../resources/test_benchmark_definition.py | 120 ++++++++++++++++++ 2 files changed, 137 
insertions(+), 12 deletions(-) create mode 100644 tests/unit/amlb/resources/test_benchmark_definition.py diff --git a/amlb/resources.py b/amlb/resources.py index 4270f51a5..17aa18e10 100644 --- a/amlb/resources.py +++ b/amlb/resources.py @@ -209,13 +209,18 @@ def _constraints(self): return constraints_lookup def benchmark_definition(self, name: str, defaults: TaskConstraint | None = None): + return self._benchmark_definition(name, self.config, defaults) + + def _benchmark_definition( + self, name: str, config_: Namespace, defaults: TaskConstraint | None = None + ): """ :param name: name of the benchmark as defined by resources/benchmarks/{name}.yaml, the path to a user-defined benchmark description file or a study id. :param defaults: defaults used as a base config for each task in the benchmark definition :return: """ file_defaults, tasks, benchmark_path, benchmark_name = benchmark_load( - name, self.config.benchmarks.definition_dir + name, config_.benchmarks.definition_dir ) if defaults is not None: defaults = Namespace(**dataclasses.asdict(defaults)) @@ -224,15 +229,16 @@ def benchmark_definition(self, name: str, defaults: TaskConstraint | None = None ) for task in tasks: task |= defaults # add missing keys from hard defaults + defaults - self._validate_task(task) + Resources._validate_task(task, config_) - self._validate_task(defaults, lenient=True) + Resources._validate_task(defaults, config_, lenient=True) defaults.enabled = False tasks.append(defaults) log.debug("Available task definitions:\n%s", tasks) return tasks, benchmark_name, benchmark_path - def _validate_task(self, task, lenient=False): + @staticmethod + def _validate_task(task: Namespace, config_: Namespace, lenient: bool = False): missing = [] for conf in ["name"]: if task[conf] is None: @@ -253,13 +259,16 @@ def _validate_task(self, task, lenient=False): "quantile_levels", ]: if task[conf] is None: - task[conf] = self.config.benchmarks.defaults[conf] + task[conf] = config_.benchmarks.defaults[conf] log.debug( "Config `{config}` not set for task {name}, using default `{value}`.".format( config=conf, name=task.name, value=task[conf] ) ) + if task["metric"] is None: + task["metric"] = None + conf = "id" if task[conf] is None: task[conf] = ( @@ -287,14 +296,10 @@ def _validate_task(self, task, lenient=False): "but task definition is {task}".format(task=str(task)) ) - conf = "metric" - if task[conf] is None: - task[conf] = None - conf = "ec2_instance_type" if task[conf] is None: - i_series = self.config.aws.ec2.instance_type.series - i_map = self.config.aws.ec2.instance_type.map + i_series = config_.aws.ec2.instance_type.series + i_map = config_.aws.ec2.instance_type.map if str(task.cores) in i_map: i_size = i_map[str(task.cores)] elif task.cores > 0: @@ -315,7 +320,7 @@ def _validate_task(self, task, lenient=False): conf = "ec2_volume_type" if task[conf] is None: - task[conf] = self.config.aws.ec2.volume_type + task[conf] = config_.aws.ec2.volume_type log.debug( "Config `{config}` not set for task {name}, using default `{value}`.".format( config=conf, name=task.name, value=task[conf] diff --git a/tests/unit/amlb/resources/test_benchmark_definition.py b/tests/unit/amlb/resources/test_benchmark_definition.py new file mode 100644 index 000000000..a1841b87d --- /dev/null +++ b/tests/unit/amlb/resources/test_benchmark_definition.py @@ -0,0 +1,120 @@ +from functools import partial + +import pytest + +from amlb import Resources +from amlb.utils import Namespace + + +@pytest.fixture +def amlb_dummy_configuration() -> Namespace: + 
defaults = { + "max_runtime_seconds": 0, + "cores": 1, + "folds": 2, + "max_mem_size_mb": 3, + "min_vol_size_mb": 4, + "quantile_levels": 5, + } + + aws_defaults = { + "ec2": { + "volume_type": "gp3", + "instance_type": { + "series": "m5", + "map": {"4": "small", "default": "large"}, + }, + } + } + return Namespace( + aws=Namespace.from_dict(aws_defaults), + benchmarks=Namespace(defaults=Namespace.from_dict(defaults)), + ) + + +def test_validate_task_strict_requires_name(): + with pytest.raises(ValueError) as excinfo: + Resources._validate_task( + task=Namespace(), + config_=Namespace(), + lenient=False, + ) + assert "mandatory properties as missing" in excinfo.value.args[0] + + +def test_validate_task_strict_requires_id(amlb_dummy_configuration: Namespace): + strict_validate = partial( + Resources._validate_task, config_=amlb_dummy_configuration, lenient=False + ) + with pytest.raises(ValueError) as excinfo: + strict_validate(task=Namespace(name="foo")) + assert "must contain an ID or one property" in excinfo.value.args[0] + + +@pytest.mark.parametrize( + ("properties", "expected"), + [ + (Namespace(id="bar"), "bar"), + (Namespace(openml_task_id=42), "openml.org/t/42"), + (Namespace(openml_dataset_id=42), "openml.org/d/42"), + (Namespace(dataset="bar"), "bar"), + (Namespace(dataset=Namespace(id="bar")), "bar"), + ], +) +def test_validate_task_id_formatting( + properties: Namespace, expected: str, amlb_dummy_configuration: Namespace +): + task = Namespace(name="foo") | properties + Resources._validate_task(task=task, config_=amlb_dummy_configuration) + assert task["id"] == expected + + +def test_validate_task_adds_benchmark_defaults(amlb_dummy_configuration: Namespace): + task = Namespace(name=None) + Resources._validate_task(task, amlb_dummy_configuration, lenient=True) + + config = Namespace.dict(amlb_dummy_configuration, deep=True) + for setting, default in config["benchmarks"]["defaults"].items(): + assert task[setting] == default + assert task["ec2_volume_type"] == amlb_dummy_configuration.aws.ec2.volume_type + + +def test_validate_task_does_not_overwrite(amlb_dummy_configuration: Namespace): + task = Namespace(name=None, cores=42) + Resources._validate_task(task, amlb_dummy_configuration, lenient=True) + + config = Namespace.dict(amlb_dummy_configuration, deep=True) + assert task.cores == 42 + for setting, default in config["benchmarks"]["defaults"].items(): + if setting != "cores": + assert task[setting] == default + + +def test_validate_task_looks_up_instance_type(amlb_dummy_configuration: Namespace): + instance_type = amlb_dummy_configuration.aws.ec2.instance_type + reverse_size_map = {v: k for k, v in Namespace.dict(instance_type.map).items()} + n_cores_for_small = int(reverse_size_map["small"]) + + task = Namespace(name="foo", cores=n_cores_for_small) + Resources._validate_task(task, amlb_dummy_configuration, lenient=True) + assert ( + task["ec2_instance_type"] == "m5.small" + ), "Should resolve to the instance type with the exact amount of cores" + + task = Namespace(name="foo", cores=n_cores_for_small - 1) + Resources._validate_task(task, amlb_dummy_configuration, lenient=True) + assert ( + task["ec2_instance_type"] == "m5.small" + ), "If exact amount of cores are not available, should resolve to next biggest" + + task = Namespace(name="foo", cores=n_cores_for_small + 1) + Resources._validate_task(task, amlb_dummy_configuration, lenient=True) + assert ( + task["ec2_instance_type"] == "m5.large" + ), "If bigger than largest in map, should revert to default" + + task = 
Namespace(name="foo", ec2_instance_type="bar") + Resources._validate_task(task, amlb_dummy_configuration, lenient=True) + assert ( + task["ec2_instance_type"] == "bar" + ), "Should not overwrite explicit configuration" From 6ef3c55c36f137729a739f21d1ea5bb11b03d8fa Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 29 Nov 2024 20:46:30 +0200 Subject: [PATCH 05/12] Refactor task name check --- amlb/resources.py | 10 ++-------- tests/unit/amlb/resources/test_benchmark_definition.py | 2 +- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/amlb/resources.py b/amlb/resources.py index 17aa18e10..87fe4e26d 100644 --- a/amlb/resources.py +++ b/amlb/resources.py @@ -239,15 +239,9 @@ def _benchmark_definition( @staticmethod def _validate_task(task: Namespace, config_: Namespace, lenient: bool = False): - missing = [] - for conf in ["name"]: - if task[conf] is None: - missing.append(conf) - if not lenient and len(missing) > 0: + if not lenient and task["name"] is None: raise ValueError( - "{missing} mandatory properties as missing in task definition {taskdef}.".format( - missing=missing, taskdef=task - ) + f"`name` is mandatory but missing in task definition {task}." ) for conf in [ diff --git a/tests/unit/amlb/resources/test_benchmark_definition.py b/tests/unit/amlb/resources/test_benchmark_definition.py index a1841b87d..e8874cfaa 100644 --- a/tests/unit/amlb/resources/test_benchmark_definition.py +++ b/tests/unit/amlb/resources/test_benchmark_definition.py @@ -39,7 +39,7 @@ def test_validate_task_strict_requires_name(): config_=Namespace(), lenient=False, ) - assert "mandatory properties as missing" in excinfo.value.args[0] + assert "mandatory but missing" in excinfo.value.args[0] def test_validate_task_strict_requires_id(amlb_dummy_configuration: Namespace): From cd5c9120533598ae9745488139466f37f366cd0d Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 29 Nov 2024 20:56:10 +0200 Subject: [PATCH 06/12] Refactor task id resolution --- amlb/resources.py | 52 +++++++++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/amlb/resources.py b/amlb/resources.py index 87fe4e26d..1fcdad7be 100644 --- a/amlb/resources.py +++ b/amlb/resources.py @@ -244,6 +244,15 @@ def _validate_task(task: Namespace, config_: Namespace, lenient: bool = False): f"`name` is mandatory but missing in task definition {task}." 
) + if task["id"] is None: + task["id"] = Resources.generate_task_identifier(task) + if not lenient and task["id"] is None: + raise ValueError( + "task definition must contain an ID or one property " + "among ['openml_task_id', 'dataset'] to create an ID, " + "but task definition is {task}".format(task=str(task)) + ) + for conf in [ "max_runtime_seconds", "cores", @@ -259,37 +268,8 @@ def _validate_task(task: Namespace, config_: Namespace, lenient: bool = False): config=conf, name=task.name, value=task[conf] ) ) - if task["metric"] is None: task["metric"] = None - - conf = "id" - if task[conf] is None: - task[conf] = ( - "openml.org/t/{}".format(task.openml_task_id) - if task["openml_task_id"] is not None - else "openml.org/d/{}".format(task.openml_dataset_id) - if task["openml_dataset_id"] is not None - else ( - ( - task.dataset["id"] - if isinstance(task.dataset, (dict, Namespace)) - else task.dataset - if isinstance(task.dataset, str) - else None - ) - or task.name - ) - if task["dataset"] is not None - else None - ) - if not lenient and task[conf] is None: - raise ValueError( - "task definition must contain an ID or one property " - "among ['openml_task_id', 'dataset'] to create an ID, " - "but task definition is {task}".format(task=str(task)) - ) - conf = "ec2_instance_type" if task[conf] is None: i_series = config_.aws.ec2.instance_type.series @@ -321,6 +301,20 @@ def _validate_task(task: Namespace, config_: Namespace, lenient: bool = False): ) ) + @staticmethod + def generate_task_identifier(task: Namespace) -> str | None: + if task["openml_task_id"] is not None: + return f"openml.org/t/{task.openml_task_id}" + if task["openml_dataset_id"] is not None: + return f"openml.org/d/{task.openml_dataset_id}" + if task["dataset"] is None: + return None + if isinstance(task.dataset, (dict, Namespace)): + return task.dataset["id"] + if isinstance(task.dataset, str): + return task.dataset + return task.name + __INSTANCE__: Resources | None = None From 57adc9569090b2903f99d8d95bbfbf4b31a3dffe Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Sat, 30 Nov 2024 09:07:28 +0200 Subject: [PATCH 07/12] Break up task validation and setting defaults --- amlb/resources.py | 87 +++++++++++-------- .../resources/test_benchmark_definition.py | 39 ++++----- 2 files changed, 65 insertions(+), 61 deletions(-) diff --git a/amlb/resources.py b/amlb/resources.py index 1fcdad7be..8444b934a 100644 --- a/amlb/resources.py +++ b/amlb/resources.py @@ -229,30 +229,19 @@ def _benchmark_definition( ) for task in tasks: task |= defaults # add missing keys from hard defaults + defaults - Resources._validate_task(task, config_) + Resources._validate_task(task) + Resources._add_task_defaults(task, config_) - Resources._validate_task(defaults, config_, lenient=True) + Resources._add_task_defaults(defaults, config_) defaults.enabled = False tasks.append(defaults) log.debug("Available task definitions:\n%s", tasks) return tasks, benchmark_name, benchmark_path @staticmethod - def _validate_task(task: Namespace, config_: Namespace, lenient: bool = False): - if not lenient and task["name"] is None: - raise ValueError( - f"`name` is mandatory but missing in task definition {task}." 
- ) - + def _add_task_defaults(task: Namespace, config_: Namespace): if task["id"] is None: task["id"] = Resources.generate_task_identifier(task) - if not lenient and task["id"] is None: - raise ValueError( - "task definition must contain an ID or one property " - "among ['openml_task_id', 'dataset'] to create an ID, " - "but task definition is {task}".format(task=str(task)) - ) - for conf in [ "max_runtime_seconds", "cores", @@ -265,42 +254,66 @@ def _validate_task(task: Namespace, config_: Namespace, lenient: bool = False): task[conf] = config_.benchmarks.defaults[conf] log.debug( "Config `{config}` not set for task {name}, using default `{value}`.".format( - config=conf, name=task.name, value=task[conf] + config=conf, name=task["name"], value=task[conf] ) ) + if task["metric"] is None: task["metric"] = None - conf = "ec2_instance_type" - if task[conf] is None: - i_series = config_.aws.ec2.instance_type.series - i_map = config_.aws.ec2.instance_type.map - if str(task.cores) in i_map: - i_size = i_map[str(task.cores)] - elif task.cores > 0: - supported_cores = list( - map(int, Namespace.dict(i_map).keys() - {"default"}) - ) - supported_cores.sort() - cores = next((c for c in supported_cores if c >= task.cores), "default") - i_size = i_map[str(cores)] - else: - i_size = i_map.default - task[conf] = ".".join([i_series, i_size]) + + + if task["ec2_instance_type"] is None: + task["ec2_instance_type"] = Resources.lookup_ec2_instance_type( + config_, task.cores + ) log.debug( "Config `{config}` not set for task {name}, using default selection `{value}`.".format( - config=conf, name=task.name, value=task[conf] + config=conf, name=task["name"], value=task["ec2_instance_type"] ) ) - conf = "ec2_volume_type" - if task[conf] is None: - task[conf] = config_.aws.ec2.volume_type + if task["ec2_volume_type"] is None: + task["ec2_volume_type"] = config_.aws.ec2.volume_type log.debug( "Config `{config}` not set for task {name}, using default `{value}`.".format( - config=conf, name=task.name, value=task[conf] + config=conf, name=task["name"], value=task["ec2_volume_type"] ) ) + @staticmethod + def _validate_task(task: Namespace) -> None: + """Raises ValueError if task does not have a name and a way to generate an identifier.""" + if task["name"] is None: + raise ValueError( + f"`name` is mandatory but missing in task definition {task}." 
+ ) + task_id = Namespace.get(task, "id", Resources.generate_task_identifier(task)) + if task_id is None: + raise ValueError( + "task definition must contain an ID or one property " + "among ['openml_task_id', 'dataset'] to create an ID, " + "but task definition is {task}".format(task=str(task)) + ) + + @staticmethod + def lookup_ec2_instance_type(config_: Namespace, cores: int) -> str: + i_series = config_.aws.ec2.instance_type.series + i_map = config_.aws.ec2.instance_type.map + i_size = Resources.lookup_suitable_instance_size(i_map, cores) + return f"{i_series}.{i_size}" + + @staticmethod + def lookup_suitable_instance_size(cores_to_size: Namespace, cores: int) -> str: + if str(cores) in cores_to_size: + return cores_to_size[str(cores)] + + supported_cores = list(map(int, set(dir(cores_to_size)) - {"default"})) + if cores <= 0 or cores > max(supported_cores): + return cores_to_size.default + + cores = next((c for c in sorted(supported_cores) if c >= cores), "default") + return cores_to_size[str(cores)] + @staticmethod def generate_task_identifier(task: Namespace) -> str | None: if task["openml_task_id"] is not None: diff --git a/tests/unit/amlb/resources/test_benchmark_definition.py b/tests/unit/amlb/resources/test_benchmark_definition.py index e8874cfaa..4b452ce02 100644 --- a/tests/unit/amlb/resources/test_benchmark_definition.py +++ b/tests/unit/amlb/resources/test_benchmark_definition.py @@ -1,5 +1,3 @@ -from functools import partial - import pytest from amlb import Resources @@ -34,20 +32,13 @@ def amlb_dummy_configuration() -> Namespace: def test_validate_task_strict_requires_name(): with pytest.raises(ValueError) as excinfo: - Resources._validate_task( - task=Namespace(), - config_=Namespace(), - lenient=False, - ) + Resources._validate_task(task=Namespace()) assert "mandatory but missing" in excinfo.value.args[0] def test_validate_task_strict_requires_id(amlb_dummy_configuration: Namespace): - strict_validate = partial( - Resources._validate_task, config_=amlb_dummy_configuration, lenient=False - ) with pytest.raises(ValueError) as excinfo: - strict_validate(task=Namespace(name="foo")) + Resources._validate_task(task=Namespace(name="foo")) assert "must contain an ID or one property" in excinfo.value.args[0] @@ -61,17 +52,17 @@ def test_validate_task_strict_requires_id(amlb_dummy_configuration: Namespace): (Namespace(dataset=Namespace(id="bar")), "bar"), ], ) -def test_validate_task_id_formatting( +def test_add_task_defaults_formatting( properties: Namespace, expected: str, amlb_dummy_configuration: Namespace ): task = Namespace(name="foo") | properties - Resources._validate_task(task=task, config_=amlb_dummy_configuration) + Resources._add_task_defaults(task=task, config_=amlb_dummy_configuration) assert task["id"] == expected -def test_validate_task_adds_benchmark_defaults(amlb_dummy_configuration: Namespace): - task = Namespace(name=None) - Resources._validate_task(task, amlb_dummy_configuration, lenient=True) +def test_add_task_defaults_sets_benchmark_defaults(amlb_dummy_configuration: Namespace): + task = Namespace() + Resources._add_task_defaults(task, amlb_dummy_configuration) config = Namespace.dict(amlb_dummy_configuration, deep=True) for setting, default in config["benchmarks"]["defaults"].items(): @@ -79,9 +70,9 @@ def test_validate_task_adds_benchmark_defaults(amlb_dummy_configuration: Namespa assert task["ec2_volume_type"] == amlb_dummy_configuration.aws.ec2.volume_type -def test_validate_task_does_not_overwrite(amlb_dummy_configuration: Namespace): - task = 
Namespace(name=None, cores=42) - Resources._validate_task(task, amlb_dummy_configuration, lenient=True) +def test_add_task_defaults_does_not_overwrite(amlb_dummy_configuration: Namespace): + task = Namespace(cores=42) + Resources._add_task_defaults(task, amlb_dummy_configuration) config = Namespace.dict(amlb_dummy_configuration, deep=True) assert task.cores == 42 @@ -90,31 +81,31 @@ def test_validate_task_does_not_overwrite(amlb_dummy_configuration: Namespace): assert task[setting] == default -def test_validate_task_looks_up_instance_type(amlb_dummy_configuration: Namespace): +def test_add_task_defaults_looks_up_instance_type(amlb_dummy_configuration: Namespace): instance_type = amlb_dummy_configuration.aws.ec2.instance_type reverse_size_map = {v: k for k, v in Namespace.dict(instance_type.map).items()} n_cores_for_small = int(reverse_size_map["small"]) task = Namespace(name="foo", cores=n_cores_for_small) - Resources._validate_task(task, amlb_dummy_configuration, lenient=True) + Resources._add_task_defaults(task, amlb_dummy_configuration) assert ( task["ec2_instance_type"] == "m5.small" ), "Should resolve to the instance type with the exact amount of cores" task = Namespace(name="foo", cores=n_cores_for_small - 1) - Resources._validate_task(task, amlb_dummy_configuration, lenient=True) + Resources._add_task_defaults(task, amlb_dummy_configuration) assert ( task["ec2_instance_type"] == "m5.small" ), "If exact amount of cores are not available, should resolve to next biggest" task = Namespace(name="foo", cores=n_cores_for_small + 1) - Resources._validate_task(task, amlb_dummy_configuration, lenient=True) + Resources._add_task_defaults(task, amlb_dummy_configuration) assert ( task["ec2_instance_type"] == "m5.large" ), "If bigger than largest in map, should revert to default" task = Namespace(name="foo", ec2_instance_type="bar") - Resources._validate_task(task, amlb_dummy_configuration, lenient=True) + Resources._add_task_defaults(task, amlb_dummy_configuration) assert ( task["ec2_instance_type"] == "bar" ), "Should not overwrite explicit configuration" From 80306209d3743f6872bd25b37e8ac9ffbb19b2de Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Sat, 30 Nov 2024 11:50:11 +0200 Subject: [PATCH 08/12] Initialize BenchmarkTask in test --- tests/conftest.py | 2 +- .../resources/test_benchmark_definition.py | 19 ++++++++++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 04d7cf67b..12bab15e1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -29,7 +29,7 @@ def load_default_resources(tmp_path): ) config_args = Namespace({k: v for k, v in config_args if v is not None}) # merging all configuration files and saving to the global variable - resources.from_configs( + return resources.from_configs( config_default, config_default_dirs, config_user, config_args ) diff --git a/tests/unit/amlb/resources/test_benchmark_definition.py b/tests/unit/amlb/resources/test_benchmark_definition.py index 4b452ce02..c61d01e25 100644 --- a/tests/unit/amlb/resources/test_benchmark_definition.py +++ b/tests/unit/amlb/resources/test_benchmark_definition.py @@ -1,6 +1,7 @@ import pytest -from amlb import Resources +from amlb import Resources, Benchmark +from amlb.benchmark import BenchmarkTask from amlb.utils import Namespace @@ -109,3 +110,19 @@ def test_add_task_defaults_looks_up_instance_type(amlb_dummy_configuration: Name assert ( task["ec2_instance_type"] == "bar" ), "Should not overwrite explicit configuration" + + +def 
test_benchmark_task(load_default_resources: Resources): + benchmark = Benchmark( + framework_name="constantpredictor", + benchmark_name="test", + constraint_name="test", + job_history=None, + ) + task = Namespace(name="foo") + Resources._add_task_defaults(task, load_default_resources.config) + benchmark_task = BenchmarkTask( + benchmark=benchmark, + task_def=task, + fold=0, + ) From 3b7faead50f4914da40f1ce5af4f190f0ce85079 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Sun, 1 Dec 2024 14:36:12 +0200 Subject: [PATCH 09/12] Start adding more tests for benchmark task --- .../resources/test_benchmark_definition.py | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/tests/unit/amlb/resources/test_benchmark_definition.py b/tests/unit/amlb/resources/test_benchmark_definition.py index c61d01e25..0cf913169 100644 --- a/tests/unit/amlb/resources/test_benchmark_definition.py +++ b/tests/unit/amlb/resources/test_benchmark_definition.py @@ -112,17 +112,33 @@ def test_add_task_defaults_looks_up_instance_type(amlb_dummy_configuration: Name ), "Should not overwrite explicit configuration" -def test_benchmark_task(load_default_resources: Resources): +def create_benchmark_task(resources: Resources, task: Namespace): benchmark = Benchmark( framework_name="constantpredictor", benchmark_name="test", constraint_name="test", job_history=None, ) - task = Namespace(name="foo") - Resources._add_task_defaults(task, load_default_resources.config) - benchmark_task = BenchmarkTask( + Resources._add_task_defaults(task, resources.config) + return BenchmarkTask( benchmark=benchmark, task_def=task, fold=0, ) + + +def test_benchmark_task_load_data_raises_if_no_dataset(load_default_resources): + task = Namespace(name="foo") + benchmark_task = create_benchmark_task(load_default_resources, task) + + with pytest.raises(ValueError) as excinfo: + benchmark_task.load_data() + assert "should have one property" in excinfo.value.args[0] + + +def test_benchmark_task_load_data(load_default_resources, mocker): + task = Namespace(name="foo", openml_task_id=42) + benchmark_task = create_benchmark_task(load_default_resources, task) + + mocker.patch("amlb.benchmark.Benchmark.data_loader.load", return_value={}) + benchmark_task.load_data() From 95048209bacf4e18f427dae9ba27c003dc09efe7 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Sun, 8 Dec 2024 11:13:44 +0200 Subject: [PATCH 10/12] Formatting changes --- amlb/benchmark.py | 1 - amlb/resources.py | 17 +++++--- frameworks/FEDOT/__init__.py | 8 ++-- frameworks/FEDOT/exec.py | 24 ++++++----- frameworks/FEDOT/exec_ts.py | 83 +++++++++++++++++++++--------------- 5 files changed, 77 insertions(+), 56 deletions(-) diff --git a/amlb/benchmark.py b/amlb/benchmark.py index 097200b6a..3c7653fc2 100644 --- a/amlb/benchmark.py +++ b/amlb/benchmark.py @@ -656,7 +656,6 @@ def handle_unfulfilled(message, on_auto="warn"): class BenchmarkTask: - def __init__(self, benchmark: Benchmark, task_def, fold): """ diff --git a/amlb/resources.py b/amlb/resources.py index 8444b934a..f18245ae7 100644 --- a/amlb/resources.py +++ b/amlb/resources.py @@ -212,7 +212,10 @@ def benchmark_definition(self, name: str, defaults: TaskConstraint | None = None return self._benchmark_definition(name, self.config, defaults) def _benchmark_definition( - self, name: str, config_: Namespace, defaults: TaskConstraint | None = None + self, + name: str, + config_: Namespace, + defaults_for_task: TaskConstraint | None = None, ): """ :param name: name of the benchmark as defined by 
resources/benchmarks/{name}.yaml, the path to a user-defined benchmark description file or a study id. @@ -222,8 +225,9 @@ def _benchmark_definition( file_defaults, tasks, benchmark_path, benchmark_name = benchmark_load( name, config_.benchmarks.definition_dir ) - if defaults is not None: - defaults = Namespace(**dataclasses.asdict(defaults)) + defaults = None + if defaults_for_task is not None: + defaults = Namespace(**dataclasses.asdict(defaults_for_task)) defaults = Namespace.merge( defaults, file_defaults, Namespace(name="__defaults__") ) @@ -261,7 +265,6 @@ def _add_task_defaults(task: Namespace, config_: Namespace): if task["metric"] is None: task["metric"] = None - if task["ec2_instance_type"] is None: task["ec2_instance_type"] = Resources.lookup_ec2_instance_type( config_, task.cores @@ -311,8 +314,10 @@ def lookup_suitable_instance_size(cores_to_size: Namespace, cores: int) -> str: if cores <= 0 or cores > max(supported_cores): return cores_to_size.default - cores = next((c for c in sorted(supported_cores) if c >= cores), "default") - return cores_to_size[str(cores)] + best_match = next( + (str(c) for c in sorted(supported_cores) if c >= cores), "default" + ) + return cores_to_size[best_match] @staticmethod def generate_task_identifier(task: Namespace) -> str | None: diff --git a/frameworks/FEDOT/__init__.py b/frameworks/FEDOT/__init__.py index e0bb00f94..49c13b700 100644 --- a/frameworks/FEDOT/__init__.py +++ b/frameworks/FEDOT/__init__.py @@ -26,8 +26,10 @@ def run_fedot_tabular(dataset: Dataset, config: TaskConfig): __file__, "exec.py", input_data=data, dataset=dataset, config=config ) + def run_fedot_timeseries(dataset: Dataset, config: TaskConfig): from frameworks.shared.caller import run_in_venv + dataset = deepcopy(dataset) data = dict( @@ -43,6 +45,6 @@ def run_fedot_timeseries(dataset: Dataset, config: TaskConfig): repeated_item_id=dataset.repeated_item_id, ) - return run_in_venv(__file__, "exec_ts.py", - input_data=data, dataset=dataset, config=config) - + return run_in_venv( + __file__, "exec_ts.py", input_data=data, dataset=dataset, config=config + ) diff --git a/frameworks/FEDOT/exec.py b/frameworks/FEDOT/exec.py index ffc73ccbc..34d7d4f53 100644 --- a/frameworks/FEDOT/exec.py +++ b/frameworks/FEDOT/exec.py @@ -13,11 +13,13 @@ def run(dataset, config): log.info("\n**** FEDOT ****\n") - is_classification = config.type == 'classification' + is_classification = config.type == "classification" scoring_metric = get_fedot_metrics(config) training_params = {"preset": "best_quality", "n_jobs": config.cores} - training_params.update({k: v for k, v in config.framework_params.items() if not k.startswith('_')}) + training_params.update( + {k: v for k, v in config.framework_params.items() if not k.startswith("_")} + ) n_jobs = training_params["n_jobs"] log.info(f"Running FEDOT with a maximum time of {config.max_runtime_seconds}s on {n_jobs} cores, \ @@ -62,15 +64,15 @@ def run(dataset, config): def get_fedot_metrics(config): metrics_mapping = dict( - acc='accuracy', - auc='roc_auc', - f1='f1', - logloss='neg_log_loss', - mae='mae', - mse='mse', - msle='msle', - r2='r2', - rmse='rmse', + acc="accuracy", + auc="roc_auc", + f1="f1", + logloss="neg_log_loss", + mae="mae", + mse="mse", + msle="msle", + r2="r2", + rmse="rmse", ) scoring_metric = metrics_mapping.get(config.metric, None) diff --git a/frameworks/FEDOT/exec_ts.py b/frameworks/FEDOT/exec_ts.py index f2f11ca92..5261dc9ae 100644 --- a/frameworks/FEDOT/exec_ts.py +++ b/frameworks/FEDOT/exec_ts.py @@ -22,7 +22,9 @@ def 
run(dataset, config): scoring_metric = get_fedot_metrics(config) training_params = {"preset": "best_quality", "n_jobs": config.cores} - training_params.update({k: v for k, v in config.framework_params.items() if not k.startswith('_')}) + training_params.update( + {k: v for k, v in config.framework_params.items() if not k.startswith("_")} + ) n_jobs = training_params["n_jobs"] log.info(f"Running FEDOT with a maximum time of {config.max_runtime_seconds}s on {n_jobs} cores, \ @@ -30,14 +32,18 @@ def run(dataset, config): task = Task( TaskTypesEnum.ts_forecasting, - TsForecastingParams(forecast_length=dataset.forecast_horizon_in_steps) + TsForecastingParams(forecast_length=dataset.forecast_horizon_in_steps), ) train_df, test_df = load_timeseries_dataset(dataset) id_column = dataset.id_column - max_runtime_minutes_per_ts = config.max_runtime_seconds / 60 / train_df[id_column].nunique() - log.info(f'Fitting FEDOT with a maximum time of {max_runtime_minutes_per_ts}min per series') + max_runtime_minutes_per_ts = ( + config.max_runtime_seconds / 60 / train_df[id_column].nunique() + ) + log.info( + f"Fitting FEDOT with a maximum time of {max_runtime_minutes_per_ts}min per series" + ) training_duration, predict_duration = 0, 0 models_count = 0 @@ -51,10 +57,12 @@ def run(dataset, config): features=train_series, target=train_series, task=task, - data_type=DataTypesEnum.ts + data_type=DataTypesEnum.ts, ) - test_sub_df = test_df[test_df[id_column] == label].drop(columns=[id_column], axis=1) + test_sub_df = test_df[test_df[id_column] == label].drop( + columns=[id_column], axis=1 + ) horizon = len(test_sub_df[dataset.target]) fedot = Fedot( @@ -63,8 +71,9 @@ def run(dataset, config): timeout=max_runtime_minutes_per_ts, metric=scoring_metric, seed=config.seed, - max_pipeline_fit_time=max_runtime_minutes_per_ts / 5, # fit at least 5 pipelines - **training_params + max_pipeline_fit_time=max_runtime_minutes_per_ts + / 5, # fit at least 5 pipelines + **training_params, ) with Timer() as training: @@ -75,7 +84,7 @@ def run(dataset, config): try: prediction = fedot.forecast(train_input, horizon=horizon) except Exception as e: - log.info(f'Pipeline crashed due to {e}. Using no-op forecasting') + log.info(f"Pipeline crashed due to {e}. 
Using no-op forecasting") prediction = np.full(horizon, train_series[-1]) predict_duration += predict.duration @@ -92,25 +101,27 @@ def run(dataset, config): optional_columns[str(quantile)] = all_series_predictions save_artifacts(fedot, config) - return result(output_file=config.output_predictions_file, - predictions=all_series_predictions, - truth=truth_only, - target_is_encoded=False, - models_count=models_count, - training_duration=training_duration, - predict_duration=predict_duration, - optional_columns=pd.DataFrame(optional_columns)) + return result( + output_file=config.output_predictions_file, + predictions=all_series_predictions, + truth=truth_only, + target_is_encoded=False, + models_count=models_count, + training_duration=training_duration, + predict_duration=predict_duration, + optional_columns=pd.DataFrame(optional_columns), + ) def get_fedot_metrics(config): metrics_mapping = dict( - mape='mape', - smape='smape', - mase='mase', - mse='mse', - rmse='rmse', - mae='mae', - r2='r2', + mape="mape", + smape="smape", + mase="mase", + mse="mse", + rmse="rmse", + mae="mae", + r2="r2", ) scoring_metric = metrics_mapping.get(config.metric, None) @@ -121,27 +132,29 @@ def get_fedot_metrics(config): def save_artifacts(automl, config): - - artifacts = config.framework_params.get('_save_artifacts', []) - if 'models' in artifacts: + artifacts = config.framework_params.get("_save_artifacts", []) + if "models" in artifacts: try: - models_dir = output_subdir('models', config) - models_file = os.path.join(models_dir, 'model.json') + models_dir = output_subdir("models", config) + models_file = os.path.join(models_dir, "model.json") automl.current_pipeline.save(models_file) except Exception as e: log.info(f"Error when saving 'models': {e}.", exc_info=True) - if 'info' in artifacts: + if "info" in artifacts: try: info_dir = output_subdir("info", config) if automl.history: - automl.history.save(os.path.join(info_dir, 'history.json')) + automl.history.save(os.path.join(info_dir, "history.json")) else: - log.info(f"There is no optimization history info to save.") + log.info("There is no optimization history info to save.") except Exception as e: - log.info(f"Error when saving info about optimisation history: {e}.", exc_info=True) + log.info( + f"Error when saving info about optimisation history: {e}.", + exc_info=True, + ) - if 'leaderboard' in artifacts: + if "leaderboard" in artifacts: try: leaderboard_dir = output_subdir("leaderboard", config) if automl.history: @@ -151,5 +164,5 @@ def save_artifacts(automl, config): log.info(f"Error when saving 'leaderboard': {e}.", exc_info=True) -if __name__ == '__main__': +if __name__ == "__main__": call_run(run) From db9c4c488683136647fd088edcda5ba2772fe5cd Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 13 Dec 2024 23:17:51 +0200 Subject: [PATCH 11/12] todo list --- tests/unit/amlb/resources/test_benchmark_definition.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/unit/amlb/resources/test_benchmark_definition.py b/tests/unit/amlb/resources/test_benchmark_definition.py index 0cf913169..137bd9e5b 100644 --- a/tests/unit/amlb/resources/test_benchmark_definition.py +++ b/tests/unit/amlb/resources/test_benchmark_definition.py @@ -142,3 +142,11 @@ def test_benchmark_task_load_data(load_default_resources, mocker): mocker.patch("amlb.benchmark.Benchmark.data_loader.load", return_value={}) benchmark_task.load_data() + + +# def test_task_config_estimate_params +# then can separate into methods +# dont know if taskconfig is really needed.. 
except it is passed to integration scripts +# benchmarkingtask overrides taskconfig can be moved to task config +# creating a job doesn't need to live on the task.. probably. It binds `setup` though.. +# and used extensively in Run.. would it make sense for a job to run multiple task config? \ No newline at end of file From 0f48d98861b640fa2145e1f32ef4c108222d1737 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Tue, 24 Dec 2024 15:17:11 +0200 Subject: [PATCH 12/12] Add test for resource constraint checks --- amlb/benchmark.py | 43 ++++++++++--------- .../resources/test_benchmark_definition.py | 37 +++++++++++++--- 2 files changed, 54 insertions(+), 26 deletions(-) diff --git a/amlb/benchmark.py b/amlb/benchmark.py index 3c7653fc2..0aba70f35 100644 --- a/amlb/benchmark.py +++ b/amlb/benchmark.py @@ -616,33 +616,32 @@ def handle_unfulfilled(message, on_auto="warn"): os_recommended_mem = ns.get( rconfig(), f"{mode}.os_mem_size_mb", rconfig().benchmarks.os_mem_size_mb ) - left_for_app_mem = int(sys_mem.available - os_recommended_mem) - assigned_mem = round( - self.max_mem_size_mb - if self.max_mem_size_mb > 0 - else left_for_app_mem - if left_for_app_mem > 0 - else sys_mem.available - ) + + if self.max_mem_size_mb <= 0: + left_for_app_mem = int(sys_mem.available - os_recommended_mem) + self.max_mem_size_mb = ( + left_for_app_mem if left_for_app_mem > 0 else sys_mem.available + ) + self.max_mem_size_mb = round(self.max_mem_size_mb) + + if self.max_mem_size_mb > sys_mem.total: + raise JobError( + f"Total system memory {sys_mem.total} MB does not meet requirements (max_mem_size_mb={self.max_mem_size_mb} MB)!.", + ) + log.info( "Assigning %.f MB (total=%.f MB) for new %s task.", - assigned_mem, + self.max_mem_size_mb, sys_mem.total, self.name, ) - self.max_mem_size_mb = assigned_mem - if assigned_mem > sys_mem.total: - handle_unfulfilled( - f"Total system memory {sys_mem.total} MB does not meet requirements ({assigned_mem} MB)!.", - on_auto="fail", - ) - elif assigned_mem > sys_mem.available: + if self.max_mem_size_mb > sys_mem.available: handle_unfulfilled( - f"Assigned memory ({assigned_mem} MB) exceeds system available memory ({sys_mem.available} MB / total={sys_mem.total} MB)!" + f"Assigned memory ({self.max_mem_size_mb} MB) exceeds system available memory ({sys_mem.available} MB / total={sys_mem.total} MB)!" ) - elif assigned_mem > sys_mem.total - os_recommended_mem: + elif self.max_mem_size_mb > sys_mem.total - os_recommended_mem: handle_unfulfilled( - f"Assigned memory ({assigned_mem} MB) is within {sys_mem.available} MB of system total memory {sys_mem.total} MB): " + f"Assigned memory ({self.max_mem_size_mb} MB) is within {sys_mem.available} MB of system total memory {sys_mem.total} MB): " f"We recommend a {os_recommended_mem} MB buffer, otherwise OS memory usage might interfere with the benchmark task." ) @@ -651,7 +650,11 @@ def handle_unfulfilled(message, on_auto="warn"): os_recommended_vol = rconfig().benchmarks.os_vol_size_mb if self.min_vol_size_mb > sys_vol.free: handle_unfulfilled( - f"Available storage ({sys_vol.free} MB / total={sys_vol.total} MB) does not meet requirements ({self.min_vol_size_mb+os_recommended_vol} MB)!" + f"Available storage ({sys_vol.free} MB / total={sys_vol.total} MB) does not meet requirements (min_vol_size_mb={self.min_vol_size_mb} MB)!" 
+ ) + elif self.min_vol_size_mb > sys_vol.free + os_recommended_vol: + handle_unfulfilled( + f"Required storage min_vol_size_mb ({self.min_vol_size_mb}MB) together with recommended storage for OS ({os_recommended_vol} MB exceeds available storage ({sys_vol.free} MB)." ) diff --git a/tests/unit/amlb/resources/test_benchmark_definition.py b/tests/unit/amlb/resources/test_benchmark_definition.py index 137bd9e5b..1f8781a0a 100644 --- a/tests/unit/amlb/resources/test_benchmark_definition.py +++ b/tests/unit/amlb/resources/test_benchmark_definition.py @@ -2,6 +2,7 @@ from amlb import Resources, Benchmark from amlb.benchmark import BenchmarkTask +from amlb.job import JobError from amlb.utils import Namespace @@ -144,9 +145,33 @@ def test_benchmark_task_load_data(load_default_resources, mocker): benchmark_task.load_data() -# def test_task_config_estimate_params -# then can separate into methods -# dont know if taskconfig is really needed.. except it is passed to integration scripts -# benchmarkingtask overrides taskconfig can be moved to task config -# creating a job doesn't need to live on the task.. probably. It binds `setup` though.. -# and used extensively in Run.. would it make sense for a job to run multiple task config? \ No newline at end of file +def test_task_config_estimate_params(load_default_resources): + task = Namespace(name="foo", openml_task_id=42) + benchmark_task = create_benchmark_task(load_default_resources, task) + task_config = benchmark_task.task_config + + task_config.estimate_system_params() + + +@pytest.mark.parametrize( + "resource", + ["cores", "min_vol_size_mb", "max_mem_size_mb"], +) +def test_task_config_estimate_params_errors_on_insufficient_resources( + load_default_resources, resource +): + task = Namespace(name="foo", openml_task_id=42) + load_default_resources.config.benchmarks.defaults[resource] = 2**40 + load_default_resources.config.benchmarks.on_unfulfilled_constraint = "fail" + benchmark_task = create_benchmark_task(load_default_resources, task) + + with pytest.raises(JobError) as excinfo: + benchmark_task.task_config.estimate_system_params() + + (reason,) = excinfo.value.args + assert resource in reason + assert "does not meet requirement" in reason + # dont know if taskconfig is really needed.. except it is passed to integration scripts + # benchmarkingtask overrides taskconfig can be moved to task config + # creating a job doesn't need to live on the task.. probably. It binds `setup` though.. + # and used extensively in Run.. would it make sense for a job to run multiple task config?
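
A minimal usage sketch (not part of the patches above), assuming the patched amlb package is importable: it illustrates how the TaskConstraint dataclass introduced in PATCH 01 is meant to be constructed and handed to Resources.benchmark_definition via its new `defaults` parameter. The constraint values below are invented for illustration; only the field names and the method signatures come from the diffs.

from amlb.frameworks.definitions import TaskConstraint

# The four required fields mirror the dataclass added in amlb/frameworks/definitions.py;
# min_vol_size_mb and ec2_volume_type remain optional.
constraint = TaskConstraint(
    name="example_constraint",  # hypothetical constraint name
    folds=10,
    max_runtime_seconds=3600,
    cores=8,
)

# After PATCH 01, Resources.constraint_definition(name) returns a TaskConstraint
# (previously a Namespace plus its name), and benchmark_definition accepts one as
# its base config for every task in the benchmark, e.g.:
# tasks, benchmark_name, benchmark_path = resources.benchmark_definition(
#     "test", defaults=constraint
# )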