From 8019153e53e9696a1d67d77065a3201f742f9246 Mon Sep 17 00:00:00 2001 From: voorhs Date: Sat, 1 Feb 2025 12:03:27 +0300 Subject: [PATCH 01/74] define interface --- autointent/modules/abc/_scoring.py | 33 +++++++++++++++--------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/autointent/modules/abc/_scoring.py b/autointent/modules/abc/_scoring.py index f275de9d0..7ac8045ff 100644 --- a/autointent/modules/abc/_scoring.py +++ b/autointent/modules/abc/_scoring.py @@ -1,8 +1,9 @@ """Base class for scoring modules.""" from abc import ABC, abstractmethod -from typing import Any, Literal +from typing import Any +import numpy as np import numpy.typing as npt from autointent import Context @@ -21,7 +22,7 @@ class ScoringModule(Module, ABC): supports_oos = False - def score(self, context: Context, split: Literal["validation", "test"], metrics: list[str]) -> dict[str, float]: + def score(self, context: Context, test: bool, metrics: list[str]) -> dict[str, float]: """ Evaluate the scorer on a test set and compute the specified metric. @@ -29,25 +30,23 @@ def score(self, context: Context, split: Literal["validation", "test"], metrics: :param split: Target split :return: Computed metrics value for the test set or error code of metrics """ - if split == "validation": - utterances = context.data_handler.validation_utterances(0) - labels = context.data_handler.validation_labels(0) - elif split == "test": + metrics_dict = SCORING_METRICS_MULTILABEL if context.is_multilabel() else SCORING_METRICS_MULTICLASS + chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics} + + if test: utterances = context.data_handler.test_utterances() labels = context.data_handler.test_labels() - else: - message = f"Invalid split '{split}' provided. Expected one of 'validation', or 'test'." 
- raise ValueError(message) + scores = self.predict(utterances) + return self.score_metrics((labels, scores), chosen_metrics) - scores = self.predict(utterances) + metrics_values = {name: [] for name in chosen_metrics} + for train_utterances, train_labels, val_utterances, val_labels in context.validation_iterator(0): + self.fit(train_utterances, train_labels) + val_scores = self.predict(val_utterances) + for name, fn in chosen_metrics.items(): + metrics_values[name].append(fn(val_labels, val_scores)) - self._train_scores = self.predict(context.data_handler.train_utterances(1)) - self._validation_scores = self.predict(context.data_handler.validation_utterances(1)) - self._test_scores = self.predict(context.data_handler.test_utterances()) - - metrics_dict = SCORING_METRICS_MULTILABEL if context.is_multilabel() else SCORING_METRICS_MULTICLASS - chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics} - return self.score_metrics((labels, scores), chosen_metrics) + return {name: np.mean(values_list) for name, values_list in metrics_values.items()} def get_assets(self) -> ScorerArtifact: """ From 90af5ce9b2426681fdea5076a76504c7858b07dc Mon Sep 17 00:00:00 2001 From: voorhs Date: Sat, 1 Feb 2025 12:39:49 +0300 Subject: [PATCH 02/74] basic ho iterator --- .../context/data_handler/_data_handler.py | 33 ++++++++++++++++--- autointent/modules/abc/_scoring.py | 2 +- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/autointent/context/data_handler/_data_handler.py b/autointent/context/data_handler/_data_handler.py index 06387b1f1..b0772e4d0 100644 --- a/autointent/context/data_handler/_data_handler.py +++ b/autointent/context/data_handler/_data_handler.py @@ -2,7 +2,7 @@ import logging from pathlib import Path -from typing import TypedDict, cast +from typing import Literal, TypedDict, cast from datasets import concatenate_datasets from transformers import set_seed @@ -26,10 +26,12 @@ class RegexPatterns(TypedDict): """Partial match regex patterns.""" -class DataHandler: +class DataHandler: # TODO rename to Validator """Data handler class.""" - def __init__(self, dataset: Dataset, random_seed: int = 0, split_train: bool = True) -> None: + def __init__( + self, dataset: Dataset, scheme: Literal["cv", "ho"], split_train: bool = True, random_seed: int = 0 + ) -> None: """ Initialize the data handler. @@ -43,8 +45,12 @@ def __init__(self, dataset: Dataset, random_seed: int = 0, split_train: bool = T self.dataset = dataset self.n_classes = self.dataset.n_classes + self.scheme = scheme - self._split(random_seed, split_train) + if scheme == "ho": + self._split_ho(random_seed, split_train) + elif scheme == "cv": + self._split_cv(random_seed) self.regexp_patterns = [ RegexPatterns( @@ -153,6 +159,23 @@ def test_labels(self, idx: int | None = None) -> ListOfGenericLabels: split = f"{Split.TEST}_{idx}" if idx is not None else Split.TEST return cast(ListOfGenericLabels, self.dataset[split][self.dataset.label_feature]) + def validation_iterator(self, idx: int | None = None) -> list[tuple[list, list, list, list]]: + if self.scheme == "ho": + return [ + ( + self.train_utterances(idx), + self.train_labels(idx), + self.validation_utterances(idx), + self.validation_labels(idx), + ) + ] + + if self.scheme == "cv": + raise NotImplementedError + + msg = "something's wrong" + raise RuntimeError(msg) + def dump(self, filepath: str | Path) -> None: """ Save the dataset splits and intents to a JSON file. 
@@ -161,7 +184,7 @@ def dump(self, filepath: str | Path) -> None: """ self.dataset.to_json(filepath) - def _split(self, random_seed: int, split_train: bool) -> None: + def _split_ho(self, random_seed: int, split_train: bool) -> None: has_validation_split = any(split.startswith(Split.VALIDATION) for split in self.dataset) if split_train and Split.TRAIN in self.dataset: diff --git a/autointent/modules/abc/_scoring.py b/autointent/modules/abc/_scoring.py index 7ac8045ff..bac3f8f0e 100644 --- a/autointent/modules/abc/_scoring.py +++ b/autointent/modules/abc/_scoring.py @@ -40,7 +40,7 @@ def score(self, context: Context, test: bool, metrics: list[str]) -> dict[str, f return self.score_metrics((labels, scores), chosen_metrics) metrics_values = {name: [] for name in chosen_metrics} - for train_utterances, train_labels, val_utterances, val_labels in context.validation_iterator(0): + for train_utterances, train_labels, val_utterances, val_labels in context.data_handler.validation_iterator(0): self.fit(train_utterances, train_labels) val_scores = self.predict(val_utterances) for name, fn in chosen_metrics.items(): From 86016782475a9a5ba18bbaa9eae5e977bfe64408 Mon Sep 17 00:00:00 2001 From: voorhs Date: Sat, 1 Feb 2025 13:00:10 +0300 Subject: [PATCH 03/74] move obtaining data for train from node optimizer to modules themselves --- autointent/modules/abc/_base.py | 4 +++ autointent/modules/abc/_decision.py | 4 +++ autointent/modules/abc/_embedding.py | 5 ++++ autointent/modules/abc/_scoring.py | 4 +++ autointent/modules/regexp/_regexp.py | 3 ++ .../scoring/_description/description.py | 7 +++++ .../nodes/_optimization/_node_optimizer.py | 29 ------------------- 7 files changed, 27 insertions(+), 29 deletions(-) diff --git a/autointent/modules/abc/_base.py b/autointent/modules/abc/_base.py index 287b5126d..567405254 100644 --- a/autointent/modules/abc/_base.py +++ b/autointent/modules/abc/_base.py @@ -166,3 +166,7 @@ def _get_task_specs(labels: ListOfGenericLabels) -> tuple[int, bool, bool]: multilabel = isinstance(in_domain_label, list) n_classes = len(labels[0]) if multilabel else len(set(labels).difference([None])) # type: ignore[arg-type] return n_classes, multilabel, contains_oos_samples + + @abstractmethod + def get_train_data(self, context: Context) -> Any: # noqa: ANN401 + ... 
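For illustration, the calling convention these get_train_data hooks enable could look roughly like the sketch below; the fit_from_context helper is hypothetical and only shows the intended call shape, it is not code from this series:

    from autointent import Context
    from autointent.modules.abc import Module

    def fit_from_context(module: Module, context: Context) -> None:
        # Each module now knows which slices of the context it trains on,
        # so the optimizer no longer needs per-node-type branching.
        module.fit(*module.get_train_data(context))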
diff --git a/autointent/modules/abc/_decision.py b/autointent/modules/abc/_decision.py index 750ee05f7..367a2a435 100644 --- a/autointent/modules/abc/_decision.py +++ b/autointent/modules/abc/_decision.py @@ -72,6 +72,10 @@ def _validate_task(self, scores: npt.NDArray[Any], labels: ListOfGenericLabels) ) raise ValueError(msg) + def get_train_data(self, context: Context) -> tuple[npt.NDArray[Any], ListOfGenericLabels, list[Tag]]: + labels, scores = get_decision_evaluation_data(context, "train") + return (scores, labels, context.data_handler.tags) + def get_decision_evaluation_data( context: Context, diff --git a/autointent/modules/abc/_embedding.py b/autointent/modules/abc/_embedding.py index cb3a2f412..708a749eb 100644 --- a/autointent/modules/abc/_embedding.py +++ b/autointent/modules/abc/_embedding.py @@ -2,8 +2,13 @@ from abc import ABC +from autointent import Context +from autointent.custom_types import ListOfLabels from autointent.modules.abc import Module class EmbeddingModule(Module, ABC): """Base class for embedding modules.""" + + def get_train_data(self, context: Context) -> tuple[list[str], ListOfLabels]: + return (context.data_handler.train_utterances(0), context.data_handler.train_labels(0)) diff --git a/autointent/modules/abc/_scoring.py b/autointent/modules/abc/_scoring.py index bac3f8f0e..97004a63d 100644 --- a/autointent/modules/abc/_scoring.py +++ b/autointent/modules/abc/_scoring.py @@ -8,6 +8,7 @@ from autointent import Context from autointent.context.optimization_info import ScorerArtifact +from autointent.custom_types import ListOfLabels from autointent.metrics import SCORING_METRICS_MULTICLASS, SCORING_METRICS_MULTILABEL from autointent.modules.abc import Module @@ -60,6 +61,9 @@ def get_assets(self) -> ScorerArtifact: test_scores=self._test_scores, ) + def get_train_data(self, context: Context) -> tuple[list[str], ListOfLabels]: + return (context.data_handler.train_utterances(0), context.data_handler.train_labels(0)) + @abstractmethod def predict(self, utterances: list[str]) -> npt.NDArray[Any]: """ diff --git a/autointent/modules/regexp/_regexp.py b/autointent/modules/regexp/_regexp.py index 05d0278eb..f486b682b 100644 --- a/autointent/modules/regexp/_regexp.py +++ b/autointent/modules/regexp/_regexp.py @@ -151,3 +151,6 @@ def _compile_regex_patterns(self) -> None: ) for regexp_patterns in self.regexp_patterns ] + + def get_train_data(self, context: Context) -> tuple: + return () diff --git a/autointent/modules/scoring/_description/description.py b/autointent/modules/scoring/_description/description.py index aa6994c8c..071837cf2 100644 --- a/autointent/modules/scoring/_description/description.py +++ b/autointent/modules/scoring/_description/description.py @@ -146,3 +146,10 @@ def predict(self, utterances: list[str]) -> NDArray[np.float64]: def clear_cache(self) -> None: """Clear cached data in memory used by the embedder.""" self._embedder.clear_ram() + + def get_train_data(self, context: Context) -> tuple[list[str], ListOfLabels, list[str]]: + return ( + context.data_handler.train_utterances(0), + context.data_handler.train_labels(0), + context.data_handler.intent_descriptions, + ) diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_optimization/_node_optimizer.py index 868403f9b..f8f29b763 100644 --- a/autointent/nodes/_optimization/_node_optimizer.py +++ b/autointent/nodes/_optimization/_node_optimizer.py @@ -11,8 +11,6 @@ from autointent.context import Context from autointent.custom_types import NodeType -from 
autointent.modules.abc import Module -from autointent.modules.abc._decision import get_decision_evaluation_data from autointent.nodes._nodes_info import NODES_INFO @@ -116,30 +114,3 @@ def get_module_dump_dir(self, dump_dir: Path, module_name: str, j_combination: i dump_dir_ = dump_dir / self.node_info.node_type / module_name / f"comb_{j_combination}" dump_dir_.mkdir(parents=True, exist_ok=True) return str(dump_dir_) - - def module_fit(self, module: Module, context: Context) -> None: - """ - Fit the module. - - :param module: Module to fit - :param context: Context to use - """ - if self.node_info.node_type in ["embedding", "scoring"]: - if module.__class__.__name__ == "DescriptionScorer": - args = ( - context.data_handler.train_utterances(0), - context.data_handler.train_labels(0), - context.data_handler.intent_descriptions, - ) - else: - args = (context.data_handler.train_utterances(0), context.data_handler.train_labels(0)) # type: ignore[assignment] - elif self.node_info.node_type == "decision": - labels, scores = get_decision_evaluation_data(context, "train") - args = (scores, labels, context.data_handler.tags) # type: ignore[assignment] - elif self.node_info.node_type == "regexp": - args = () # type: ignore[assignment] - else: - msg = "something's wrong" - self._logger.error(msg) - raise ValueError(msg) - module.fit(*args) # type: ignore[arg-type] From ccf6e415b6e00d3430457eb6b175eed6160126f7 Mon Sep 17 00:00:00 2001 From: voorhs Date: Sun, 2 Feb 2025 08:01:20 +0300 Subject: [PATCH 04/74] stage progress --- autointent/modules/abc/_base.py | 4 ++-- autointent/modules/abc/_scoring.py | 4 ++++ autointent/nodes/_optimization/_node_optimizer.py | 5 +---- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/autointent/modules/abc/_base.py b/autointent/modules/abc/_base.py index 567405254..61de46241 100644 --- a/autointent/modules/abc/_base.py +++ b/autointent/modules/abc/_base.py @@ -3,7 +3,7 @@ import logging from abc import ABC, abstractmethod from pathlib import Path -from typing import Any, Literal +from typing import Any import numpy.typing as npt @@ -34,7 +34,7 @@ def fit(self, *args: tuple[Any], **kwargs: dict[str, Any]) -> None: """ @abstractmethod - def score(self, context: Context, split: Literal["validation", "test"], metrics: list[str]) -> dict[str, float]: + def score(self, context: Context, test: bool, metrics: list[str]) -> dict[str, float]: """ Calculate metric on test set and return metric value. 
diff --git a/autointent/modules/abc/_scoring.py b/autointent/modules/abc/_scoring.py index 97004a63d..f93c55c2b 100644 --- a/autointent/modules/abc/_scoring.py +++ b/autointent/modules/abc/_scoring.py @@ -41,11 +41,15 @@ def score(self, context: Context, test: bool, metrics: list[str]) -> dict[str, f return self.score_metrics((labels, scores), chosen_metrics) metrics_values = {name: [] for name in chosen_metrics} + all_val_scores = [] for train_utterances, train_labels, val_utterances, val_labels in context.data_handler.validation_iterator(0): self.fit(train_utterances, train_labels) val_scores = self.predict(val_utterances) for name, fn in chosen_metrics.items(): metrics_values[name].append(fn(val_labels, val_scores)) + all_val_scores.append(val_scores) + + self._validation_scores = np.concat(all_val_scores, axis=0) return {name: np.mean(values_list) for name, values_list in metrics_values.items()} diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_optimization/_node_optimizer.py index f8f29b763..2dd481bfa 100644 --- a/autointent/nodes/_optimization/_node_optimizer.py +++ b/autointent/nodes/_optimization/_node_optimizer.py @@ -66,11 +66,8 @@ def fit(self, context: Context) -> None: if embedder_name is not None: module_kwargs["embedder_name"] = embedder_name - self._logger.debug("optimizing %s module...", module_name) - self.module_fit(module, context) - self._logger.debug("scoring %s module...", module_name) - metrics_score = module.score(context, "validation", self.metrics) + metrics_score = module.score(context, test=False, metrics=self.metrics) metric_value = metrics_score[self.target_metric] context.callback_handler.log_metrics(metrics_score) From a307fee35f5060445020414889e7cbf39f20907b Mon Sep 17 00:00:00 2001 From: voorhs Date: Sun, 2 Feb 2025 09:59:13 +0300 Subject: [PATCH 05/74] implement cv iterator --- .../context/data_handler/_data_handler.py | 32 +++++++++++-------- autointent/modules/abc/_decision.py | 19 +++++++++-- autointent/modules/abc/_scoring.py | 32 +++++++++++-------- 3 files changed, 54 insertions(+), 29 deletions(-) diff --git a/autointent/context/data_handler/_data_handler.py b/autointent/context/data_handler/_data_handler.py index b0772e4d0..7fa2a4687 100644 --- a/autointent/context/data_handler/_data_handler.py +++ b/autointent/context/data_handler/_data_handler.py @@ -1,6 +1,7 @@ """Data Handler file.""" import logging +from collections.abc import Generator from pathlib import Path from typing import Literal, TypedDict, cast @@ -30,7 +31,12 @@ class DataHandler: # TODO rename to Validator """Data handler class.""" def __init__( - self, dataset: Dataset, scheme: Literal["cv", "ho"], split_train: bool = True, random_seed: int = 0 + self, + dataset: Dataset, + scheme: Literal["cv", "ho"], + split_train: bool = True, + random_seed: int = 0, + n_folds: int = 3, ) -> None: """ Initialize the data handler. 
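A rough usage sketch of the extended constructor follows; it assumes `dataset` is an already loaded autointent Dataset and that _split_cv has laid the training data out as folds train_0 ... train_{n_folds-1}:

    from autointent.context.data_handler._data_handler import DataHandler

    cv_handler = DataHandler(dataset, scheme="cv", n_folds=3)  # cross-validation over 3 folds
    ho_handler = DataHandler(dataset, scheme="ho")             # classic hold-out split
    for train_x, train_y, val_x, val_y in cv_handler.validation_iterator():
        # each fold serves as validation once; the remaining folds are concatenated for training
        print(len(train_x), "train utterances;", len(val_x), "validation utterances")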
@@ -46,6 +52,7 @@ def __init__( self.n_classes = self.dataset.n_classes self.scheme = scheme + self.n_folds = n_folds if scheme == "ho": self._split_ho(random_seed, split_train) @@ -159,19 +166,18 @@ def test_labels(self, idx: int | None = None) -> ListOfGenericLabels: split = f"{Split.TEST}_{idx}" if idx is not None else Split.TEST return cast(ListOfGenericLabels, self.dataset[split][self.dataset.label_feature]) - def validation_iterator(self, idx: int | None = None) -> list[tuple[list, list, list, list]]: + def validation_iterator(self) -> Generator[tuple[list, list, list, list]]: if self.scheme == "ho": - return [ - ( - self.train_utterances(idx), - self.train_labels(idx), - self.validation_utterances(idx), - self.validation_labels(idx), - ) - ] - - if self.scheme == "cv": - raise NotImplementedError + msg = "Cannot call cross-validation on hold-out DataHandler" + raise RuntimeError(msg) + + for j in range(self.n_folds): + val_utterances = self.train_utterances(j) + val_labels = self.train_labels(j) + train_folds = [i for i in range(self.n_folds) if i != j] + train_utterances = [ut for i_fold in train_folds for ut in self.train_utterances(i_fold)] + train_labels = [ut for i_fold in train_folds for ut in self.train_labels(i_fold)] + yield train_utterances, train_labels, val_utterances, val_labels msg = "something's wrong" raise RuntimeError(msg) diff --git a/autointent/modules/abc/_decision.py b/autointent/modules/abc/_decision.py index 367a2a435..f7501f4eb 100644 --- a/autointent/modules/abc/_decision.py +++ b/autointent/modules/abc/_decision.py @@ -40,7 +40,7 @@ def predict(self, scores: npt.NDArray[Any]) -> ListOfGenericLabels: :param scores: Scores to predict """ - def score(self, context: Context, split: Literal["validation", "test"], metrics: list[str]) -> dict[str, float]: + def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: """ Calculate metric on test set and return metric value. @@ -48,7 +48,20 @@ def score(self, context: Context, split: Literal["validation", "test"], metrics: :param split: Target split :return: Computed metrics value for the test set or error code of metrics """ - labels, scores = get_decision_evaluation_data(context, split) + labels, scores = get_decision_evaluation_data(context, "validation") + self._decisions = self.predict(scores) + chosen_metrics = {name: fn for name, fn in PREDICTION_METRICS_MULTICLASS.items() if name in metrics} + return self.score_metrics((labels, self._decisions), chosen_metrics) + + def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: + """ + Calculate metric on test set and return metric value. + + :param context: Context to score + :param split: Target split + :return: Computed metrics value for the test set or error code of metrics + """ + labels, scores = get_decision_evaluation_data(context, "validation") self._decisions = self.predict(scores) chosen_metrics = {name: fn for name, fn in PREDICTION_METRICS_MULTICLASS.items() if name in metrics} return self.score_metrics((labels, self._decisions), chosen_metrics) @@ -79,7 +92,7 @@ def get_train_data(self, context: Context) -> tuple[npt.NDArray[Any], ListOfGene def get_decision_evaluation_data( context: Context, - split: Literal["train", "validation", "test"], + split: Literal["train", "validation", "test"], # TODO add index to handle both ho and cv ) -> tuple[ListOfGenericLabels, npt.NDArray[np.float64]]: """ Get decision evaluation data. 
diff --git a/autointent/modules/abc/_scoring.py b/autointent/modules/abc/_scoring.py index f93c55c2b..945353868 100644 --- a/autointent/modules/abc/_scoring.py +++ b/autointent/modules/abc/_scoring.py @@ -23,7 +23,22 @@ class ScoringModule(Module, ABC): supports_oos = False - def score(self, context: Context, test: bool, metrics: list[str]) -> dict[str, float]: + def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: + utterances = context.data_handler.validation_utterances(0) + labels = context.data_handler.validation_labels(0) + + scores = self.predict(utterances) + + self._artifact = ScorerArtifact( + train_scores=self.predict(context.data_handler.train_utterances(1)), + validation_scores=self.predict(context.data_handler.validation_utterances(1)), + ) + + metrics_dict = SCORING_METRICS_MULTILABEL if context.is_multilabel() else SCORING_METRICS_MULTICLASS + chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics} + return self.score_metrics((labels, scores), chosen_metrics) + + def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: """ Evaluate the scorer on a test set and compute the specified metric. @@ -34,12 +49,6 @@ def score(self, context: Context, test: bool, metrics: list[str]) -> dict[str, f metrics_dict = SCORING_METRICS_MULTILABEL if context.is_multilabel() else SCORING_METRICS_MULTICLASS chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics} - if test: - utterances = context.data_handler.test_utterances() - labels = context.data_handler.test_labels() - scores = self.predict(utterances) - return self.score_metrics((labels, scores), chosen_metrics) - metrics_values = {name: [] for name in chosen_metrics} all_val_scores = [] for train_utterances, train_labels, val_utterances, val_labels in context.data_handler.validation_iterator(0): @@ -49,7 +58,8 @@ def score(self, context: Context, test: bool, metrics: list[str]) -> dict[str, f metrics_values[name].append(fn(val_labels, val_scores)) all_val_scores.append(val_scores) - self._validation_scores = np.concat(all_val_scores, axis=0) + # save all predictions unbinded to preserve folding + self._artifact = ScorerArtifact(validation_scores=all_val_scores) return {name: np.mean(values_list) for name, values_list in metrics_values.items()} @@ -59,11 +69,7 @@ def get_assets(self) -> ScorerArtifact: :return: ScorerArtifact containing test, validation and test scores. 
""" - return ScorerArtifact( - train_scores=self._train_scores, - validation_scores=self._validation_scores, - test_scores=self._test_scores, - ) + return self._artifact def get_train_data(self, context: Context) -> tuple[list[str], ListOfLabels]: return (context.data_handler.train_utterances(0), context.data_handler.train_labels(0)) From 8649e14cc21436c5a112eba3e40a02894b44cc13 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 06:10:48 +0300 Subject: [PATCH 06/74] minor bug fix --- autointent/context/optimization_info/_data_models.py | 3 +++ autointent/modules/abc/_scoring.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/autointent/context/optimization_info/_data_models.py b/autointent/context/optimization_info/_data_models.py index 49ca6bf83..55ce0937c 100644 --- a/autointent/context/optimization_info/_data_models.py +++ b/autointent/context/optimization_info/_data_models.py @@ -42,6 +42,9 @@ class ScorerArtifact(Artifact): train_scores: NDArray[np.float64] | None = Field(None, description="Scorer outputs for train utterances") validation_scores: NDArray[np.float64] | None = Field(None, description="Scorer outputs for validation utterances") test_scores: NDArray[np.float64] | None = Field(None, description="Scorer outputs for test utterances") + folded_scores: list[NDArray[np.float64]] | None = Field( + None, description="Scores for each fold from cross-validation" + ) class DecisionArtifact(Artifact): diff --git a/autointent/modules/abc/_scoring.py b/autointent/modules/abc/_scoring.py index 945353868..e8916d816 100644 --- a/autointent/modules/abc/_scoring.py +++ b/autointent/modules/abc/_scoring.py @@ -51,7 +51,7 @@ def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: metrics_values = {name: [] for name in chosen_metrics} all_val_scores = [] - for train_utterances, train_labels, val_utterances, val_labels in context.data_handler.validation_iterator(0): + for train_utterances, train_labels, val_utterances, val_labels in context.data_handler.validation_iterator(): self.fit(train_utterances, train_labels) val_scores = self.predict(val_utterances) for name, fn in chosen_metrics.items(): @@ -59,7 +59,7 @@ def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: all_val_scores.append(val_scores) # save all predictions unbinded to preserve folding - self._artifact = ScorerArtifact(validation_scores=all_val_scores) + self._artifact = ScorerArtifact(folded_scores=all_val_scores) return {name: np.mean(values_list) for name, values_list in metrics_values.items()} From a300f19cd354c030251a65286460b8cc82ba9618 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 06:11:43 +0300 Subject: [PATCH 07/74] implement cv iterator for decision node --- .../context/data_handler/_data_handler.py | 3 ++ .../optimization_info/_optimization_info.py | 9 ++++ autointent/modules/abc/_decision.py | 42 +++++++++++++------ 3 files changed, 42 insertions(+), 12 deletions(-) diff --git a/autointent/context/data_handler/_data_handler.py b/autointent/context/data_handler/_data_handler.py index 7fa2a4687..f7ef69483 100644 --- a/autointent/context/data_handler/_data_handler.py +++ b/autointent/context/data_handler/_data_handler.py @@ -110,6 +110,9 @@ def train_labels(self, idx: int | None = None) -> ListOfGenericLabels: split = f"{Split.TRAIN}_{idx}" if idx is not None else Split.TRAIN return cast(ListOfGenericLabels, self.dataset[split][self.dataset.label_feature]) + def train_labels_folded(self) -> list[ListOfGenericLabels]: + return 
[self.train_labels(j) for j in range(self.n_folds)] + def validation_utterances(self, idx: int | None = None) -> list[str]: """ Retrieve validation utterances from the dataset. diff --git a/autointent/context/optimization_info/_optimization_info.py b/autointent/context/optimization_info/_optimization_info.py index 7ec1e5a01..f426d17d4 100644 --- a/autointent/context/optimization_info/_optimization_info.py +++ b/autointent/context/optimization_info/_optimization_info.py @@ -166,6 +166,15 @@ def get_best_validation_scores(self) -> NDArray[np.float64] | None: best_scorer_artifact: ScorerArtifact = self._get_best_artifact(node_type=NodeType.scoring) # type: ignore[assignment] return best_scorer_artifact.validation_scores + def get_best_folded_scores(self) -> list[NDArray[np.float64]] | None: + """ + Retrieve the validation scores from the best scorer node. + + :return: Validation scores as a numpy array. + """ + best_scorer_artifact: ScorerArtifact = self._get_best_artifact(node_type=NodeType.scoring) # type: ignore[assignment] + return best_scorer_artifact.folded_scores + def get_best_test_scores(self) -> NDArray[np.float64] | None: """ Retrieve the test scores from the best scorer node. diff --git a/autointent/modules/abc/_decision.py b/autointent/modules/abc/_decision.py index f7501f4eb..b0603ddb8 100644 --- a/autointent/modules/abc/_decision.py +++ b/autointent/modules/abc/_decision.py @@ -49,9 +49,10 @@ def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: :return: Computed metrics value for the test set or error code of metrics """ labels, scores = get_decision_evaluation_data(context, "validation") - self._decisions = self.predict(scores) + decisions = self.predict(scores) chosen_metrics = {name: fn for name, fn in PREDICTION_METRICS_MULTICLASS.items() if name in metrics} - return self.score_metrics((labels, self._decisions), chosen_metrics) + self._artifact = DecisionArtifact(labels=decisions) + return self.score_metrics((labels, decisions), chosen_metrics) def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: """ @@ -61,14 +62,34 @@ def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: :param split: Target split :return: Computed metrics value for the test set or error code of metrics """ - labels, scores = get_decision_evaluation_data(context, "validation") - self._decisions = self.predict(scores) + labels = context.data_handler.train_labels_folded() + scores = context.optimization_info.get_best_folded_scores() + + if scores is None: + msg = "No folded scores are found." 
+ raise RuntimeError(msg) + chosen_metrics = {name: fn for name, fn in PREDICTION_METRICS_MULTICLASS.items() if name in metrics} - return self.score_metrics((labels, self._decisions), chosen_metrics) + metrics_values = {name: [] for name in chosen_metrics} + all_val_decisions = [] + for j in range(context.data_handler.n_folds): + val_labels = labels[j] + val_scores = scores[j] + train_folds = [i for i in range(context.data_handler.n_folds) if i != j] + train_labels = [ut for i_fold in train_folds for ut in labels[i_fold]] + train_scores = [ut for i_fold in train_folds for ut in scores[i_fold]] + self.fit(train_scores, train_labels, context.data_handler.tags) + val_decisions = self.predict(val_scores) + for name, fn in chosen_metrics.items(): + metrics_values[name].append(fn(val_labels, val_decisions)) + all_val_decisions.append(val_decisions) + + self._artifact = DecisionArtifact(labels=[pred for pred_list in all_val_decisions for pred in pred_list]) + return {name: np.mean(values_list) for name, values_list in metrics_values.items()} def get_assets(self) -> DecisionArtifact: """Return useful assets that represent intermediate data into context.""" - return DecisionArtifact(labels=self._decisions) + return self._artifact def clear_cache(self) -> None: """Clear cache.""" @@ -80,7 +101,7 @@ def _validate_task(self, scores: npt.NDArray[Any], labels: ListOfGenericLabels) if self._n_classes != scores.shape[1]: msg = ( "There is a mismatch between provided labels and scores. " - f"Labels contains {self._n_classes} classes, but scores contain " + f"Labels contain {self._n_classes} classes, but scores contain " f"probabilities for {scores.shape[1]} classes." ) raise ValueError(msg) @@ -92,7 +113,7 @@ def get_train_data(self, context: Context) -> tuple[npt.NDArray[Any], ListOfGene def get_decision_evaluation_data( context: Context, - split: Literal["train", "validation", "test"], # TODO add index to handle both ho and cv + split: Literal["train", "validation"], ) -> tuple[ListOfGenericLabels, npt.NDArray[np.float64]]: """ Get decision evaluation data. @@ -107,11 +128,8 @@ def get_decision_evaluation_data( elif split == "validation": labels = context.data_handler.validation_labels(1) scores = context.optimization_info.get_best_validation_scores() - elif split == "test": - labels = context.data_handler.test_labels() - scores = context.optimization_info.get_best_test_scores() else: - message = f"Invalid split '{split}' provided. Expected one of 'train', 'validation', or 'test'." + message = f"Invalid split '{split}' provided. Expected one of 'train', 'validation'." 
raise ValueError(message) if scores is None: From 815561f1b1e31d832a6228f0971ba67b7e0515df Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 06:42:35 +0300 Subject: [PATCH 08/74] move cv iteration to base module definition --- autointent/modules/abc/_base.py | 20 +++++++++++++++++++- autointent/modules/abc/_scoring.py | 15 ++++----------- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/autointent/modules/abc/_base.py b/autointent/modules/abc/_base.py index 61de46241..4f6635ba6 100644 --- a/autointent/modules/abc/_base.py +++ b/autointent/modules/abc/_base.py @@ -2,9 +2,11 @@ import logging from abc import ABC, abstractmethod +from collections.abc import Iterable from pathlib import Path from typing import Any +import numpy as np import numpy.typing as npt from autointent._dump_tools import Dumper @@ -106,7 +108,7 @@ def get_embedder_name(self) -> str | None: return None @staticmethod - def score_metrics(params: tuple[Any, Any], metrics_dict: dict[str, Any]) -> dict[str, float]: + def score_metrics_ho(params: tuple[Any, Any], metrics_dict: dict[str, Any]) -> dict[str, float]: """ Score metrics on the test set. @@ -119,6 +121,22 @@ def score_metrics(params: tuple[Any, Any], metrics_dict: dict[str, Any]) -> dict metrics[metric_name] = metric_fn(*params) return metrics + def score_metrics_cv( + self, metrics_dict: dict[str, Any], cv_iterator: Iterable[tuple[list, list, list, list]] + ) -> tuple[dict[str, float], list[ListOfGenericLabels] | list[npt.NDArray[Any]]]: + metrics_values = {name: [] for name in metrics_dict} + all_val_preds = [] + + for train_utterances, train_labels, val_utterances, val_labels in cv_iterator: + self.fit(train_utterances, train_labels) + val_preds = self.predict(val_utterances) + for name, fn in metrics_dict.items(): + metrics_values[name].append(fn(val_labels, val_preds)) + all_val_preds.append(val_preds) + + metrics = {name: np.mean(values_list) for name, values_list in metrics_values.items()} + return metrics, all_val_preds + def _validate_multilabel(self, data_is_multilabel: bool) -> None: if data_is_multilabel and not self.supports_multilabel: msg = f'"{self.name}" module is incompatible with multi-label classifiction.' 
diff --git a/autointent/modules/abc/_scoring.py b/autointent/modules/abc/_scoring.py index e8916d816..8d1e42057 100644 --- a/autointent/modules/abc/_scoring.py +++ b/autointent/modules/abc/_scoring.py @@ -3,7 +3,6 @@ from abc import ABC, abstractmethod from typing import Any -import numpy as np import numpy.typing as npt from autointent import Context @@ -49,19 +48,13 @@ def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: metrics_dict = SCORING_METRICS_MULTILABEL if context.is_multilabel() else SCORING_METRICS_MULTICLASS chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics} - metrics_values = {name: [] for name in chosen_metrics} - all_val_scores = [] - for train_utterances, train_labels, val_utterances, val_labels in context.data_handler.validation_iterator(): - self.fit(train_utterances, train_labels) - val_scores = self.predict(val_utterances) - for name, fn in chosen_metrics.items(): - metrics_values[name].append(fn(val_labels, val_scores)) - all_val_scores.append(val_scores) + metrics_calculated, all_val_scores = self.score_metrics_cv( + chosen_metrics, context.data_handler.validation_iterator() + ) - # save all predictions unbinded to preserve folding self._artifact = ScorerArtifact(folded_scores=all_val_scores) - return {name: np.mean(values_list) for name, values_list in metrics_values.items()} + return metrics_calculated def get_assets(self) -> ScorerArtifact: """ From f3ef812f3dddc52a61a0c932ec199f268411ff60 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 06:43:55 +0300 Subject: [PATCH 09/74] implement cv iterator for embedding node --- autointent/modules/embedding/_logreg.py | 30 +++++++++------- autointent/modules/embedding/_retrieval.py | 40 +++++++++------------- 2 files changed, 33 insertions(+), 37 deletions(-) diff --git a/autointent/modules/embedding/_logreg.py b/autointent/modules/embedding/_logreg.py index d729b8741..e9f15b8e4 100644 --- a/autointent/modules/embedding/_logreg.py +++ b/autointent/modules/embedding/_logreg.py @@ -1,7 +1,5 @@ """LogregAimedEmbedding class for a proxy optimzation of embedding.""" -from typing import Literal - import numpy as np from numpy.typing import NDArray from sklearn.linear_model import LogisticRegression, LogisticRegressionCV @@ -129,29 +127,35 @@ def fit(self, utterances: list[str], labels: ListOfLabels) -> None: self._classifier.fit(embeddings, labels) - def score(self, context: Context, split: Literal["validation", "test"], metrics: list[str]) -> dict[str, float]: + def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: """ Evaluate the embedding model using a specified metric function. :param context: The context containing test data and labels. - :param split: Target split :return: Computed metrics value for the test set or error code of metrics """ - if split == "validation": - utterances = context.data_handler.validation_utterances(0) - labels = context.data_handler.validation_labels(0) - elif split == "test": - utterances = context.data_handler.test_utterances() - labels = context.data_handler.test_labels() - else: - message = f"Invalid split '{split}' provided. Expected one of 'validation', or 'test'." 
- raise ValueError(message) + utterances = context.data_handler.validation_utterances(0) + labels = context.data_handler.validation_labels(0) probas = self.predict(utterances) metrics_dict = SCORING_METRICS_MULTILABEL if context.is_multilabel() else SCORING_METRICS_MULTICLASS chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics} + return self.score_metrics((labels, probas), chosen_metrics) + def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: + """ + Evaluate the embedding model using a specified metric function. + + :param context: The context containing test data and labels. + :return: Computed metrics value for the test set or error code of metrics + """ + metrics_dict = SCORING_METRICS_MULTILABEL if context.is_multilabel() else SCORING_METRICS_MULTICLASS + chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics} + + metrics_calculated, _ = self.score_metrics_cv(chosen_metrics, context.data_handler.validation_iterator()) + return metrics_calculated + def get_assets(self) -> RetrieverArtifact: """ Get the classifier artifacts for this module. diff --git a/autointent/modules/embedding/_retrieval.py b/autointent/modules/embedding/_retrieval.py index b7d919add..2b166aacb 100644 --- a/autointent/modules/embedding/_retrieval.py +++ b/autointent/modules/embedding/_retrieval.py @@ -1,7 +1,5 @@ """RetrievalAimedEmbedding class for a proxy optimization of embedding.""" -from typing import Literal - from autointent import Context, VectorIndex from autointent.context.optimization_info import RetrieverArtifact from autointent.custom_types import ListOfLabels @@ -109,28 +107,27 @@ def fit(self, utterances: list[str], labels: ListOfLabels) -> None: ) self._vector_index.add(utterances, labels) - def score(self, context: Context, split: Literal["validation", "test"], metrics: list[str]) -> dict[str, float]: + def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: """ Evaluate the embedding model using a specified metric function. :param context: The context containing test data and labels. - :param split: Target split :return: Computed metrics value for the test set or error code of metrics """ - if split == "validation": - utterances = context.data_handler.validation_utterances(0) - labels = context.data_handler.validation_labels(0) - elif split == "test": - utterances = context.data_handler.test_utterances() - labels = context.data_handler.test_labels() - else: - message = f"Invalid split '{split}' provided. Expected one of 'validation', or 'test'." 
- raise ValueError(message) - predictions, _, _ = self._vector_index.query(utterances, self.k) + utterances = context.data_handler.validation_utterances(0) + labels = context.data_handler.validation_labels(0) + predictions = self.predict(utterances) + + metrics_dict = RETRIEVAL_METRICS_MULTILABEL if context.is_multilabel() else RETRIEVAL_METRICS_MULTICLASS + chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics} + return self.score_metrics_ho((labels, predictions), chosen_metrics) + def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: metrics_dict = RETRIEVAL_METRICS_MULTILABEL if context.is_multilabel() else RETRIEVAL_METRICS_MULTICLASS chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics} - return self.score_metrics((labels, predictions), chosen_metrics) + + metrics_calculated, _ = self.score_metrics_cv(chosen_metrics, context.data_handler.validation_iterator()) + return metrics_calculated def get_assets(self) -> RetrieverArtifact: """ @@ -144,17 +141,12 @@ def clear_cache(self) -> None: """Clear cached data in memory used by the vector index.""" self._vector_index.clear_ram() - def predict(self, utterances: list[str]) -> tuple[list[ListOfLabels], list[list[float]], list[list[str]]]: + def predict(self, utterances: list[str]) -> list[ListOfLabels]: """ Predict the nearest neighbors for a list of utterances. :param utterances: List of utterances for which nearest neighbors are to be retrieved. - :return: A tuple containing: - - labels: List of retrieved labels for each utterance. - - distances: List of distances to the nearest neighbors. - - texts: List of retrieved text data corresponding to the neighbors. + :return: List of labels for each retrieved utterance. """ - return self._vector_index.query( - utterances, - self.k, - ) + predictions, _, _ = self._vector_index.query(utterances, self.k) + return predictions From 593744ae9155787aae290eee876d7e216ba4839c Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 06:57:46 +0300 Subject: [PATCH 10/74] add training to `score_ho` of each node --- autointent/modules/abc/_decision.py | 9 ++++++--- autointent/modules/abc/_scoring.py | 11 +++++++---- autointent/modules/embedding/_logreg.py | 11 +++++++---- autointent/modules/embedding/_retrieval.py | 11 +++++++---- 4 files changed, 27 insertions(+), 15 deletions(-) diff --git a/autointent/modules/abc/_decision.py b/autointent/modules/abc/_decision.py index b0603ddb8..391a23675 100644 --- a/autointent/modules/abc/_decision.py +++ b/autointent/modules/abc/_decision.py @@ -48,11 +48,14 @@ def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: :param split: Target split :return: Computed metrics value for the test set or error code of metrics """ - labels, scores = get_decision_evaluation_data(context, "validation") - decisions = self.predict(scores) + train_scores, train_labels = self.get_train_data(context) + self.fit(train_scores, train_labels, context.data_handler.tags) + + val_labels, val_scores = get_decision_evaluation_data(context, "validation") + decisions = self.predict(val_scores) chosen_metrics = {name: fn for name, fn in PREDICTION_METRICS_MULTICLASS.items() if name in metrics} self._artifact = DecisionArtifact(labels=decisions) - return self.score_metrics((labels, decisions), chosen_metrics) + return self.score_metrics_ho((val_labels, decisions), chosen_metrics) def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: """ diff --git 
a/autointent/modules/abc/_scoring.py b/autointent/modules/abc/_scoring.py index 8d1e42057..603ea73af 100644 --- a/autointent/modules/abc/_scoring.py +++ b/autointent/modules/abc/_scoring.py @@ -23,10 +23,13 @@ class ScoringModule(Module, ABC): supports_oos = False def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: - utterances = context.data_handler.validation_utterances(0) - labels = context.data_handler.validation_labels(0) + train_utterances, train_labels = self.get_train_data(context) + self.fit(train_utterances, train_labels) - scores = self.predict(utterances) + val_utterances = context.data_handler.validation_utterances(0) + val_labels = context.data_handler.validation_labels(0) + + scores = self.predict(val_utterances) self._artifact = ScorerArtifact( train_scores=self.predict(context.data_handler.train_utterances(1)), @@ -35,7 +38,7 @@ def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: metrics_dict = SCORING_METRICS_MULTILABEL if context.is_multilabel() else SCORING_METRICS_MULTICLASS chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics} - return self.score_metrics((labels, scores), chosen_metrics) + return self.score_metrics_ho((val_labels, scores), chosen_metrics) def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: """ diff --git a/autointent/modules/embedding/_logreg.py b/autointent/modules/embedding/_logreg.py index e9f15b8e4..c50dab19d 100644 --- a/autointent/modules/embedding/_logreg.py +++ b/autointent/modules/embedding/_logreg.py @@ -134,14 +134,17 @@ def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: :param context: The context containing test data and labels. :return: Computed metrics value for the test set or error code of metrics """ - utterances = context.data_handler.validation_utterances(0) - labels = context.data_handler.validation_labels(0) + train_utterances, train_labels = self.get_train_data(context) + self.fit(train_utterances, train_labels) - probas = self.predict(utterances) + val_utterances = context.data_handler.validation_utterances(0) + val_labels = context.data_handler.validation_labels(0) + + probas = self.predict(val_utterances) metrics_dict = SCORING_METRICS_MULTILABEL if context.is_multilabel() else SCORING_METRICS_MULTICLASS chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics} - return self.score_metrics((labels, probas), chosen_metrics) + return self.score_metrics_ho((val_labels, probas), chosen_metrics) def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: """ diff --git a/autointent/modules/embedding/_retrieval.py b/autointent/modules/embedding/_retrieval.py index 2b166aacb..c68e62af0 100644 --- a/autointent/modules/embedding/_retrieval.py +++ b/autointent/modules/embedding/_retrieval.py @@ -114,13 +114,16 @@ def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: :param context: The context containing test data and labels. 
:return: Computed metrics value for the test set or error code of metrics """ - utterances = context.data_handler.validation_utterances(0) - labels = context.data_handler.validation_labels(0) - predictions = self.predict(utterances) + train_utterances, train_labels = self.get_train_data(context) + self.fit(train_utterances, train_labels) + + val_utterances = context.data_handler.validation_utterances(0) + val_labels = context.data_handler.validation_labels(0) + predictions = self.predict(val_utterances) metrics_dict = RETRIEVAL_METRICS_MULTILABEL if context.is_multilabel() else RETRIEVAL_METRICS_MULTICLASS chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics} - return self.score_metrics_ho((labels, predictions), chosen_metrics) + return self.score_metrics_ho((val_labels, predictions), chosen_metrics) def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: metrics_dict = RETRIEVAL_METRICS_MULTILABEL if context.is_multilabel() else RETRIEVAL_METRICS_MULTICLASS From 6fa0b24c20ee4c2159df542e4f27f99a2ef5cfe9 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 06:58:28 +0300 Subject: [PATCH 11/74] properly define base module --- autointent/modules/abc/_base.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/autointent/modules/abc/_base.py b/autointent/modules/abc/_base.py index 4f6635ba6..b6359f863 100644 --- a/autointent/modules/abc/_base.py +++ b/autointent/modules/abc/_base.py @@ -35,8 +35,7 @@ def fit(self, *args: tuple[Any], **kwargs: dict[str, Any]) -> None: :param kwargs: Kwargs to fit """ - @abstractmethod - def score(self, context: Context, test: bool, metrics: list[str]) -> dict[str, float]: + def score(self, context: Context, metrics: list[str]) -> dict[str, float]: """ Calculate metric on test set and return metric value. @@ -44,6 +43,20 @@ def score(self, context: Context, test: bool, metrics: list[str]) -> dict[str, f :param split: Split to score on :return: Computed metrics value for the test set or error code of metrics """ + if context.data_handler.scheme == "ho": + return self.score_ho(context, metrics) + if context.data_handler.scheme == "cv": + return self.score_cv(context, metrics) + msg = "Something's wrong with validation schemas" + raise RuntimeError(msg) + + @abstractmethod + def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: + ... + + @abstractmethod + def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: + ... @abstractmethod def get_assets(self) -> Artifact: From 278855c8460adf1a694c764dd28031fa74abc948 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 06:58:56 +0300 Subject: [PATCH 12/74] fix codestyle --- autointent/modules/abc/_base.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/autointent/modules/abc/_base.py b/autointent/modules/abc/_base.py index b6359f863..ec368bc6d 100644 --- a/autointent/modules/abc/_base.py +++ b/autointent/modules/abc/_base.py @@ -51,12 +51,10 @@ def score(self, context: Context, metrics: list[str]) -> dict[str, float]: raise RuntimeError(msg) @abstractmethod - def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: - ... + def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: ... @abstractmethod - def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: - ... + def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: ... 
@abstractmethod def get_assets(self) -> Artifact: From bf9074f8100468e8618504877796439db9bb5e03 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 07:08:29 +0300 Subject: [PATCH 13/74] remove regexp node --- autointent/modules/regexp/__init__.py | 5 - autointent/modules/regexp/_regexp.py | 156 ----------------------- autointent/nodes/__init__.py | 3 +- autointent/nodes/_nodes_info/__init__.py | 3 - autointent/nodes/_nodes_info/_regexp.py | 22 ---- 5 files changed, 1 insertion(+), 188 deletions(-) delete mode 100644 autointent/modules/regexp/__init__.py delete mode 100644 autointent/modules/regexp/_regexp.py delete mode 100644 autointent/nodes/_nodes_info/_regexp.py diff --git a/autointent/modules/regexp/__init__.py b/autointent/modules/regexp/__init__.py deleted file mode 100644 index b074482ff..000000000 --- a/autointent/modules/regexp/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -"""Weakly supported module for now.""" - -from ._regexp import RegExp - -__all__ = ["RegExp"] diff --git a/autointent/modules/regexp/_regexp.py b/autointent/modules/regexp/_regexp.py deleted file mode 100644 index f486b682b..000000000 --- a/autointent/modules/regexp/_regexp.py +++ /dev/null @@ -1,156 +0,0 @@ -"""Module for regular expressions based intent detection.""" - -import re -from typing import Any, Literal, TypedDict - -from autointent import Context -from autointent.context.data_handler._data_handler import RegexPatterns -from autointent.context.optimization_info import Artifact -from autointent.custom_types import LabelType -from autointent.metrics import REGEXP_METRICS -from autointent.modules.abc import Module -from autointent.schemas import Intent - - -class RegexPatternsCompiled(TypedDict): - """Compiled regex patterns.""" - - id: int - """Intent ID.""" - regexp_full_match: list[re.Pattern[str]] - """Compiled regex patterns for full match.""" - regexp_partial_match: list[re.Pattern[str]] - """Compiled regex patterns for partial match.""" - - -class RegExp(Module): - """Regular expressions based intent detection module.""" - - name = "regexp" - - @classmethod - def from_context(cls, context: Context) -> "RegExp": - """Initialize from context.""" - return cls() - - def fit(self, intents: list[dict[str, Any]]) -> None: - """ - Fit the model. - - :param intents: Intents to fit - """ - intents_parsed = [Intent(**dct) for dct in intents] - self.regexp_patterns = [ - RegexPatterns( - id=intent.id, - regexp_full_match=intent.regexp_full_match, - regexp_partial_match=intent.regexp_partial_match, - ) - for intent in intents_parsed - ] - self._compile_regex_patterns() - - def predict(self, utterances: list[str]) -> list[LabelType]: - """ - Predict intents for utterances. - - :param utterances: Utterances to predict - """ - return [self._predict_single(utterance)[0] for utterance in utterances] - - def predict_with_metadata( - self, - utterances: list[str], - ) -> tuple[list[LabelType], list[dict[str, Any]] | None]: - """ - Predict intents for utterances with metadata. - - :param utterances: Utterances to predict - """ - predictions, metadata = [], [] - for utterance in utterances: - prediction, matches = self._predict_single(utterance) - predictions.append(prediction) - metadata.append(matches) - return predictions, metadata - - def _match(self, utterance: str, intent_record: RegexPatternsCompiled) -> dict[str, list[str]]: - """ - Match utterance with intent record. 
- - :param utterance: Utterance to match - :param intent_record: Intent record to match - """ - full_matches = [ - pattern.pattern - for pattern in intent_record["regexp_full_match"] - if pattern.fullmatch(utterance) is not None - ] - partial_matches = [ - pattern.pattern - for pattern in intent_record["regexp_partial_match"] - if pattern.search(utterance) is not None - ] - return {"full_matches": full_matches, "partial_matches": partial_matches} - - def _predict_single(self, utterance: str) -> tuple[LabelType, dict[str, list[str]]]: - """ - Predict intent for a single utterance. - - :param utterance: Utterance to predict - """ - # todo test this - prediction = set() - matches: dict[str, list[str]] = {"full_matches": [], "partial_matches": []} - for intent_record in self.regexp_patterns_compiled: - intent_matches = self._match(utterance, intent_record) - if intent_matches["full_matches"] or intent_matches["partial_matches"]: - prediction.add(intent_record["id"]) - matches["full_matches"].extend(intent_matches["full_matches"]) - matches["partial_matches"].extend(intent_matches["partial_matches"]) - return list(prediction), matches - - def score(self, context: Context, split: Literal["validation", "test"], metrics: list[str]) -> dict[str, float]: - """ - Calculate metric on test set and return metric value. - - :param context: Context to score - :param split: Split to score on - :return: Computed metrics value for the test set or error code of metrics - """ - # TODO add parameter to a whole pipeline (or just to regexp module): - # whether or not to omit utterances on next stages if they were detected with regexp module - assets = { - "test_matches": list(self.predict(context.data_handler.test_utterances())), - } - if assets["test_matches"] is None: - msg = "no matches found" - raise ValueError(msg) - chosen_metrics = {name: fn for name, fn in REGEXP_METRICS.items() if name in metrics} - return self.score_metrics((context.data_handler.test_labels(), assets["test_matches"]), chosen_metrics) - - def clear_cache(self) -> None: - """Clear cache.""" - del self.regexp_patterns - - def get_assets(self) -> Artifact: - """Get assets.""" - return Artifact() - - def _compile_regex_patterns(self) -> None: - """Compile regex patterns.""" - self.regexp_patterns_compiled = [ - RegexPatternsCompiled( - id=regexp_patterns["id"], - regexp_full_match=[ - re.compile(pattern, flags=re.IGNORECASE) for pattern in regexp_patterns["regexp_full_match"] - ], - regexp_partial_match=[ - re.compile(ptn, flags=re.IGNORECASE) for ptn in regexp_patterns["regexp_partial_match"] - ], - ) - for regexp_patterns in self.regexp_patterns - ] - - def get_train_data(self, context: Context) -> tuple: - return () diff --git a/autointent/nodes/__init__.py b/autointent/nodes/__init__.py index f92cd4ef1..0257257a4 100644 --- a/autointent/nodes/__init__.py +++ b/autointent/nodes/__init__.py @@ -1,7 +1,7 @@ """Some core components used in AutoIntent behind the scenes.""" from ._inference_node import InferenceNode -from ._nodes_info import DecisionNodeInfo, EmbeddingNodeInfo, NodeInfo, RegExpNodeInfo, ScoringNodeInfo +from ._nodes_info import DecisionNodeInfo, EmbeddingNodeInfo, NodeInfo, ScoringNodeInfo from ._optimization import NodeOptimizer from .schemes import OptimizationConfig @@ -12,6 +12,5 @@ "NodeInfo", "NodeOptimizer", "OptimizationConfig", - "RegExpNodeInfo", "ScoringNodeInfo", ] diff --git a/autointent/nodes/_nodes_info/__init__.py b/autointent/nodes/_nodes_info/__init__.py index 39209acb2..ed1cf9a9b 100644 --- 
a/autointent/nodes/_nodes_info/__init__.py +++ b/autointent/nodes/_nodes_info/__init__.py @@ -3,14 +3,12 @@ from ._base import NodeInfo from ._decision import DecisionNodeInfo from ._embedding import EmbeddingNodeInfo -from ._regexp import RegExpNodeInfo from ._scoring import ScoringNodeInfo NODES_INFO: dict[str, NodeInfo] = { NodeType.embedding: EmbeddingNodeInfo(), NodeType.scoring: ScoringNodeInfo(), NodeType.decision: DecisionNodeInfo(), - NodeType.regexp: RegExpNodeInfo(), } __all__ = [ @@ -18,6 +16,5 @@ "DecisionNodeInfo", "EmbeddingNodeInfo", "NodeInfo", - "RegExpNodeInfo", "ScoringNodeInfo", ] diff --git a/autointent/nodes/_nodes_info/_regexp.py b/autointent/nodes/_nodes_info/_regexp.py deleted file mode 100644 index 57f98d102..000000000 --- a/autointent/nodes/_nodes_info/_regexp.py +++ /dev/null @@ -1,22 +0,0 @@ -"""Regexp node info.""" - -from collections.abc import Mapping -from typing import ClassVar - -from autointent.custom_types import NodeType -from autointent.metrics import REGEXP_METRICS -from autointent.metrics.regexp import RegexpMetricFn -from autointent.modules.abc import Module -from autointent.modules.regexp import RegExp - -from ._base import NodeInfo - - -class RegExpNodeInfo(NodeInfo): - """Regexp node info.""" - - metrics_available: ClassVar[Mapping[str, RegexpMetricFn]] = REGEXP_METRICS - - modules_available: ClassVar[Mapping[str, type[Module]]] = {NodeType.regexp: RegExp} - - node_type = NodeType.regexp From 8f30ec905b691a93d0060c62dff2e7751c50c8aa Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 07:09:26 +0300 Subject: [PATCH 14/74] remove regexp validator --- autointent/nodes/schemes.py | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/autointent/nodes/schemes.py b/autointent/nodes/schemes.py index 8cd5a5ca9..58cba623b 100644 --- a/autointent/nodes/schemes.py +++ b/autointent/nodes/schemes.py @@ -8,7 +8,7 @@ from autointent.custom_types import NodeType from autointent.modules.abc import Module -from autointent.nodes import DecisionNodeInfo, EmbeddingNodeInfo, RegExpNodeInfo, ScoringNodeInfo +from autointent.nodes import DecisionNodeInfo, EmbeddingNodeInfo, ScoringNodeInfo def generate_models_and_union_type_for_classes( @@ -91,22 +91,7 @@ class ScoringNodeValidator(BaseModel): search_space: list[ScoringSearchSpaceType] -RegexpSearchSpaceType: TypeAlias = generate_models_and_union_type_for_classes( # type: ignore[valid-type] - list(RegExpNodeInfo.modules_available.values()) -) -RegexpMetrics: TypeAlias = Literal[tuple(RegExpNodeInfo.metrics_available.keys())] # type: ignore[valid-type] - - -class RegexNodeValidator(BaseModel): - """Search space configuration for the Regexp node.""" - - node_type: NodeType = NodeType.regexp - target_metric: RegexpMetrics - metrics: list[RegexpMetrics] | None = None - search_space: list[RegexpSearchSpaceType] - - -SearchSpaceTypes: TypeAlias = RegexNodeValidator | EmbeddingNodeValidator | ScoringNodeValidator | DecisionNodeValidator +SearchSpaceTypes: TypeAlias = EmbeddingNodeValidator | ScoringNodeValidator | DecisionNodeValidator class OptimizationConfig(RootModel[list[SearchSpaceTypes]]): From b2c8986b73e5c860aeffb826e5b770e26d1a7613 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 07:28:47 +0300 Subject: [PATCH 15/74] fix typing problems (except `DataHandler._split_cv`) --- autointent/_pipeline/_pipeline.py | 6 +++--- autointent/configs/_optimization.py | 3 +++ autointent/context/_context.py | 6 ++++-- autointent/context/data_handler/_data_handler.py | 6 +++--- 
autointent/modules/abc/_base.py | 14 ++++++++------ autointent/modules/abc/_decision.py | 10 +++++----- autointent/modules/abc/_embedding.py | 2 +- autointent/modules/abc/_scoring.py | 10 +++++++++- .../modules/scoring/_description/description.py | 2 +- autointent/nodes/_optimization/_node_optimizer.py | 2 +- 10 files changed, 38 insertions(+), 23 deletions(-) diff --git a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py index 2201575b0..42a93e415 100644 --- a/autointent/_pipeline/_pipeline.py +++ b/autointent/_pipeline/_pipeline.py @@ -3,7 +3,7 @@ import json import logging from pathlib import Path -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Literal import numpy as np import yaml @@ -122,7 +122,7 @@ def _is_inference(self) -> bool: """ return isinstance(self.nodes[NodeType.scoring], InferenceNode) - def fit(self, dataset: Dataset) -> Context: + def fit(self, dataset: Dataset, scheme: Literal["ho", "cv"] = "ho") -> Context: """ Optimize the pipeline from dataset. @@ -134,7 +134,7 @@ def fit(self, dataset: Dataset) -> Context: raise RuntimeError(msg) context = Context() - context.set_dataset(dataset) + context.set_dataset(dataset, scheme) context.configure_logging(self.logging_config) context.configure_vector_index(self.vector_index_config, self.embedder_config) context.configure_cross_encoder(self.cross_encoder_config) diff --git a/autointent/configs/_optimization.py b/autointent/configs/_optimization.py index 610eece5b..27ecc52a2 100644 --- a/autointent/configs/_optimization.py +++ b/autointent/configs/_optimization.py @@ -1,6 +1,7 @@ """Configuration for the optimization process.""" from pathlib import Path +from typing import Literal from pydantic import BaseModel, Field @@ -12,6 +13,8 @@ class DataConfig(BaseModel): train_path: str | Path """Path to the training data. Can be local path or HF repo.""" + scheme: Literal["ho", "cv"] + """Hold-out or cross-validation.""" class TaskConfig(BaseModel): diff --git a/autointent/context/_context.py b/autointent/context/_context.py index 1081ea3a1..76baf70c1 100644 --- a/autointent/context/_context.py +++ b/autointent/context/_context.py @@ -3,7 +3,7 @@ import json import logging from pathlib import Path -from typing import Any +from typing import Any, Literal import yaml @@ -83,9 +83,10 @@ def configure_data(self, config: DataConfig) -> None: self.data_handler = DataHandler( dataset=load_dataset(config.train_path), random_seed=self.seed, + scheme=config.scheme ) - def set_dataset(self, dataset: Dataset) -> None: + def set_dataset(self, dataset: Dataset, scheme: Literal["ho", "cv"]) -> None: """ Set the datasets for training, validation and testing. 
@@ -94,6 +95,7 @@ def set_dataset(self, dataset: Dataset) -> None: self.data_handler = DataHandler( dataset=dataset, random_seed=self.seed, + scheme=scheme, ) def get_inference_config(self) -> dict[str, Any]: diff --git a/autointent/context/data_handler/_data_handler.py b/autointent/context/data_handler/_data_handler.py index f7ef69483..e5f9ba361 100644 --- a/autointent/context/data_handler/_data_handler.py +++ b/autointent/context/data_handler/_data_handler.py @@ -9,7 +9,7 @@ from transformers import set_seed from autointent import Dataset -from autointent.custom_types import ListOfGenericLabels, Split +from autointent.custom_types import ListOfGenericLabels, ListOfLabels, Split from ._stratification import split_dataset @@ -169,7 +169,7 @@ def test_labels(self, idx: int | None = None) -> ListOfGenericLabels: split = f"{Split.TEST}_{idx}" if idx is not None else Split.TEST return cast(ListOfGenericLabels, self.dataset[split][self.dataset.label_feature]) - def validation_iterator(self) -> Generator[tuple[list, list, list, list]]: + def validation_iterator(self) -> Generator[tuple[list[str], ListOfLabels, list[str], ListOfLabels]]: if self.scheme == "ho": msg = "Cannot call cross-validation on hold-out DataHandler" raise RuntimeError(msg) @@ -180,7 +180,7 @@ def validation_iterator(self) -> Generator[tuple[list, list, list, list]]: train_folds = [i for i in range(self.n_folds) if i != j] train_utterances = [ut for i_fold in train_folds for ut in self.train_utterances(i_fold)] train_labels = [ut for i_fold in train_folds for ut in self.train_labels(i_fold)] - yield train_utterances, train_labels, val_utterances, val_labels + yield train_utterances, train_labels, val_utterances, val_labels # type: ignore[misc] msg = "something's wrong" raise RuntimeError(msg) diff --git a/autointent/modules/abc/_base.py b/autointent/modules/abc/_base.py index ec368bc6d..2674510eb 100644 --- a/autointent/modules/abc/_base.py +++ b/autointent/modules/abc/_base.py @@ -12,7 +12,7 @@ from autointent._dump_tools import Dumper from autointent.context import Context from autointent.context.optimization_info import Artifact -from autointent.custom_types import ListOfGenericLabels +from autointent.custom_types import ListOfGenericLabels, ListOfLabels from autointent.exceptions import WrongClassificationError logger = logging.getLogger(__name__) @@ -133,20 +133,22 @@ def score_metrics_ho(params: tuple[Any, Any], metrics_dict: dict[str, Any]) -> d return metrics def score_metrics_cv( - self, metrics_dict: dict[str, Any], cv_iterator: Iterable[tuple[list, list, list, list]] + self, + metrics_dict: dict[str, Any], + cv_iterator: Iterable[tuple[list[str], ListOfLabels, list[str], ListOfLabels]], ) -> tuple[dict[str, float], list[ListOfGenericLabels] | list[npt.NDArray[Any]]]: - metrics_values = {name: [] for name in metrics_dict} + metrics_values: dict[str, list[float]] = {name: [] for name in metrics_dict} all_val_preds = [] for train_utterances, train_labels, val_utterances, val_labels in cv_iterator: - self.fit(train_utterances, train_labels) + self.fit(train_utterances, train_labels) # type: ignore[arg-type] val_preds = self.predict(val_utterances) for name, fn in metrics_dict.items(): metrics_values[name].append(fn(val_labels, val_preds)) all_val_preds.append(val_preds) - metrics = {name: np.mean(values_list) for name, values_list in metrics_values.items()} - return metrics, all_val_preds + metrics = {name: float(np.mean(values_list)) for name, values_list in metrics_values.items()} + return metrics, all_val_preds # 
type: ignore[return-value] def _validate_multilabel(self, data_is_multilabel: bool) -> None: if data_is_multilabel and not self.supports_multilabel: diff --git a/autointent/modules/abc/_decision.py b/autointent/modules/abc/_decision.py index 391a23675..6d78d050a 100644 --- a/autointent/modules/abc/_decision.py +++ b/autointent/modules/abc/_decision.py @@ -48,8 +48,8 @@ def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: :param split: Target split :return: Computed metrics value for the test set or error code of metrics """ - train_scores, train_labels = self.get_train_data(context) - self.fit(train_scores, train_labels, context.data_handler.tags) + train_scores, train_labels, tags = self.get_train_data(context) + self.fit(train_scores, train_labels, tags) val_labels, val_scores = get_decision_evaluation_data(context, "validation") decisions = self.predict(val_scores) @@ -73,7 +73,7 @@ def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: raise RuntimeError(msg) chosen_metrics = {name: fn for name, fn in PREDICTION_METRICS_MULTICLASS.items() if name in metrics} - metrics_values = {name: [] for name in chosen_metrics} + metrics_values: dict[str, list[float]] = {name: [] for name in chosen_metrics} all_val_decisions = [] for j in range(context.data_handler.n_folds): val_labels = labels[j] @@ -81,14 +81,14 @@ def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: train_folds = [i for i in range(context.data_handler.n_folds) if i != j] train_labels = [ut for i_fold in train_folds for ut in labels[i_fold]] train_scores = [ut for i_fold in train_folds for ut in scores[i_fold]] - self.fit(train_scores, train_labels, context.data_handler.tags) + self.fit(train_scores, train_labels, context.data_handler.tags) # type: ignore[arg-type] val_decisions = self.predict(val_scores) for name, fn in chosen_metrics.items(): metrics_values[name].append(fn(val_labels, val_decisions)) all_val_decisions.append(val_decisions) self._artifact = DecisionArtifact(labels=[pred for pred_list in all_val_decisions for pred in pred_list]) - return {name: np.mean(values_list) for name, values_list in metrics_values.items()} + return {name: float(np.mean(values_list)) for name, values_list in metrics_values.items()} def get_assets(self) -> DecisionArtifact: """Return useful assets that represent intermediate data into context.""" diff --git a/autointent/modules/abc/_embedding.py b/autointent/modules/abc/_embedding.py index 708a749eb..5a30222b6 100644 --- a/autointent/modules/abc/_embedding.py +++ b/autointent/modules/abc/_embedding.py @@ -11,4 +11,4 @@ class EmbeddingModule(Module, ABC): """Base class for embedding modules.""" def get_train_data(self, context: Context) -> tuple[list[str], ListOfLabels]: - return (context.data_handler.train_utterances(0), context.data_handler.train_labels(0)) + return (context.data_handler.train_utterances(0), context.data_handler.train_labels(0)) # type: ignore[return-value] diff --git a/autointent/modules/abc/_scoring.py b/autointent/modules/abc/_scoring.py index 603ea73af..5005e4662 100644 --- a/autointent/modules/abc/_scoring.py +++ b/autointent/modules/abc/_scoring.py @@ -22,6 +22,14 @@ class ScoringModule(Module, ABC): supports_oos = False + @abstractmethod + def fit( + self, + utterances: list[str], + labels: ListOfLabels, + ) -> None: + ... 
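# [Editor's illustration, not part of this patch] The abstract `fit` above, together with
# the `predict` declared further down in this module, is what the hold-out and
# cross-validation scoring paths call on each scorer. A standalone toy object with the
# same shape; the class name and its frequency-based logic are assumptions for
# illustration only, not AutoIntent code:

from typing import Any

import numpy as np
import numpy.typing as npt


class ToyPriorScorer:
    """Scores every utterance with the label distribution observed in `fit`."""

    def fit(self, utterances: list[str], labels: list[int]) -> None:
        counts = np.bincount(np.asarray(labels))
        self._prior = counts / counts.sum()

    def predict(self, utterances: list[str]) -> npt.NDArray[Any]:
        # one identical probability row per utterance
        return np.tile(self._prior, (len(utterances), 1))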
+ def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: train_utterances, train_labels = self.get_train_data(context) self.fit(train_utterances, train_labels) @@ -68,7 +76,7 @@ def get_assets(self) -> ScorerArtifact: return self._artifact def get_train_data(self, context: Context) -> tuple[list[str], ListOfLabels]: - return (context.data_handler.train_utterances(0), context.data_handler.train_labels(0)) + return (context.data_handler.train_utterances(0), context.data_handler.train_labels(0)) # type: ignore[return-value] @abstractmethod def predict(self, utterances: list[str]) -> npt.NDArray[Any]: diff --git a/autointent/modules/scoring/_description/description.py b/autointent/modules/scoring/_description/description.py index 071837cf2..d84b7d6a7 100644 --- a/autointent/modules/scoring/_description/description.py +++ b/autointent/modules/scoring/_description/description.py @@ -148,7 +148,7 @@ def clear_cache(self) -> None: self._embedder.clear_ram() def get_train_data(self, context: Context) -> tuple[list[str], ListOfLabels, list[str]]: - return ( + return ( # type: ignore[return-value] context.data_handler.train_utterances(0), context.data_handler.train_labels(0), context.data_handler.intent_descriptions, diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_optimization/_node_optimizer.py index 2dd481bfa..2271f9feb 100644 --- a/autointent/nodes/_optimization/_node_optimizer.py +++ b/autointent/nodes/_optimization/_node_optimizer.py @@ -67,7 +67,7 @@ def fit(self, context: Context) -> None: module_kwargs["embedder_name"] = embedder_name self._logger.debug("scoring %s module...", module_name) - metrics_score = module.score(context, test=False, metrics=self.metrics) + metrics_score = module.score(context, metrics=self.metrics) metric_value = metrics_score[self.target_metric] context.callback_handler.log_metrics(metrics_score) From e24bde4c6cebca9432ec3e90db24dc72f370abc3 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 09:23:15 +0300 Subject: [PATCH 16/74] add ingore oos decorator --- autointent/metrics/retrieval.py | 134 +++++++------------------------- autointent/metrics/scoring.py | 29 ++++++- 2 files changed, 58 insertions(+), 105 deletions(-) diff --git a/autointent/metrics/retrieval.py b/autointent/metrics/retrieval.py index c9512a96a..567ae60d1 100644 --- a/autointent/metrics/retrieval.py +++ b/autointent/metrics/retrieval.py @@ -1,5 +1,6 @@ """Retrieval metrics.""" +from functools import wraps from typing import Any, Protocol import numpy as np @@ -109,6 +110,21 @@ def _average_precision(query_label: int, candidate_labels: npt.NDArray[np.int64] return sum_precision / num_relevant if num_relevant > 0 else 0.0 +def ignore_oos(func: RetrievalMetricFn) -> RetrievalMetricFn: + """Ignore OOS in metrics calculation (decorator).""" + + @wraps(func) + def wrapper(query_labels: list[Any | None], candidates_labels: list[Any]) -> float: + query_labels_filtered = [lab for lab in query_labels if lab is not None] + candidates_labels_filtered = [ + cand for cand, lab in zip(candidates_labels, query_labels, strict=True) if lab is not None + ] + return func(query_labels_filtered, candidates_labels_filtered) + + return wrapper + + +@ignore_oos def retrieval_map(query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, k: int | None = None) -> float: r""" Calculate the mean average precision at position k. 
@@ -180,6 +196,7 @@ class of the query :math:`q`, return sum_precision / num_relevant if num_relevant > 0 else 0.0 +@ignore_oos def retrieval_map_intersecting( query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, @@ -215,6 +232,7 @@ def retrieval_map_intersecting( return sum(ap_list) / len(ap_list) +@ignore_oos def retrieval_map_macro( query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, @@ -235,47 +253,7 @@ def retrieval_map_macro( return _macrofy(retrieval_map, query_labels, candidates_labels, k) -def _retrieval_map_numpy(query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, k: int) -> float: - r""" - Calculate mean average precision at position k. - - The mean average precision (MAP) at position :math:`k` is calculated as follows: - - .. math:: - - \text{AP}_q = \frac{1}{|R_q|} \sum_{i=1}^{k} P_q(i) \cdot \mathbb{1}(y_{\text{true},q} = y_{\text{pred},i}) - - \text{MAP}@k = \frac{1}{|Q|} \sum_{q=1}^{Q} \text{AP}_q - - where: - - :math:`\text{AP}_q` is the average precision for query :math:`q`, - - :math:`P_q(i)` is the precision at the :math:`i`-th position for query :math:`q`, - - :math:`\mathbb{1}(y_{\text{true},q} = y_{\text{pred},i})` is the indicator function that equals - 1 if the true label of the query matches the predicted label at position :math:`i` and 0 otherwise, - - :math:`|R_q|` is the total number of relevant items for query :math:`q`, - - :math:`|Q|` is the total number of queries. - - :param query_labels: For each query, this list contains its class labels - :param candidates_labels: For each query, these lists contain class labels of items ranked by a retrieval model (from most to least relevant) - :param k: Number of top items to consider for each query - :return: Score of the retrieval metric - """ # noqa: E501 - query_label_, candidates_labels_ = transform(query_labels, candidates_labels) - candidates_labels_ = candidates_labels_[:, :k] - relevance_mask = candidates_labels_ == query_label_[:, None] - cumulative_relevant = np.cumsum(relevance_mask, axis=1) - precision_at_k = cumulative_relevant * relevance_mask / np.arange(1, k + 1) - sum_precision = np.sum(precision_at_k, axis=1) - num_relevant = np.sum(relevance_mask, axis=1) - average_precision = np.divide( - sum_precision, - num_relevant, - out=np.zeros_like(sum_precision), - where=num_relevant != 0, - ) - return np.mean(average_precision) # type: ignore[no-any-return] - - +@ignore_oos def retrieval_hit_rate( query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, @@ -315,6 +293,7 @@ def retrieval_hit_rate( return float(hit_count / num_queries) +@ignore_oos def retrieval_hit_rate_intersecting( query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, @@ -360,6 +339,7 @@ def retrieval_hit_rate_intersecting( return float(hit_count / num_queries) +@ignore_oos def retrieval_hit_rate_macro( query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, @@ -380,34 +360,7 @@ def retrieval_hit_rate_macro( return _macrofy(retrieval_hit_rate, query_labels, candidates_labels, k) -def _retrieval_hit_rate_numpy(query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, k: int) -> float: - r""" - Calculate the hit rate at position k. - - The hit rate is calculated as: - - .. 
math:: - - \text{Hit Rate} = \frac{\sum_{i=1}^N \mathbb{1}(y_{\text{query},i} \in y_{\text{candidates},i}^{(1:k)})}{N} - - where: - - :math:`N` is the total number of queries, - - :math:`y_{\text{query},i}` is the true label for the :math:`i`-th query, - - :math:`y_{\text{candidates},i}^{(1:k)}` is the set of top-k predicted labels for the :math:`i`-th query, - - :math:`\mathbb{1}(\text{condition})` is the indicator function that equals 1 if the condition - is true and 0 otherwise. - - :param query_labels: For each query, this list contains its class labels - :param candidates_labels: For each query, these lists contain class labels of items ranked by a retrieval model (from most to least relevant) - :param k: Number of top items to consider for each query - :return: Score of the retrieval metric - """ # noqa: E501 - query_label_, candidates_labels_ = transform(query_labels, candidates_labels) - truncated_candidates = candidates_labels_[:, :k] - hit_mask = np.isin(query_label_[:, None], truncated_candidates).any(axis=1) - return hit_mask.mean() # type: ignore[no-any-return] - - +@ignore_oos def retrieval_precision( query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, @@ -449,6 +402,7 @@ def retrieval_precision( return float(total_precision / num_queries) +@ignore_oos def retrieval_precision_intersecting( query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, @@ -496,6 +450,7 @@ def retrieval_precision_intersecting( return float(total_precision / num_queries) +@ignore_oos def retrieval_precision_macro( query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, @@ -516,41 +471,6 @@ def retrieval_precision_macro( return _macrofy(retrieval_precision, query_labels, candidates_labels, k) -def _retrieval_precision_numpy( - query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, k: int | None = None -) -> float: - r""" - Calculate the precision at position k. - - Precision at position :math:`k` is calculated as: - - .. math:: - - \text{Precision@k} = \frac{1}{N} \sum_{i=1}^N \frac{\sum_{j=1}^k - \mathbb{1}(y_{\text{query},i} = y_{\text{candidates},i,j})}{k} - - where: - - :math:`N` is the total number of queries, - - :math:`y_{\text{query},i}` is the true label for the :math:`i`-th query, - - :math:`y_{\text{candidates},i,j}` is the :math:`j`-th predicted label for the :math:`i`-th query, - - :math:`\mathbb{1}(\text{condition})` is the indicator function that equals 1 if the - condition is true and 0 otherwise, - - :math:`k` is the number of top candidates considered. - - :param query_labels: For each query, this list contains its class labels - :param candidates_labels: For each query, these lists contain class labels of items ranked by a retrieval model - (from most to least relevant) - :param k: Number of top items to consider for each query - :return: Score of the retrieval metric - """ - query_label_, candidates_labels_ = transform(query_labels, candidates_labels) - top_k_candidates = candidates_labels_[:, :k] - matches = (top_k_candidates == query_label_[:, None]).astype(int) - relevant_counts = np.sum(matches, axis=1) - precision_at_k = relevant_counts / k - return np.mean(precision_at_k) # type: ignore[no-any-return] - - def _dcg(relevance_scores: npt.NDArray[Any], k: int | None = None) -> float: r""" Calculate the Discounted Cumulative Gain (DCG) at position k. 
@@ -597,6 +517,7 @@ def _idcg(relevance_scores: npt.NDArray[Any], k: int | None = None) -> float: return _dcg(ideal_scores, k) +@ignore_oos def retrieval_ndcg(query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, k: int | None = None) -> float: r""" Calculate the Normalized Discounted Cumulative Gain (NDCG) at position k. @@ -632,6 +553,7 @@ def retrieval_ndcg(query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE return float(np.mean(ndcg_scores)) +@ignore_oos def retrieval_ndcg_intersecting( query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, @@ -674,6 +596,7 @@ def retrieval_ndcg_intersecting( return np.mean(ndcg_scores) # type: ignore[return-value] +@ignore_oos def retrieval_ndcg_macro( query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, @@ -692,6 +615,7 @@ def retrieval_ndcg_macro( return _macrofy(retrieval_ndcg, query_labels, candidates_labels, k) +@ignore_oos def retrieval_mrr(query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, k: int | None = None) -> float: r""" Calculate the Mean Reciprocal Rank (MRR) at position k. @@ -726,6 +650,7 @@ def retrieval_mrr(query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_ return float(mrr_sum / num_queries) +@ignore_oos def retrieval_mrr_intersecting( query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, @@ -766,6 +691,7 @@ def retrieval_mrr_intersecting( return float(mrr_sum / num_queries) +@ignore_oos def retrieval_mrr_macro( query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, diff --git a/autointent/metrics/scoring.py b/autointent/metrics/scoring.py index 5a978544e..0c0bc9bb2 100644 --- a/autointent/metrics/scoring.py +++ b/autointent/metrics/scoring.py @@ -1,7 +1,8 @@ """Scoring metrics for multiclass and multilabel classification tasks.""" import logging -from typing import Protocol +from functools import wraps +from typing import Any, Protocol import numpy as np from sklearn.metrics import coverage_error, label_ranking_average_precision_score, label_ranking_loss, roc_auc_score @@ -29,6 +30,23 @@ def __call__(self, labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> floa ... + + +def ignore_oos(func: ScoringMetricFn) -> ScoringMetricFn: + """Ignore OOS in metrics calculation (decorator).""" + + @wraps(func) + def wrapper(labels: list[Any | None], scores: list[Any]) -> float: + labels_filtered = [lab for lab in labels if lab is not None] + scores_filtered = [ + score for score, lab in zip(scores, labels, strict=True) if lab is not None + ] + return func(labels_filtered, scores_filtered) + + return wrapper + + +@ignore_oos def scoring_log_likelihood(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE, eps: float = 1e-10) -> float: r""" Supports multiclass and multilabel cases. @@ -75,6 +93,7 @@ def scoring_log_likelihood(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE, return round(float(res), 6) +@ignore_oos def scoring_roc_auc(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> float: r""" Supports multiclass and multilabel cases. @@ -126,6 +145,7 @@ def _calculate_decision_metric( return res +@ignore_oos def scoring_accuracy(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> float: r""" Calculate accuracy for multiclass and multilabel classification. 
@@ -140,6 +160,7 @@ def scoring_accuracy(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> fl return _calculate_decision_metric(decision_accuracy, labels, scores) +@ignore_oos def scoring_f1(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> float: r""" Calculate the F1 score for multiclass and multilabel classification. @@ -154,6 +175,7 @@ def scoring_f1(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> float: return _calculate_decision_metric(decision_f1, labels, scores) +@ignore_oos def scoring_precision(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> float: r""" Calculate precision for multiclass and multilabel classification. @@ -168,6 +190,7 @@ def scoring_precision(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> f return _calculate_decision_metric(decision_precision, labels, scores) +@ignore_oos def scoring_recall(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> float: r""" Calculate recall for multiclass and multilabel classification. @@ -182,6 +205,7 @@ def scoring_recall(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> floa return _calculate_decision_metric(decision_recall, labels, scores) +@ignore_oos def scoring_hit_rate(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> float: r""" Calculate the hit rate for multilabel classification. @@ -210,6 +234,7 @@ def scoring_hit_rate(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> fl return float(np.mean(is_in)) +@ignore_oos def scoring_neg_coverage(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> float: """ Supports multilabel classification. @@ -246,6 +271,7 @@ def scoring_neg_coverage(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) - return float(1 - (coverage_error(labels, scores) - 1) / (n_classes - 1)) +@ignore_oos def scoring_neg_ranking_loss(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> float: """ Supports multilabel. @@ -262,6 +288,7 @@ def scoring_neg_ranking_loss(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYP return float(-label_ranking_loss(labels, scores)) +@ignore_oos def scoring_map(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> float: r""" Calculate the mean average precision (MAP) score for multilabel classification. From a74e5dd6542210968670cec17fae804b34dcc49c Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 09:23:33 +0300 Subject: [PATCH 17/74] fix codestyle --- autointent/context/_context.py | 4 +--- autointent/metrics/scoring.py | 6 +----- autointent/modules/abc/_scoring.py | 3 +-- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/autointent/context/_context.py b/autointent/context/_context.py index 76baf70c1..09810d0d6 100644 --- a/autointent/context/_context.py +++ b/autointent/context/_context.py @@ -81,9 +81,7 @@ def configure_data(self, config: DataConfig) -> None: :param config: Configuration for the data handling process. """ self.data_handler = DataHandler( - dataset=load_dataset(config.train_path), - random_seed=self.seed, - scheme=config.scheme + dataset=load_dataset(config.train_path), random_seed=self.seed, scheme=config.scheme ) def set_dataset(self, dataset: Dataset, scheme: Literal["ho", "cv"]) -> None: diff --git a/autointent/metrics/scoring.py b/autointent/metrics/scoring.py index 0c0bc9bb2..773f12ad6 100644 --- a/autointent/metrics/scoring.py +++ b/autointent/metrics/scoring.py @@ -30,17 +30,13 @@ def __call__(self, labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> floa ... 
- - def ignore_oos(func: ScoringMetricFn) -> ScoringMetricFn: """Ignore OOS in metrics calculation (decorator).""" @wraps(func) def wrapper(labels: list[Any | None], scores: list[Any]) -> float: labels_filtered = [lab for lab in labels if lab is not None] - scores_filtered = [ - score for score, lab in zip(scores, labels, strict=True) if lab is not None - ] + scores_filtered = [score for score, lab in zip(scores, labels, strict=True) if lab is not None] return func(labels_filtered, scores_filtered) return wrapper diff --git a/autointent/modules/abc/_scoring.py b/autointent/modules/abc/_scoring.py index 5005e4662..bb20b37e1 100644 --- a/autointent/modules/abc/_scoring.py +++ b/autointent/modules/abc/_scoring.py @@ -27,8 +27,7 @@ def fit( self, utterances: list[str], labels: ListOfLabels, - ) -> None: - ... + ) -> None: ... def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: train_utterances, train_labels = self.get_train_data(context) From 8b94741f99a6c106e2d8aaabe6382443d9f68780 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 09:31:14 +0300 Subject: [PATCH 18/74] fix typing --- autointent/metrics/retrieval.py | 6 +++--- autointent/metrics/scoring.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/autointent/metrics/retrieval.py b/autointent/metrics/retrieval.py index 567ae60d1..fd622aa87 100644 --- a/autointent/metrics/retrieval.py +++ b/autointent/metrics/retrieval.py @@ -114,14 +114,14 @@ def ignore_oos(func: RetrievalMetricFn) -> RetrievalMetricFn: """Ignore OOS in metrics calculation (decorator).""" @wraps(func) - def wrapper(query_labels: list[Any | None], candidates_labels: list[Any]) -> float: + def wrapper(query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE) -> float: query_labels_filtered = [lab for lab in query_labels if lab is not None] candidates_labels_filtered = [ cand for cand, lab in zip(candidates_labels, query_labels, strict=True) if lab is not None ] - return func(query_labels_filtered, candidates_labels_filtered) + return func(query_labels_filtered, candidates_labels_filtered) # type: ignore[arg-type] - return wrapper + return wrapper # type: ignore[return-value] @ignore_oos diff --git a/autointent/metrics/scoring.py b/autointent/metrics/scoring.py index 773f12ad6..ab5023be1 100644 --- a/autointent/metrics/scoring.py +++ b/autointent/metrics/scoring.py @@ -2,7 +2,7 @@ import logging from functools import wraps -from typing import Any, Protocol +from typing import Protocol import numpy as np from sklearn.metrics import coverage_error, label_ranking_average_precision_score, label_ranking_loss, roc_auc_score @@ -34,10 +34,10 @@ def ignore_oos(func: ScoringMetricFn) -> ScoringMetricFn: """Ignore OOS in metrics calculation (decorator).""" @wraps(func) - def wrapper(labels: list[Any | None], scores: list[Any]) -> float: + def wrapper(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> float: labels_filtered = [lab for lab in labels if lab is not None] scores_filtered = [score for score, lab in zip(scores, labels, strict=True) if lab is not None] - return func(labels_filtered, scores_filtered) + return func(labels_filtered, scores_filtered) # type: ignore[arg-type] return wrapper From cfb2f25444e0627a63e766419444a50578eb369a Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 09:43:11 +0300 Subject: [PATCH 19/74] add oos handling to cv iterator --- autointent/context/data_handler/_data_handler.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git 
a/autointent/context/data_handler/_data_handler.py b/autointent/context/data_handler/_data_handler.py index e5f9ba361..27e3b37d7 100644 --- a/autointent/context/data_handler/_data_handler.py +++ b/autointent/context/data_handler/_data_handler.py @@ -169,7 +169,9 @@ def test_labels(self, idx: int | None = None) -> ListOfGenericLabels: split = f"{Split.TEST}_{idx}" if idx is not None else Split.TEST return cast(ListOfGenericLabels, self.dataset[split][self.dataset.label_feature]) - def validation_iterator(self) -> Generator[tuple[list[str], ListOfLabels, list[str], ListOfLabels]]: + def validation_iterator( + self + ) -> Generator[tuple[list[str], ListOfLabels, list[str], ListOfLabels]]: if self.scheme == "ho": msg = "Cannot call cross-validation on hold-out DataHandler" raise RuntimeError(msg) @@ -179,11 +181,14 @@ def validation_iterator(self) -> Generator[tuple[list[str], ListOfLabels, list[s val_labels = self.train_labels(j) train_folds = [i for i in range(self.n_folds) if i != j] train_utterances = [ut for i_fold in train_folds for ut in self.train_utterances(i_fold)] - train_labels = [ut for i_fold in train_folds for ut in self.train_labels(i_fold)] - yield train_utterances, train_labels, val_utterances, val_labels # type: ignore[misc] + train_labels = [lab for i_fold in train_folds for lab in self.train_labels(i_fold)] - msg = "something's wrong" - raise RuntimeError(msg) + # filter out all OOS samples from train + train_utterances = [ + ut for ut, lab in zip(train_utterances, train_labels, strict=True) if lab is not None + ] + train_labels = [lab for lab in train_labels if lab is not None] + yield train_utterances, train_labels, val_utterances, val_labels # type: ignore[misc] def dump(self, filepath: str | Path) -> None: """ From e1d41a9cd02fec08c3f6b5f18ce97e6e49782b11 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 09:43:41 +0300 Subject: [PATCH 20/74] remove `DataHandler.dump()` --- autointent/context/data_handler/_data_handler.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/autointent/context/data_handler/_data_handler.py b/autointent/context/data_handler/_data_handler.py index 27e3b37d7..b16838246 100644 --- a/autointent/context/data_handler/_data_handler.py +++ b/autointent/context/data_handler/_data_handler.py @@ -2,7 +2,6 @@ import logging from collections.abc import Generator -from pathlib import Path from typing import Literal, TypedDict, cast from datasets import concatenate_datasets @@ -190,14 +189,6 @@ def validation_iterator( train_labels = [lab for lab in train_labels if lab is not None] yield train_utterances, train_labels, val_utterances, val_labels # type: ignore[misc] - def dump(self, filepath: str | Path) -> None: - """ - Save the dataset splits and intents to a JSON file. - - :param filepath: The path to the file where the JSON data will be saved. 
- """ - self.dataset.to_json(filepath) - def _split_ho(self, random_seed: int, split_train: bool) -> None: has_validation_split = any(split.startswith(Split.VALIDATION) for split in self.dataset) From 9668dc69e0617fde01dbe300cbc9cc8afb820596 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 09:44:17 +0300 Subject: [PATCH 21/74] minor bug fix --- autointent/context/_context.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autointent/context/_context.py b/autointent/context/_context.py index 09810d0d6..3e83db5a7 100644 --- a/autointent/context/_context.py +++ b/autointent/context/_context.py @@ -137,7 +137,7 @@ def dump(self) -> None: # self._logger.info(make_report(optimization_results, nodes=nodes)) # dump train and test data splits - self.data_handler.dump(logs_dir / "dataset.json") + self.data_handler.dataset.to_json(logs_dir / "dataset.json") self._logger.info("logs and other assets are saved to %s", logs_dir) From 61130bba1d16ab17eea539654f0a441e2d114127 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 10:06:27 +0300 Subject: [PATCH 22/74] implement splitting to cv folds --- .../context/data_handler/_data_handler.py | 28 +++++++++++++++---- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/autointent/context/data_handler/_data_handler.py b/autointent/context/data_handler/_data_handler.py index b16838246..58fb3a494 100644 --- a/autointent/context/data_handler/_data_handler.py +++ b/autointent/context/data_handler/_data_handler.py @@ -168,9 +168,7 @@ def test_labels(self, idx: int | None = None) -> ListOfGenericLabels: split = f"{Split.TEST}_{idx}" if idx is not None else Split.TEST return cast(ListOfGenericLabels, self.dataset[split][self.dataset.label_feature]) - def validation_iterator( - self - ) -> Generator[tuple[list[str], ListOfLabels, list[str], ListOfLabels]]: + def validation_iterator(self) -> Generator[tuple[list[str], ListOfLabels, list[str], ListOfLabels]]: if self.scheme == "ho": msg = "Cannot call cross-validation on hold-out DataHandler" raise RuntimeError(msg) @@ -183,9 +181,7 @@ def validation_iterator( train_labels = [lab for i_fold in train_folds for lab in self.train_labels(i_fold)] # filter out all OOS samples from train - train_utterances = [ - ut for ut, lab in zip(train_utterances, train_labels, strict=True) if lab is not None - ] + train_utterances = [ut for ut, lab in zip(train_utterances, train_labels, strict=True) if lab is not None] train_labels = [lab for lab in train_labels if lab is not None] yield train_utterances, train_labels, val_utterances, val_labels # type: ignore[misc] @@ -252,6 +248,26 @@ def _split_validation_from_test(self, random_seed: int) -> None: allow_oos_in_train=True, # both test and validation splits can contain OOS ) + def _split_cv(self, random_seed: int) -> None: + self.dataset[Split.TRAIN] = concatenate_datasets([ + self.dataset[split_name] for split_name in self.dataset if split_name not in [Split.TRAIN, Split.TEST] + ]) + + if Split.TEST not in self.dataset: + self.dataset[Split.TRAIN], self.dataset[Split.TEST] = split_dataset( + self.dataset, split=Split.TRAIN, test_size=0.2, random_seed=random_seed, allow_oos_in_train=True + ) + + for j in range(self.n_folds - 1): + self.dataset[Split.TRAIN], self.dataset[f"{Split.TRAIN}_{j}"] = split_dataset( + self.dataset, + split=Split.TRAIN, + test_size=1 / (self.n_folds - j), + random_seed=random_seed, + allow_oos_in_train=True, + ) + self.dataset[f"{Split.TRAIN}_{self.n_folds-1}"] = self.dataset.pop(Split.TRAIN) + def 
_split_validation_from_train(self, random_seed: int) -> None: if Split.TRAIN in self.dataset: self.dataset[Split.TRAIN], self.dataset[Split.VALIDATION] = split_dataset( From 30847185bad87848dc422844ea4f953cb7a7aa23 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 10:06:52 +0300 Subject: [PATCH 23/74] fix codestyle --- autointent/context/data_handler/_data_handler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/autointent/context/data_handler/_data_handler.py b/autointent/context/data_handler/_data_handler.py index 58fb3a494..fbb7c6987 100644 --- a/autointent/context/data_handler/_data_handler.py +++ b/autointent/context/data_handler/_data_handler.py @@ -249,9 +249,9 @@ def _split_validation_from_test(self, random_seed: int) -> None: ) def _split_cv(self, random_seed: int) -> None: - self.dataset[Split.TRAIN] = concatenate_datasets([ - self.dataset[split_name] for split_name in self.dataset if split_name not in [Split.TRAIN, Split.TEST] - ]) + self.dataset[Split.TRAIN] = concatenate_datasets( + [self.dataset[split_name] for split_name in self.dataset if split_name not in [Split.TRAIN, Split.TEST]] + ) if Split.TEST not in self.dataset: self.dataset[Split.TRAIN], self.dataset[Split.TEST] = split_dataset( From ae1383419dba768fb4b4603a893f19711cba6238 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 10:35:01 +0300 Subject: [PATCH 24/74] remove regex tests --- tests/configs/test_regex.py | 45 ------------------------------------- tests/modules/test_regex.py | 42 ---------------------------------- 2 files changed, 87 deletions(-) delete mode 100644 tests/configs/test_regex.py delete mode 100644 tests/modules/test_regex.py diff --git a/tests/configs/test_regex.py b/tests/configs/test_regex.py deleted file mode 100644 index 0e24bb11e..000000000 --- a/tests/configs/test_regex.py +++ /dev/null @@ -1,45 +0,0 @@ -import pytest -from pydantic import ValidationError - -from autointent.nodes import OptimizationConfig - - -@pytest.fixture -def valid_regexp_config(): - """Fixture for a valid RegExp node configuration.""" - return [ - {"node_type": "regexp", "target_metric": "regexp_partial_accuracy", "search_space": [{"module_name": "regexp"}]} - ] - - -def test_valid_regexp_config(valid_regexp_config): - """Test that a valid RegExp config passes validation.""" - config = OptimizationConfig(valid_regexp_config) - assert config[0].node_type == "regexp" - assert config[0].target_metric == "regexp_partial_accuracy" - assert isinstance(config[0].search_space, list) - assert config[0].search_space[0].module_name == "regexp" - - -def test_invalid_regexp_config_missing_field(): - """Test that a missing required field raises ValidationError.""" - invalid_config = { - "node_type": "regexp", - # Missing "target_metric" - "search_space": [{"module_name": "regexp"}], - } - - with pytest.raises(ValidationError): - OptimizationConfig(invalid_config) - - -def test_invalid_regexp_config_wrong_type(): - """Test that an invalid field type raises ValidationError.""" - invalid_config = { - "node_type": "regexp", - "target_metric": "regexp_partial_accuracy", - "search_space": "should_be_a_list", # Should be a list of dicts - } - - with pytest.raises(ValidationError): - OptimizationConfig(invalid_config) diff --git a/tests/modules/test_regex.py b/tests/modules/test_regex.py deleted file mode 100644 index 83912fe13..000000000 --- a/tests/modules/test_regex.py +++ /dev/null @@ -1,42 +0,0 @@ -import pytest - -from autointent.modules.regexp import RegExp - - 
-@pytest.mark.parametrize( - ("partial_match", "expected_predictions"), - [(".*", [[0, 1], [0, 1], [0, 1], [0, 1], [0, 1]]), ("frozen", [[0], [0], [0], [0], [0, 1]])], -) -def test_base_regex(partial_match, expected_predictions): - train_data = [ - { - "id": 0, - "name": "accept_reservations", - "regexp_full_match": [".*"], - "regexp_partial_match": [".*"], - }, - { - "id": 1, - "name": "account_blocked", - "regexp_partial_match": [partial_match], - }, - ] - - matcher = RegExp() - matcher.fit(train_data) - - test_data = [ - "why is there a hold on my american saving bank account", - "i am nost sure why my account is blocked", - "why is there a hold on my capital one checking account", - "i think my account is blocked but i do not know the reason", - "can you tell me why is my bank account frozen", - ] - predictions = matcher.predict(test_data) - assert predictions == expected_predictions - - predictions, metadata = matcher.predict_with_metadata(test_data) - assert len(predictions) == len(test_data) == len(metadata) - - assert "partial_matches" in metadata[0] - assert "full_matches" in metadata[0] From 4c49809fc1fd30a14386ed498a504240a86b95bb Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 10:36:04 +0300 Subject: [PATCH 25/74] bug fix --- autointent/context/_context.py | 2 +- autointent/context/data_handler/_data_handler.py | 2 +- autointent/metrics/retrieval.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/autointent/context/_context.py b/autointent/context/_context.py index 3e83db5a7..dd73db842 100644 --- a/autointent/context/_context.py +++ b/autointent/context/_context.py @@ -84,7 +84,7 @@ def configure_data(self, config: DataConfig) -> None: dataset=load_dataset(config.train_path), random_seed=self.seed, scheme=config.scheme ) - def set_dataset(self, dataset: Dataset, scheme: Literal["ho", "cv"]) -> None: + def set_dataset(self, dataset: Dataset, scheme: Literal["ho", "cv"] = "ho") -> None: """ Set the datasets for training, validation and testing. 
diff --git a/autointent/context/data_handler/_data_handler.py b/autointent/context/data_handler/_data_handler.py index fbb7c6987..ca455f213 100644 --- a/autointent/context/data_handler/_data_handler.py +++ b/autointent/context/data_handler/_data_handler.py @@ -32,7 +32,7 @@ class DataHandler: # TODO rename to Validator def __init__( self, dataset: Dataset, - scheme: Literal["cv", "ho"], + scheme: Literal["cv", "ho"] = "ho", split_train: bool = True, random_seed: int = 0, n_folds: int = 3, diff --git a/autointent/metrics/retrieval.py b/autointent/metrics/retrieval.py index fd622aa87..d208482c7 100644 --- a/autointent/metrics/retrieval.py +++ b/autointent/metrics/retrieval.py @@ -114,12 +114,12 @@ def ignore_oos(func: RetrievalMetricFn) -> RetrievalMetricFn: """Ignore OOS in metrics calculation (decorator).""" @wraps(func) - def wrapper(query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE) -> float: + def wrapper(query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, k: int | None = None) -> float: query_labels_filtered = [lab for lab in query_labels if lab is not None] candidates_labels_filtered = [ cand for cand, lab in zip(candidates_labels, query_labels, strict=True) if lab is not None ] - return func(query_labels_filtered, candidates_labels_filtered) # type: ignore[arg-type] + return func(query_labels_filtered, candidates_labels_filtered, k) # type: ignore[arg-type] return wrapper # type: ignore[return-value] From 2f8642f4370895e99d23aab3ec021410a28563e3 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 10:46:32 +0300 Subject: [PATCH 26/74] bug fix --- autointent/modules/abc/_scoring.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/autointent/modules/abc/_scoring.py b/autointent/modules/abc/_scoring.py index bb20b37e1..4871de547 100644 --- a/autointent/modules/abc/_scoring.py +++ b/autointent/modules/abc/_scoring.py @@ -30,8 +30,7 @@ def fit( ) -> None: ... 
def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: - train_utterances, train_labels = self.get_train_data(context) - self.fit(train_utterances, train_labels) + self.fit(*self.get_train_data(context)) val_utterances = context.data_handler.validation_utterances(0) val_labels = context.data_handler.validation_labels(0) @@ -75,7 +74,7 @@ def get_assets(self) -> ScorerArtifact: return self._artifact def get_train_data(self, context: Context) -> tuple[list[str], ListOfLabels]: - return (context.data_handler.train_utterances(0), context.data_handler.train_labels(0)) # type: ignore[return-value] + return context.data_handler.train_utterances(0), context.data_handler.train_labels(0) # type: ignore[return-value] @abstractmethod def predict(self, utterances: list[str]) -> npt.NDArray[Any]: From 13e63d17b9624de6ef43862eb141bd12adab4d7d Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 10:46:37 +0300 Subject: [PATCH 27/74] update tests --- tests/nodes/test_logreg.py | 4 ++-- tests/nodes/test_retrieval.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/nodes/test_logreg.py b/tests/nodes/test_logreg.py index 162ef7af5..4f4dd4337 100644 --- a/tests/nodes/test_logreg.py +++ b/tests/nodes/test_logreg.py @@ -24,7 +24,7 @@ def test_embedding_multiclass(): load_path=trial.module_dump_dir, ) node = InferenceNode.from_config(config) - scores = node.module.score(context, "validation", [metric]) + scores = node.module.score(context, [metric]) assert isinstance(scores, dict) node.module.clear_cache() gc.collect() @@ -44,7 +44,7 @@ def test_embedding_multilabel(): load_path=trial.module_dump_dir, ) node = InferenceNode.from_config(config) - scores = node.module.score(context, "validation", [metric]) + scores = node.module.score(context, [metric]) assert isinstance(scores, dict) node.module.clear_cache() gc.collect() diff --git a/tests/nodes/test_retrieval.py b/tests/nodes/test_retrieval.py index 43156de06..987b58b5a 100644 --- a/tests/nodes/test_retrieval.py +++ b/tests/nodes/test_retrieval.py @@ -24,7 +24,7 @@ def test_embedding_multiclass(): load_path=trial.module_dump_dir, ) node = InferenceNode.from_config(config) - labels, distances, texts = node.module.predict(["hello", "card"]) + node.module.predict(["hello", "card"]) node.module.clear_cache() gc.collect() torch.cuda.empty_cache() @@ -43,7 +43,7 @@ def test_embedding_multilabel(): load_path=trial.module_dump_dir, ) node = InferenceNode.from_config(config) - labels, distances, texts = node.module.predict(["hello", "card"]) + node.module.predict(["hello", "card"]) node.module.clear_cache() gc.collect() torch.cuda.empty_cache() From 5ac89db77e48847ba8cc625d066ae7e02a0ff341 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 10:47:09 +0300 Subject: [PATCH 28/74] fix typing --- autointent/metrics/retrieval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autointent/metrics/retrieval.py b/autointent/metrics/retrieval.py index d208482c7..a9546ce9a 100644 --- a/autointent/metrics/retrieval.py +++ b/autointent/metrics/retrieval.py @@ -121,7 +121,7 @@ def wrapper(query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, ] return func(query_labels_filtered, candidates_labels_filtered, k) # type: ignore[arg-type] - return wrapper # type: ignore[return-value] + return wrapper @ignore_oos From d4bf50f015f9350bee3d80608e5e31266642c0f2 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 11:38:36 +0300 Subject: [PATCH 29/74] big fix --- 
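[Editor's note, not part of the original commit] The change below guards the concatenation of extra splits in `DataHandler._split_cv` (only non-train/test splits get merged and popped), which makes `scheme="cv"` usable end to end; the fold tests added in the following patches rely on it. Because each pass carves off `1 / (n_folds - j)` of the remaining training data, the folds come out equally sized (for 3 folds: one third, then half of the rest, then the remainder). A minimal usage sketch, assuming `dataset` is an already-loaded `autointent.Dataset` and that `DataHandler` is re-exported from `autointent.context.data_handler`:

    from autointent.context.data_handler import DataHandler

    handler = DataHandler(dataset, scheme="cv", n_folds=3)
    for train_texts, train_labels, val_texts, val_labels in handler.validation_iterator():
        ...  # fit a module on the merged train folds, evaluate it on the held-out fold

Each iteration yields the concatenated training folds with OOS samples filtered out, while the held-out fold keeps its OOS samples so that OOS-aware metrics can still see them.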
autointent/context/data_handler/_data_handler.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/autointent/context/data_handler/_data_handler.py b/autointent/context/data_handler/_data_handler.py index ca455f213..fce8970a7 100644 --- a/autointent/context/data_handler/_data_handler.py +++ b/autointent/context/data_handler/_data_handler.py @@ -249,9 +249,11 @@ def _split_validation_from_test(self, random_seed: int) -> None: ) def _split_cv(self, random_seed: int) -> None: - self.dataset[Split.TRAIN] = concatenate_datasets( - [self.dataset[split_name] for split_name in self.dataset if split_name not in [Split.TRAIN, Split.TEST]] - ) + extra_splits = [split_name for split_name in self.dataset if split_name not in [Split.TRAIN, Split.TEST]] + if extra_splits: + self.dataset[Split.TRAIN] = concatenate_datasets( + [self.dataset.pop(split_name) for split_name in extra_splits] + ) if Split.TEST not in self.dataset: self.dataset[Split.TRAIN], self.dataset[Split.TEST] = split_dataset( From 74dcb9839f889ff5763ec0b3be5ae828d6a2fe62 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 11:38:45 +0300 Subject: [PATCH 30/74] basic test on cv folding --- tests/data/test_data_handler.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/data/test_data_handler.py b/tests/data/test_data_handler.py index e93dba075..ad6f22cd4 100644 --- a/tests/data/test_data_handler.py +++ b/tests/data/test_data_handler.py @@ -173,3 +173,22 @@ def test_dataset_validation(mapping): def test_intents_validation(mapping): with pytest.raises(ValueError): # noqa: PT011 Dataset.from_dict(mapping) + + +def count_oos(split): + return len(split.filter(lambda sample: sample["label"] is None)) + + +def test_cv_folding(dataset): + DataHandler(dataset, scheme="cv", n_folds=3) + + desired_specs = { + "test": {"total": 12, "oos": 4}, + "train_0": {"total": 16, "oos": 5}, + "train_1": {"total": 16, "oos": 5}, + "train_2": {"total": 16, "oos": 6}, + } + + for split_name in dataset: + assert len(dataset[split_name]) == desired_specs[split_name]["total"] + assert count_oos(dataset[split_name]) == desired_specs[split_name]["oos"] From f13592b52f265883c9acbe6a2809a23fbd5e117b Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 11:51:16 +0300 Subject: [PATCH 31/74] add tests for metrics to ignore oos samples --- tests/metrics/test_retrieval_metrics.py | 11 +++++++++++ tests/metrics/test_retrieval_metrics_intersecting.py | 10 ++++++++++ 2 files changed, 21 insertions(+) diff --git a/tests/metrics/test_retrieval_metrics.py b/tests/metrics/test_retrieval_metrics.py index 84a55818f..59860831b 100644 --- a/tests/metrics/test_retrieval_metrics.py +++ b/tests/metrics/test_retrieval_metrics.py @@ -89,3 +89,14 @@ def test_ndcg(query_labels, candidates_labels, k, ground_truth): def test_mrr(query_labels, candidates_labels, k, ground_truth): output = retrieval_mrr(query_labels, candidates_labels, k) np.testing.assert_almost_equal(output, ground_truth) + + +@pytest.mark.parametrize( + ("query_labels", "candidates_labels", "ground_truth"), + [ + ([0,1,2,3], [[0,1,2], [0,1,2], [0,1,2], [0,1,2]], 0.75), + ([0,1,2,None], [[0,1,2], [0,1,2], [0,1,2], [0,1,2]], 1), + ] +) +def test_oos_ignoring(query_labels, candidates_labels, ground_truth): + assert ground_truth == retrieval_hit_rate(query_labels, candidates_labels) diff --git a/tests/metrics/test_retrieval_metrics_intersecting.py b/tests/metrics/test_retrieval_metrics_intersecting.py index b4d912878..279687204 100644 --- 
a/tests/metrics/test_retrieval_metrics_intersecting.py +++ b/tests/metrics/test_retrieval_metrics_intersecting.py @@ -121,3 +121,13 @@ def test_ndcg(query_labels, candidates_labels, k, ground_truth): def test_mrr(query_labels, candidates_labels, k, ground_truth): output = retrieval_mrr_intersecting(query_labels, candidates_labels, k) np.testing.assert_almost_equal(output, ground_truth) + +@pytest.mark.parametrize( + ("query_labels", "candidates_labels", "ground_truth"), + [ + ([[0,1],[0,1],[0,1],[0,1]], [[[0,1],[0,1]], [[0,1],[0,1]], [[0,1],[0,1]], [[1,0],[1,0]]], 0.75), + ([[0,1],[0,1],[0,1],None], [[[0,1],[0,1]], [[0,1],[0,1]], [[0,1],[0,1]], [[1,0],[1,0]]], 1.0), + ] +) +def test_oos_ignoring(query_labels, candidates_labels, ground_truth): + assert ground_truth == retrieval_hit_rate_intersecting(query_labels, candidates_labels) From 080d7cc1ae1065676944d31238a7ced3c88de456 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 12:00:58 +0300 Subject: [PATCH 32/74] add tests for cv iterator --- tests/data/test_data_handler.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/data/test_data_handler.py b/tests/data/test_data_handler.py index ad6f22cd4..1df3bdae4 100644 --- a/tests/data/test_data_handler.py +++ b/tests/data/test_data_handler.py @@ -192,3 +192,31 @@ def test_cv_folding(dataset): for split_name in dataset: assert len(dataset[split_name]) == desired_specs[split_name]["total"] assert count_oos(dataset[split_name]) == desired_specs[split_name]["oos"] + + +def count_oos_labels(split): + return sum(sample is None for sample in split) + +def test_cv_iterator(dataset): + dh = DataHandler(dataset, scheme="cv", n_folds=3) + + desired_specs = [{ + "train": {"total": 21, "oos": 0}, + "val": {"total": 16, "oos": 5}, + }, + { + "train": {"total": 21, "oos": 0}, + "val": {"total": 16, "oos": 5}, + }, + { + "train": {"total": 22, "oos": 0}, + "val": {"total": 16, "oos": 6}, + }, + ] + + for i, (x_train, y_train, x_val, y_val) in enumerate(dh.validation_iterator()): + specs = desired_specs[i] + assert len(x_train) == len(y_train) == specs["train"]["total"] + assert count_oos_labels(y_train) == specs["train"]["oos"] + assert len(x_val) == len(y_val) == specs["val"]["total"] + assert count_oos_labels(y_val) == specs["val"]["oos"] From bee4e73b8b0d0b01b2ef0f782eaf1163e3b98032 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 12:01:18 +0300 Subject: [PATCH 33/74] fix codestyle --- tests/data/test_data_handler.py | 26 ++++++++++--------- tests/metrics/test_retrieval_metrics.py | 10 +++---- .../test_retrieval_metrics_intersecting.py | 15 +++++++---- 3 files changed, 29 insertions(+), 22 deletions(-) diff --git a/tests/data/test_data_handler.py b/tests/data/test_data_handler.py index 1df3bdae4..8e4f60cd7 100644 --- a/tests/data/test_data_handler.py +++ b/tests/data/test_data_handler.py @@ -197,21 +197,23 @@ def test_cv_folding(dataset): def count_oos_labels(split): return sum(sample is None for sample in split) + def test_cv_iterator(dataset): dh = DataHandler(dataset, scheme="cv", n_folds=3) - desired_specs = [{ - "train": {"total": 21, "oos": 0}, - "val": {"total": 16, "oos": 5}, - }, - { - "train": {"total": 21, "oos": 0}, - "val": {"total": 16, "oos": 5}, - }, - { - "train": {"total": 22, "oos": 0}, - "val": {"total": 16, "oos": 6}, - }, + desired_specs = [ + { + "train": {"total": 21, "oos": 0}, + "val": {"total": 16, "oos": 5}, + }, + { + "train": {"total": 21, "oos": 0}, + "val": {"total": 16, "oos": 5}, + }, + { + "train": {"total": 22, 
"oos": 0}, + "val": {"total": 16, "oos": 6}, + }, ] for i, (x_train, y_train, x_val, y_val) in enumerate(dh.validation_iterator()): diff --git a/tests/metrics/test_retrieval_metrics.py b/tests/metrics/test_retrieval_metrics.py index 59860831b..86e4a3740 100644 --- a/tests/metrics/test_retrieval_metrics.py +++ b/tests/metrics/test_retrieval_metrics.py @@ -92,11 +92,11 @@ def test_mrr(query_labels, candidates_labels, k, ground_truth): @pytest.mark.parametrize( - ("query_labels", "candidates_labels", "ground_truth"), - [ - ([0,1,2,3], [[0,1,2], [0,1,2], [0,1,2], [0,1,2]], 0.75), - ([0,1,2,None], [[0,1,2], [0,1,2], [0,1,2], [0,1,2]], 1), - ] + ("query_labels", "candidates_labels", "ground_truth"), + [ + ([0, 1, 2, 3], [[0, 1, 2], [0, 1, 2], [0, 1, 2], [0, 1, 2]], 0.75), + ([0, 1, 2, None], [[0, 1, 2], [0, 1, 2], [0, 1, 2], [0, 1, 2]], 1), + ], ) def test_oos_ignoring(query_labels, candidates_labels, ground_truth): assert ground_truth == retrieval_hit_rate(query_labels, candidates_labels) diff --git a/tests/metrics/test_retrieval_metrics_intersecting.py b/tests/metrics/test_retrieval_metrics_intersecting.py index 279687204..5ecca6603 100644 --- a/tests/metrics/test_retrieval_metrics_intersecting.py +++ b/tests/metrics/test_retrieval_metrics_intersecting.py @@ -122,12 +122,17 @@ def test_mrr(query_labels, candidates_labels, k, ground_truth): output = retrieval_mrr_intersecting(query_labels, candidates_labels, k) np.testing.assert_almost_equal(output, ground_truth) + @pytest.mark.parametrize( - ("query_labels", "candidates_labels", "ground_truth"), - [ - ([[0,1],[0,1],[0,1],[0,1]], [[[0,1],[0,1]], [[0,1],[0,1]], [[0,1],[0,1]], [[1,0],[1,0]]], 0.75), - ([[0,1],[0,1],[0,1],None], [[[0,1],[0,1]], [[0,1],[0,1]], [[0,1],[0,1]], [[1,0],[1,0]]], 1.0), - ] + ("query_labels", "candidates_labels", "ground_truth"), + [ + ( + [[0, 1], [0, 1], [0, 1], [0, 1]], + [[[0, 1], [0, 1]], [[0, 1], [0, 1]], [[0, 1], [0, 1]], [[1, 0], [1, 0]]], + 0.75, + ), + ([[0, 1], [0, 1], [0, 1], None], [[[0, 1], [0, 1]], [[0, 1], [0, 1]], [[0, 1], [0, 1]], [[1, 0], [1, 0]]], 1.0), + ], ) def test_oos_ignoring(query_labels, candidates_labels, ground_truth): assert ground_truth == retrieval_hit_rate_intersecting(query_labels, candidates_labels) From 75e47c8b826adfe5d79ff500b7c92748a9dd7659 Mon Sep 17 00:00:00 2001 From: voorhs Date: Thu, 6 Feb 2025 10:51:07 +0300 Subject: [PATCH 34/74] minor bug fix --- autointent/modules/abc/_decision.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autointent/modules/abc/_decision.py b/autointent/modules/abc/_decision.py index 6d78d050a..dc128a11f 100644 --- a/autointent/modules/abc/_decision.py +++ b/autointent/modules/abc/_decision.py @@ -80,7 +80,7 @@ def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: val_scores = scores[j] train_folds = [i for i in range(context.data_handler.n_folds) if i != j] train_labels = [ut for i_fold in train_folds for ut in labels[i_fold]] - train_scores = [ut for i_fold in train_folds for ut in scores[i_fold]] + train_scores = np.array([sc for i_fold in train_folds for sc in scores[i_fold]]) self.fit(train_scores, train_labels, context.data_handler.tags) # type: ignore[arg-type] val_decisions = self.predict(val_scores) for name, fn in chosen_metrics.items(): From f64ee3b3f450db408fff4f00960437c359ceb8fc Mon Sep 17 00:00:00 2001 From: voorhs Date: Thu, 6 Feb 2025 12:02:12 +0300 Subject: [PATCH 35/74] fix codestyle --- autointent/generation/utterances/__init__.py | 42 +++++++++---------- 
.../utterances/evolution/__init__.py | 34 +++++++-------- .../evolution/chat_templates/concrete.py | 5 +-- .../evolution/chat_templates/goofy.py | 3 +- 4 files changed, 40 insertions(+), 44 deletions(-) diff --git a/autointent/generation/utterances/__init__.py b/autointent/generation/utterances/__init__.py index 7534eb1a0..006a163df 100644 --- a/autointent/generation/utterances/__init__.py +++ b/autointent/generation/utterances/__init__.py @@ -1,28 +1,28 @@ from .basic import SynthesizerChatTemplate, UtteranceGenerator from .evolution import ( - AbstractEvolution, - ConcreteEvolution, - EvolutionChatTemplate, - FormalEvolution, - FunnyEvolution, - GoofyEvolution, - InformalEvolution, - ReasoningEvolution, - UtteranceEvolver, + AbstractEvolution, + ConcreteEvolution, + EvolutionChatTemplate, + FormalEvolution, + FunnyEvolution, + GoofyEvolution, + InformalEvolution, + ReasoningEvolution, + UtteranceEvolver, ) from .generator import Generator __all__ = [ - "AbstractEvolution", - "ConcreteEvolution", - "EvolutionChatTemplate", - "FormalEvolution", - "FunnyEvolution", - "Generator", - "GoofyEvolution", - "InformalEvolution", - "ReasoningEvolution", - "SynthesizerChatTemplate", - "UtteranceEvolver", - "UtteranceGenerator", + "AbstractEvolution", + "ConcreteEvolution", + "EvolutionChatTemplate", + "FormalEvolution", + "FunnyEvolution", + "Generator", + "GoofyEvolution", + "InformalEvolution", + "ReasoningEvolution", + "SynthesizerChatTemplate", + "UtteranceEvolver", + "UtteranceGenerator", ] diff --git a/autointent/generation/utterances/evolution/__init__.py b/autointent/generation/utterances/evolution/__init__.py index 7e352bd86..596d83a3f 100644 --- a/autointent/generation/utterances/evolution/__init__.py +++ b/autointent/generation/utterances/evolution/__init__.py @@ -1,23 +1,23 @@ from .chat_templates import ( - AbstractEvolution, - ConcreteEvolution, - EvolutionChatTemplate, - FormalEvolution, - FunnyEvolution, - GoofyEvolution, - InformalEvolution, - ReasoningEvolution, + AbstractEvolution, + ConcreteEvolution, + EvolutionChatTemplate, + FormalEvolution, + FunnyEvolution, + GoofyEvolution, + InformalEvolution, + ReasoningEvolution, ) from .evolver import UtteranceEvolver __all__ = [ - "AbstractEvolution", - "ConcreteEvolution", - "EvolutionChatTemplate", - "FormalEvolution", - "FunnyEvolution", - "GoofyEvolution", - "InformalEvolution", - "ReasoningEvolution", - "UtteranceEvolver", + "AbstractEvolution", + "ConcreteEvolution", + "EvolutionChatTemplate", + "FormalEvolution", + "FunnyEvolution", + "GoofyEvolution", + "InformalEvolution", + "ReasoningEvolution", + "UtteranceEvolver", ] diff --git a/autointent/generation/utterances/evolution/chat_templates/concrete.py b/autointent/generation/utterances/evolution/chat_templates/concrete.py index 4a7ab52f2..dcca78bac 100644 --- a/autointent/generation/utterances/evolution/chat_templates/concrete.py +++ b/autointent/generation/utterances/evolution/chat_templates/concrete.py @@ -29,10 +29,7 @@ class ConcreteEvolution(EvolutionChatTemplate): Message(role=Role.ASSISTANT, content="I want to reserve a table for 4 persons at 9 pm."), Message( role=Role.USER, - content=( - "Intent name: requesting technical support\n" - "Utterance: I'm having trouble with my laptop." 
- ), + content=("Intent name: requesting technical support\n" "Utterance: I'm having trouble with my laptop."), ), Message(role=Role.ASSISTANT, content="My laptop is constantly rebooting and overheating."), ] diff --git a/autointent/generation/utterances/evolution/chat_templates/goofy.py b/autointent/generation/utterances/evolution/chat_templates/goofy.py index 15a6fcb17..c53156054 100644 --- a/autointent/generation/utterances/evolution/chat_templates/goofy.py +++ b/autointent/generation/utterances/evolution/chat_templates/goofy.py @@ -36,8 +36,7 @@ class GoofyEvolution(EvolutionChatTemplate): ), ), Message( - role=Role.ASSISTANT, - content="My laptop's having an existential crisis—keeps rebooting and melting. Help!" + role=Role.ASSISTANT, content="My laptop's having an existential crisis—keeps rebooting and melting. Help!" ), ] From 39e69d83682570c316963e88b39137384f299e17 Mon Sep 17 00:00:00 2001 From: voorhs Date: Thu, 6 Feb 2025 12:30:23 +0300 Subject: [PATCH 36/74] add test for cv --- tests/pipeline/test_optimization.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/pipeline/test_optimization.py b/tests/pipeline/test_optimization.py index 050eca742..6dd35b690 100644 --- a/tests/pipeline/test_optimization.py +++ b/tests/pipeline/test_optimization.py @@ -18,35 +18,37 @@ "task_type", ["multiclass", "multilabel", "description"], ) -def test_no_context_optimization(dataset, task_type): +def test_cv(dataset, task_type): project_dir = setup_environment() search_space = get_search_space(task_type) pipeline_optimizer = Pipeline.from_search_space(search_space) - pipeline_optimizer.set_config(LoggingConfig(project_dir=project_dir, dump_modules=False)) + pipeline_optimizer.set_config(LoggingConfig(project_dir=project_dir, dump_modules=True, clear_ram=True)) pipeline_optimizer.set_config(VectorIndexConfig()) pipeline_optimizer.set_config(EmbedderConfig(batch_size=16, max_length=32, device="cpu")) if task_type == "multilabel": dataset = dataset.to_multilabel() - context = pipeline_optimizer.fit(dataset) + context = pipeline_optimizer.fit(dataset, scheme="cv") context.dump() + assert os.listdir(pipeline_optimizer.logging_config.dump_dir) + @pytest.mark.parametrize( "task_type", ["multiclass", "multilabel", "description"], ) -def test_save_db(dataset, task_type): +def test_no_context_optimization(dataset, task_type): project_dir = setup_environment() search_space = get_search_space(task_type) pipeline_optimizer = Pipeline.from_search_space(search_space) - pipeline_optimizer.set_config(LoggingConfig(project_dir=project_dir, dump_modules=False)) - pipeline_optimizer.set_config(VectorIndexConfig(save_db=True)) + pipeline_optimizer.set_config(LoggingConfig(project_dir=project_dir, dump_modules=False, clear_ram=False)) + pipeline_optimizer.set_config(VectorIndexConfig()) pipeline_optimizer.set_config(EmbedderConfig(batch_size=16, max_length=32, device="cpu")) if task_type == "multilabel": @@ -66,7 +68,7 @@ def test_dump_modules(dataset, task_type): pipeline_optimizer = Pipeline.from_search_space(search_space) - pipeline_optimizer.set_config(LoggingConfig(project_dir=project_dir, dump_modules=True)) + pipeline_optimizer.set_config(LoggingConfig(project_dir=project_dir, dump_modules=True, clear_ram=True)) pipeline_optimizer.set_config(VectorIndexConfig()) pipeline_optimizer.set_config(EmbedderConfig(batch_size=16, max_length=32, device="cpu")) From ef11594efac8370b442c2f573202bb6049265c89 Mon Sep 17 00:00:00 2001 From: voorhs Date: Thu, 6 Feb 2025 12:30:28 +0300 
Subject: [PATCH 37/74] bug fix --- autointent/modules/abc/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autointent/modules/abc/_base.py b/autointent/modules/abc/_base.py index 2674510eb..d06fe1018 100644 --- a/autointent/modules/abc/_base.py +++ b/autointent/modules/abc/_base.py @@ -195,7 +195,7 @@ def _get_task_specs(labels: ListOfGenericLabels) -> tuple[int, bool, bool]: contains_oos_samples = any(label is None for label in labels) in_domain_label = next(lab for lab in labels if lab is not None) multilabel = isinstance(in_domain_label, list) - n_classes = len(labels[0]) if multilabel else len(set(labels).difference([None])) # type: ignore[arg-type] + n_classes = len(in_domain_label) if multilabel else len(set(labels).difference([None])) # type: ignore[arg-type] return n_classes, multilabel, contains_oos_samples @abstractmethod From bffffb1b72d9aa6fb7eba9002a52c8835baaf13d Mon Sep 17 00:00:00 2001 From: voorhs Date: Thu, 6 Feb 2025 13:25:03 +0300 Subject: [PATCH 38/74] implement cv iterator for description scorer --- .../scoring/_description/description.py | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/autointent/modules/scoring/_description/description.py b/autointent/modules/scoring/_description/description.py index d84b7d6a7..28cd84d9a 100644 --- a/autointent/modules/scoring/_description/description.py +++ b/autointent/modules/scoring/_description/description.py @@ -8,7 +8,9 @@ from sklearn.metrics.pairwise import cosine_similarity from autointent import Context, Embedder +from autointent.context.optimization_info import ScorerArtifact from autointent.custom_types import ListOfLabels +from autointent.metrics import SCORING_METRICS_MULTICLASS, SCORING_METRICS_MULTILABEL from autointent.modules.abc import ScoringModule @@ -153,3 +155,31 @@ def get_train_data(self, context: Context) -> tuple[list[str], ListOfLabels, lis context.data_handler.train_labels(0), context.data_handler.intent_descriptions, ) + + def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: + metrics_dict = SCORING_METRICS_MULTILABEL if context.is_multilabel() else SCORING_METRICS_MULTICLASS + chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics} + + all_val_scores = [] + metrics_values: dict[str, list[float]] = {name: [] for name in chosen_metrics} + for j in range(context.data_handler.n_folds): + val_labels = context.data_handler.train_labels(j) + val_utterances = context.data_handler.train_utterances(j) + + train_folds = [i for i in range(context.data_handler.n_folds) if i != j] + train_labels = [lab for i_fold in train_folds for lab in context.data_handler.train_labels(i_fold)] + train_utterances = [ut for i_fold in train_folds for ut in context.data_handler.train_utterances(i_fold)] + + # filter out all OOS samples from train + train_utterances = [ut for ut, lab in zip(train_utterances, train_labels, strict=True) if lab is not None] + train_labels = [lab for lab in train_labels if lab is not None] + + self.fit(train_utterances, train_labels, context.data_handler.intent_descriptions) # type: ignore[arg-type] + + val_scores = self.predict(val_utterances) + for name, fn in chosen_metrics.items(): + metrics_values[name].append(fn(val_labels, val_scores)) + all_val_scores.append(val_scores) + + self._artifact = ScorerArtifact(folded_scores=all_val_scores) + return {name: float(np.mean(values_list)) for name, values_list in metrics_values.items()} From 9602af840c05da9e2d4f380c13f0e6aecb7541bd Mon Sep 17 00:00:00 2001 
From: voorhs Date: Thu, 6 Feb 2025 13:28:30 +0300 Subject: [PATCH 39/74] refactor cv iterator for description node --- autointent/modules/abc/_base.py | 3 +- .../scoring/_description/description.py | 35 ++++++++----------- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/autointent/modules/abc/_base.py b/autointent/modules/abc/_base.py index d06fe1018..4ebac0af2 100644 --- a/autointent/modules/abc/_base.py +++ b/autointent/modules/abc/_base.py @@ -136,12 +136,13 @@ def score_metrics_cv( self, metrics_dict: dict[str, Any], cv_iterator: Iterable[tuple[list[str], ListOfLabels, list[str], ListOfLabels]], + **fit_kwargs: dict[str, Any], ) -> tuple[dict[str, float], list[ListOfGenericLabels] | list[npt.NDArray[Any]]]: metrics_values: dict[str, list[float]] = {name: [] for name in metrics_dict} all_val_preds = [] for train_utterances, train_labels, val_utterances, val_labels in cv_iterator: - self.fit(train_utterances, train_labels) # type: ignore[arg-type] + self.fit(train_utterances, train_labels, **fit_kwargs) # type: ignore[arg-type] val_preds = self.predict(val_utterances) for name, fn in metrics_dict.items(): metrics_values[name].append(fn(val_labels, val_preds)) diff --git a/autointent/modules/scoring/_description/description.py b/autointent/modules/scoring/_description/description.py index 28cd84d9a..d16b8c18f 100644 --- a/autointent/modules/scoring/_description/description.py +++ b/autointent/modules/scoring/_description/description.py @@ -157,29 +157,22 @@ def get_train_data(self, context: Context) -> tuple[list[str], ListOfLabels, lis ) def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: + """ + Evaluate the scorer on a test set and compute the specified metric. + + :param context: Context containing test set and other data. 
+ :param split: Target split + :return: Computed metrics value for the test set or error code of metrics + """ metrics_dict = SCORING_METRICS_MULTILABEL if context.is_multilabel() else SCORING_METRICS_MULTICLASS chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics} - all_val_scores = [] - metrics_values: dict[str, list[float]] = {name: [] for name in chosen_metrics} - for j in range(context.data_handler.n_folds): - val_labels = context.data_handler.train_labels(j) - val_utterances = context.data_handler.train_utterances(j) - - train_folds = [i for i in range(context.data_handler.n_folds) if i != j] - train_labels = [lab for i_fold in train_folds for lab in context.data_handler.train_labels(i_fold)] - train_utterances = [ut for i_fold in train_folds for ut in context.data_handler.train_utterances(i_fold)] - - # filter out all OOS samples from train - train_utterances = [ut for ut, lab in zip(train_utterances, train_labels, strict=True) if lab is not None] - train_labels = [lab for lab in train_labels if lab is not None] - - self.fit(train_utterances, train_labels, context.data_handler.intent_descriptions) # type: ignore[arg-type] - - val_scores = self.predict(val_utterances) - for name, fn in chosen_metrics.items(): - metrics_values[name].append(fn(val_labels, val_scores)) - all_val_scores.append(val_scores) + metrics_calculated, all_val_scores = self.score_metrics_cv( + chosen_metrics, + context.data_handler.validation_iterator(), + descriptions=context.data_handler.intent_descriptions, + ) self._artifact = ScorerArtifact(folded_scores=all_val_scores) - return {name: float(np.mean(values_list)) for name, values_list in metrics_values.items()} + + return metrics_calculated From ea39b36b1f9ec8d84e6ec66e551a6f09bc814338 Mon Sep 17 00:00:00 2001 From: voorhs Date: Thu, 6 Feb 2025 13:30:22 +0300 Subject: [PATCH 40/74] fix typing --- autointent/modules/abc/_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/autointent/modules/abc/_base.py b/autointent/modules/abc/_base.py index 4ebac0af2..cef5976df 100644 --- a/autointent/modules/abc/_base.py +++ b/autointent/modules/abc/_base.py @@ -132,11 +132,11 @@ def score_metrics_ho(params: tuple[Any, Any], metrics_dict: dict[str, Any]) -> d metrics[metric_name] = metric_fn(*params) return metrics - def score_metrics_cv( + def score_metrics_cv( # type: ignore[no-untyped-def] self, metrics_dict: dict[str, Any], cv_iterator: Iterable[tuple[list[str], ListOfLabels, list[str], ListOfLabels]], - **fit_kwargs: dict[str, Any], + **fit_kwargs, # noqa: ANN003 ) -> tuple[dict[str, float], list[ListOfGenericLabels] | list[npt.NDArray[Any]]]: metrics_values: dict[str, list[float]] = {name: [] for name in metrics_dict} all_val_preds = [] From 903dfa7785287b76c8ce2287a67ec5ab637c6dac Mon Sep 17 00:00:00 2001 From: voorhs Date: Thu, 6 Feb 2025 13:55:54 +0300 Subject: [PATCH 41/74] add cache cleaning before refitting --- autointent/_ranker.py | 5 +++++ autointent/modules/embedding/_logreg.py | 5 ++++- autointent/modules/embedding/_retrieval.py | 3 +++ autointent/modules/scoring/_description/description.py | 3 +++ autointent/modules/scoring/_dnnc/dnnc.py | 3 +++ autointent/modules/scoring/_knn/knn.py | 3 +++ autointent/modules/scoring/_knn/rerank_scorer.py | 6 ++++++ autointent/modules/scoring/_linear.py | 3 +++ autointent/modules/scoring/_mlknn/mlknn.py | 3 +++ autointent/modules/scoring/_sklearn/sklearn_scorer.py | 3 +++ 10 files changed, 36 insertions(+), 1 deletion(-) diff --git a/autointent/_ranker.py 
b/autointent/_ranker.py index 7d5663557..879c1b8ee 100644 --- a/autointent/_ranker.py +++ b/autointent/_ranker.py @@ -272,3 +272,8 @@ def load(cls, path: Path) -> "Ranker": metadata: CrossEncoderMetadata = json.load(file) return cls(**metadata, classifier_head=clf) + + def clear_ram(self) -> None: + self.cross_encoder.cpu() + del self.cross_encoder + torch.cuda.empty_cache() diff --git a/autointent/modules/embedding/_logreg.py b/autointent/modules/embedding/_logreg.py index c50dab19d..89138242d 100644 --- a/autointent/modules/embedding/_logreg.py +++ b/autointent/modules/embedding/_logreg.py @@ -96,7 +96,7 @@ def from_context( ) def clear_cache(self) -> None: - pass + self._embedder.clear_ram() def fit(self, utterances: list[str], labels: ListOfLabels) -> None: """ @@ -105,6 +105,9 @@ def fit(self, utterances: list[str], labels: ListOfLabels) -> None: :param utterances: List of text data to index. :param labels: List of corresponding labels for the utterances. """ + if hasattr(self, "_embedder"): + self.clear_cache() + self._validate_task(labels) self._embedder = Embedder( diff --git a/autointent/modules/embedding/_retrieval.py b/autointent/modules/embedding/_retrieval.py index c68e62af0..5063a1b00 100644 --- a/autointent/modules/embedding/_retrieval.py +++ b/autointent/modules/embedding/_retrieval.py @@ -96,6 +96,9 @@ def fit(self, utterances: list[str], labels: ListOfLabels) -> None: :param utterances: List of text data to index. :param labels: List of corresponding labels for the utterances. """ + if hasattr(self, "_vector_index"): + self.clear_cache() + self._validate_task(labels) self._vector_index = VectorIndex( diff --git a/autointent/modules/scoring/_description/description.py b/autointent/modules/scoring/_description/description.py index d16b8c18f..79ade2aa7 100644 --- a/autointent/modules/scoring/_description/description.py +++ b/autointent/modules/scoring/_description/description.py @@ -109,6 +109,9 @@ def fit( :param descriptions: List of intent descriptions. :raises ValueError: If descriptions contain None values or embeddings mismatch utterances. """ + if hasattr(self, "_embedder"): + self._embedder.clear_ram() + self._validate_task(labels) if any(description is None for description in descriptions): diff --git a/autointent/modules/scoring/_dnnc/dnnc.py b/autointent/modules/scoring/_dnnc/dnnc.py index 6d4411f52..0610785ae 100644 --- a/autointent/modules/scoring/_dnnc/dnnc.py +++ b/autointent/modules/scoring/_dnnc/dnnc.py @@ -157,6 +157,9 @@ def fit(self, utterances: list[str], labels: ListOfLabels) -> None: :param labels: List of labels corresponding to the utterances. :raises ValueError: If the vector index mismatches the provided utterances. """ + if hasattr(self, "_vector_index"): + self.clear_cache() + self._validate_task(labels) self._vector_index = VectorIndex( diff --git a/autointent/modules/scoring/_knn/knn.py b/autointent/modules/scoring/_knn/knn.py index cda70bd9a..9b9eed8fc 100644 --- a/autointent/modules/scoring/_knn/knn.py +++ b/autointent/modules/scoring/_knn/knn.py @@ -132,6 +132,9 @@ def fit(self, utterances: list[str], labels: ListOfLabels) -> None: :param labels: List of labels corresponding to the utterances. :raises ValueError: If the vector index mismatches the provided utterances. 
""" + if hasattr(self, "_vector_index"): + self.clear_cache() + self._validate_task(labels) self._vector_index = VectorIndex( diff --git a/autointent/modules/scoring/_knn/rerank_scorer.py b/autointent/modules/scoring/_knn/rerank_scorer.py index 546f0737a..671a2a2d7 100644 --- a/autointent/modules/scoring/_knn/rerank_scorer.py +++ b/autointent/modules/scoring/_knn/rerank_scorer.py @@ -127,6 +127,9 @@ def fit(self, utterances: list[str], labels: ListOfLabels) -> None: :param utterances: List of utterances to fit the scorer. :param labels: List of labels corresponding to the utterances. """ + if hasattr(self, "_scorer"): + self.clear_cache() + self._scorer = Ranker( self.cross_encoder_name, device=self.cross_encoder_device, @@ -138,6 +141,9 @@ def fit(self, utterances: list[str], labels: ListOfLabels) -> None: super().fit(utterances, labels) + def clear_cache(self) -> None: + self._scorer.clear_ram() + def _predict(self, utterances: list[str]) -> tuple[npt.NDArray[Any], list[list[str]]]: """ Predict the scores and neighbors for given utterances. diff --git a/autointent/modules/scoring/_linear.py b/autointent/modules/scoring/_linear.py index b07bd0f89..e0279b31a 100644 --- a/autointent/modules/scoring/_linear.py +++ b/autointent/modules/scoring/_linear.py @@ -127,6 +127,9 @@ def fit( :param labels: List of labels corresponding to the utterances. :raises ValueError: If the vector index mismatches the provided utterances. """ + if hasattr(self, "_clf"): + self.clear_cache() + self._validate_task(labels) embedder = Embedder( diff --git a/autointent/modules/scoring/_mlknn/mlknn.py b/autointent/modules/scoring/_mlknn/mlknn.py index d0622306a..b43763ab0 100644 --- a/autointent/modules/scoring/_mlknn/mlknn.py +++ b/autointent/modules/scoring/_mlknn/mlknn.py @@ -137,6 +137,9 @@ def fit(self, utterances: list[str], labels: ListOfLabels) -> None: :raises TypeError: If the labels are not multi-label. :raises ValueError: If the vector index mismatches the provided utterances. """ + if hasattr(self, "_vector_index"): + self.clear_cache() + self._validate_task(labels) self._vector_index = VectorIndex( diff --git a/autointent/modules/scoring/_sklearn/sklearn_scorer.py b/autointent/modules/scoring/_sklearn/sklearn_scorer.py index a4d9753b4..a54c200bf 100644 --- a/autointent/modules/scoring/_sklearn/sklearn_scorer.py +++ b/autointent/modules/scoring/_sklearn/sklearn_scorer.py @@ -112,6 +112,9 @@ def fit( :param labels: List of labels corresponding to the utterances. :raises ValueError: If the vector index mismatches the provided utterances. """ + if hasattr(self, "_clf"): + self.clear_cache() + self._validate_task(labels) embedder = Embedder( From 5cbf83ee03749229ae50dd8b5ef93c05c3d3f765 Mon Sep 17 00:00:00 2001 From: voorhs Date: Thu, 6 Feb 2025 14:25:19 +0300 Subject: [PATCH 42/74] bug fix --- autointent/_ranker.py | 4 +++- autointent/modules/scoring/_knn/knn.py | 4 ++-- autointent/modules/scoring/_knn/rerank_scorer.py | 3 ++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/autointent/_ranker.py b/autointent/_ranker.py index 879c1b8ee..a19b11d17 100644 --- a/autointent/_ranker.py +++ b/autointent/_ranker.py @@ -3,6 +3,7 @@ Can be used to rank retrieved sentences by meaning closeness to provided utterance. 
""" +import gc import itertools as it import json import logging @@ -274,6 +275,7 @@ def load(cls, path: Path) -> "Ranker": return cls(**metadata, classifier_head=clf) def clear_ram(self) -> None: - self.cross_encoder.cpu() + self.cross_encoder.model.cpu() del self.cross_encoder + gc.collect() torch.cuda.empty_cache() diff --git a/autointent/modules/scoring/_knn/knn.py b/autointent/modules/scoring/_knn/knn.py index 9b9eed8fc..ab665dfe7 100644 --- a/autointent/modules/scoring/_knn/knn.py +++ b/autointent/modules/scoring/_knn/knn.py @@ -124,7 +124,7 @@ def get_embedder_name(self) -> str: """ return self.embedder_name - def fit(self, utterances: list[str], labels: ListOfLabels) -> None: + def fit(self, utterances: list[str], labels: ListOfLabels, clear_cache: bool = False) -> None: """ Fit the scorer by training or loading the vector index. @@ -132,7 +132,7 @@ def fit(self, utterances: list[str], labels: ListOfLabels) -> None: :param labels: List of labels corresponding to the utterances. :raises ValueError: If the vector index mismatches the provided utterances. """ - if hasattr(self, "_vector_index"): + if hasattr(self, "_vector_index") and clear_cache: self.clear_cache() self._validate_task(labels) diff --git a/autointent/modules/scoring/_knn/rerank_scorer.py b/autointent/modules/scoring/_knn/rerank_scorer.py index 671a2a2d7..152f147ca 100644 --- a/autointent/modules/scoring/_knn/rerank_scorer.py +++ b/autointent/modules/scoring/_knn/rerank_scorer.py @@ -139,10 +139,11 @@ def fit(self, utterances: list[str], labels: ListOfLabels) -> None: ) self._scorer.fit(utterances, labels) - super().fit(utterances, labels) + super().fit(utterances, labels, clear_cache=False) def clear_cache(self) -> None: self._scorer.clear_ram() + super().clear_cache() def _predict(self, utterances: list[str]) -> tuple[npt.NDArray[Any], list[list[str]]]: """ From 89c64065f7bfcd324c175433daa65f40a4fd956e Mon Sep 17 00:00:00 2001 From: voorhs Date: Thu, 6 Feb 2025 15:10:25 +0300 Subject: [PATCH 43/74] implement refitting the whole pipeline with all train data --- autointent/_pipeline/_pipeline.py | 26 +++++++- .../context/data_handler/_data_handler.py | 64 ++++++++++++------- tests/pipeline/test_optimization.py | 2 +- 3 files changed, 68 insertions(+), 24 deletions(-) diff --git a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py index 42a93e415..acabe02bd 100644 --- a/autointent/_pipeline/_pipeline.py +++ b/autointent/_pipeline/_pipeline.py @@ -122,7 +122,7 @@ def _is_inference(self) -> bool: """ return isinstance(self.nodes[NodeType.scoring], InferenceNode) - def fit(self, dataset: Dataset, scheme: Literal["ho", "cv"] = "ho") -> Context: + def fit(self, dataset: Dataset, scheme: Literal["ho", "cv"] = "ho", refit_after: bool = False) -> Context: """ Optimize the pipeline from dataset. @@ -150,6 +150,9 @@ def fit(self, dataset: Dataset, scheme: Literal["ho", "cv"] = "ho") -> Context: self.nodes = {node.node_type: node for node in nodes_list} + if refit_after: + self._refit(context) + predictions = self.predict(context.data_handler.test_utterances()) for metric_name, metric in PREDICTION_METRICS_MULTILABEL.items(): context.optimization_info.pipeline_metrics[metric_name] = metric( @@ -210,6 +213,27 @@ def predict(self, utterances: list[str]) -> ListOfGenericLabels: scores = scoring_module.predict(utterances) return decision_module.predict(scores) + def _refit(self, context: Context) -> None: + """ + Fit pipeline of already selected modules with all train data. 
+ + :param utterances: list of utterances + :return: list of predicted labels + """ + if not self._is_inference(): + msg = "Pipeline in optimization mode cannot perform inference" + raise RuntimeError(msg) + + scoring_module: ScoringModule = self.nodes[NodeType.scoring].module # type: ignore[assignment,union-attr] + decision_module: DecisionModule = self.nodes[NodeType.decision].module # type: ignore[assignment,union-attr] + + context.data_handler.prepare_for_refit() + + scoring_module.fit(context.data_handler.train_utterances(0), context.data_handler.train_labels(0)) + scores = scoring_module.predict(context.data_handler.train_utterances(1)) + + decision_module.fit(scores, context.data_handler.train_labels(1)) + def predict_with_metadata(self, utterances: list[str]) -> InferencePipelineOutput: """ Predict the labels for the utterances with metadata. diff --git a/autointent/context/data_handler/_data_handler.py b/autointent/context/data_handler/_data_handler.py index fce8970a7..6a1d4fa59 100644 --- a/autointent/context/data_handler/_data_handler.py +++ b/autointent/context/data_handler/_data_handler.py @@ -46,6 +46,7 @@ def __init__( threshold search). """ set_seed(random_seed) + self.random_seed = random_seed self.dataset = dataset @@ -54,9 +55,9 @@ def __init__( self.n_folds = n_folds if scheme == "ho": - self._split_ho(random_seed, split_train) + self._split_ho(split_train) elif scheme == "cv": - self._split_cv(random_seed) + self._split_cv() self.regexp_patterns = [ RegexPatterns( @@ -185,20 +186,20 @@ def validation_iterator(self) -> Generator[tuple[list[str], ListOfLabels, list[s train_labels = [lab for lab in train_labels if lab is not None] yield train_utterances, train_labels, val_utterances, val_labels # type: ignore[misc] - def _split_ho(self, random_seed: int, split_train: bool) -> None: + def _split_ho(self, split_train: bool) -> None: has_validation_split = any(split.startswith(Split.VALIDATION) for split in self.dataset) if split_train and Split.TRAIN in self.dataset: - self._split_train(random_seed) + self._split_train() if Split.TEST not in self.dataset: test_size = 0.1 if has_validation_split else 0.2 - self._split_test(test_size, random_seed) + self._split_test(test_size) if not has_validation_split: - self._split_validation_from_train(random_seed) + self._split_validation_from_train() elif Split.VALIDATION in self.dataset: - self._split_validation(random_seed) + self._split_validation() for split in self.dataset: n_classes_split = self.dataset.get_n_classes(split) @@ -209,7 +210,7 @@ def _split_ho(self, random_seed: int, split_train: bool) -> None: ) raise ValueError(message) - def _split_train(self, random_seed: int) -> None: + def _split_train(self) -> None: """ Split on two sets. @@ -219,12 +220,12 @@ def _split_train(self, random_seed: int) -> None: self.dataset, split=Split.TRAIN, test_size=0.5, - random_seed=random_seed, + random_seed=self.random_seed, allow_oos_in_train=False, # only train data for decision node should contain OOS ) self.dataset.pop(Split.TRAIN) - def _split_validation(self, random_seed: int) -> None: + def _split_validation(self) -> None: """ Split on two sets. 
@@ -234,21 +235,21 @@ def _split_validation(self, random_seed: int) -> None: self.dataset, split=Split.VALIDATION, test_size=0.5, - random_seed=random_seed, + random_seed=self.random_seed, allow_oos_in_train=False, # only val data for decision node should contain OOS ) self.dataset.pop(Split.VALIDATION) - def _split_validation_from_test(self, random_seed: int) -> None: + def _split_validation_from_test(self) -> None: self.dataset[Split.TEST], self.dataset[Split.VALIDATION] = split_dataset( self.dataset, split=Split.TEST, test_size=0.5, - random_seed=random_seed, + random_seed=self.random_seed, allow_oos_in_train=True, # both test and validation splits can contain OOS ) - def _split_cv(self, random_seed: int) -> None: + def _split_cv(self) -> None: extra_splits = [split_name for split_name in self.dataset if split_name not in [Split.TRAIN, Split.TEST]] if extra_splits: self.dataset[Split.TRAIN] = concatenate_datasets( @@ -257,7 +258,7 @@ def _split_cv(self, random_seed: int) -> None: if Split.TEST not in self.dataset: self.dataset[Split.TRAIN], self.dataset[Split.TEST] = split_dataset( - self.dataset, split=Split.TRAIN, test_size=0.2, random_seed=random_seed, allow_oos_in_train=True + self.dataset, split=Split.TRAIN, test_size=0.2, random_seed=self.random_seed, allow_oos_in_train=True ) for j in range(self.n_folds - 1): @@ -265,18 +266,18 @@ def _split_cv(self, random_seed: int) -> None: self.dataset, split=Split.TRAIN, test_size=1 / (self.n_folds - j), - random_seed=random_seed, + random_seed=self.random_seed, allow_oos_in_train=True, ) self.dataset[f"{Split.TRAIN}_{self.n_folds-1}"] = self.dataset.pop(Split.TRAIN) - def _split_validation_from_train(self, random_seed: int) -> None: + def _split_validation_from_train(self) -> None: if Split.TRAIN in self.dataset: self.dataset[Split.TRAIN], self.dataset[Split.VALIDATION] = split_dataset( self.dataset, split=Split.TRAIN, test_size=0.2, - random_seed=random_seed, + random_seed=self.random_seed, allow_oos_in_train=True, ) else: @@ -285,23 +286,23 @@ def _split_validation_from_train(self, random_seed: int) -> None: self.dataset, split=f"{Split.TRAIN}_{idx}", test_size=0.2, - random_seed=random_seed, + random_seed=self.random_seed, allow_oos_in_train=idx == 1, # for decision node it's ok to have oos in train ) - def _split_test(self, test_size: float, random_seed: int) -> None: + def _split_test(self, test_size: float) -> None: """Obtain test set from train.""" self.dataset[f"{Split.TRAIN}_0"], self.dataset[f"{Split.TEST}_0"] = split_dataset( self.dataset, split=f"{Split.TRAIN}_0", test_size=test_size, - random_seed=random_seed, + random_seed=self.random_seed, ) self.dataset[f"{Split.TRAIN}_1"], self.dataset[f"{Split.TEST}_1"] = split_dataset( self.dataset, split=f"{Split.TRAIN}_1", test_size=test_size, - random_seed=random_seed, + random_seed=self.random_seed, allow_oos_in_train=True, ) self.dataset[Split.TEST] = concatenate_datasets( @@ -309,3 +310,22 @@ def _split_test(self, test_size: float, random_seed: int) -> None: ) self.dataset.pop(f"{Split.TEST}_0") self.dataset.pop(f"{Split.TEST}_1") + + def prepare_for_refit(self) -> None: + if self.scheme == "ho": + return + + train_folds = [split_name for split_name in self.dataset if split_name.startswith("train")] + self.dataset[Split.TRAIN] = concatenate_datasets([self.dataset[name] for name in train_folds]) + for name in train_folds: + self.dataset.pop(name) + + self.dataset[f"{Split.TRAIN}_0"], self.dataset[f"{Split.TRAIN}_1"] = split_dataset( + self.dataset, + split=Split.TRAIN, + 
test_size=0.5, + random_seed=self.random_seed, + allow_oos_in_train=False, + ) + + self.dataset.pop(Split.TRAIN) diff --git a/tests/pipeline/test_optimization.py b/tests/pipeline/test_optimization.py index 6dd35b690..77344c9a3 100644 --- a/tests/pipeline/test_optimization.py +++ b/tests/pipeline/test_optimization.py @@ -31,7 +31,7 @@ def test_cv(dataset, task_type): if task_type == "multilabel": dataset = dataset.to_multilabel() - context = pipeline_optimizer.fit(dataset, scheme="cv") + context = pipeline_optimizer.fit(dataset, scheme="cv", refit_after=True) context.dump() assert os.listdir(pipeline_optimizer.logging_config.dump_dir) From 7f4d7ac09db596508b076ba685432cef90f20d34 Mon Sep 17 00:00:00 2001 From: voorhs Date: Thu, 6 Feb 2025 15:11:24 +0300 Subject: [PATCH 44/74] fix typing --- autointent/_pipeline/_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py index acabe02bd..ef8a658cc 100644 --- a/autointent/_pipeline/_pipeline.py +++ b/autointent/_pipeline/_pipeline.py @@ -229,7 +229,7 @@ def _refit(self, context: Context) -> None: context.data_handler.prepare_for_refit() - scoring_module.fit(context.data_handler.train_utterances(0), context.data_handler.train_labels(0)) + scoring_module.fit(context.data_handler.train_utterances(0), context.data_handler.train_labels(0)) # type: ignore[arg-type] scores = scoring_module.predict(context.data_handler.train_utterances(1)) decision_module.fit(scores, context.data_handler.train_labels(1)) From d22b2db8132b529ea56944fc4ef9c905cb089d12 Mon Sep 17 00:00:00 2001 From: voorhs Date: Thu, 6 Feb 2025 15:34:14 +0300 Subject: [PATCH 45/74] bug fix --- autointent/_pipeline/_pipeline.py | 4 ++-- tests/data/test_stratificaiton.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py index ef8a658cc..c401d8425 100644 --- a/autointent/_pipeline/_pipeline.py +++ b/autointent/_pipeline/_pipeline.py @@ -229,10 +229,10 @@ def _refit(self, context: Context) -> None: context.data_handler.prepare_for_refit() - scoring_module.fit(context.data_handler.train_utterances(0), context.data_handler.train_labels(0)) # type: ignore[arg-type] + scoring_module.fit(*scoring_module.get_train_data(context)) # type: ignore[arg-type] scores = scoring_module.predict(context.data_handler.train_utterances(1)) - decision_module.fit(scores, context.data_handler.train_labels(1)) + decision_module.fit(scores, context.data_handler.train_labels(1), context.data_handler.tags) def predict_with_metadata(self, utterances: list[str]) -> InferencePipelineOutput: """ diff --git a/tests/data/test_stratificaiton.py b/tests/data/test_stratificaiton.py index 08ae46099..37151ddf2 100644 --- a/tests/data/test_stratificaiton.py +++ b/tests/data/test_stratificaiton.py @@ -38,6 +38,6 @@ def test_multilabel_train_test_split(dataset_unsplitted): assert Split.TRAIN in dataset assert Split.TEST in dataset - assert dataset[Split.TRAIN].num_rows == 17 - assert dataset[Split.TEST].num_rows == 19 + assert dataset[Split.TRAIN].num_rows == 19 + assert dataset[Split.TEST].num_rows == 17 assert dataset.get_n_classes(Split.TRAIN) == dataset.get_n_classes(Split.TEST) From 82e8c3f1f6b73a608f64bd3eab9a0105ff95f505 Mon Sep 17 00:00:00 2001 From: voorhs Date: Thu, 6 Feb 2025 16:00:05 +0300 Subject: [PATCH 46/74] fix typing --- autointent/_pipeline/_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py index c401d8425..33c58f5b1 100644 --- a/autointent/_pipeline/_pipeline.py +++ b/autointent/_pipeline/_pipeline.py @@ -229,7 +229,7 @@ def _refit(self, context: Context) -> None: context.data_handler.prepare_for_refit() - scoring_module.fit(*scoring_module.get_train_data(context)) # type: ignore[arg-type] + scoring_module.fit(*scoring_module.get_train_data(context)) scores = scoring_module.predict(context.data_handler.train_utterances(1)) decision_module.fit(scores, context.data_handler.train_labels(1), context.data_handler.tags) From 4f2b4c8e90f8947090b7b57548570c0d80f36c54 Mon Sep 17 00:00:00 2001 From: voorhs Date: Fri, 7 Feb 2025 13:23:56 +0300 Subject: [PATCH 47/74] respond to samoed --- autointent/context/data_handler/_data_handler.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/autointent/context/data_handler/_data_handler.py b/autointent/context/data_handler/_data_handler.py index 6a1d4fa59..7aeb4fd36 100644 --- a/autointent/context/data_handler/_data_handler.py +++ b/autointent/context/data_handler/_data_handler.py @@ -315,10 +315,8 @@ def prepare_for_refit(self) -> None: if self.scheme == "ho": return - train_folds = [split_name for split_name in self.dataset if split_name.startswith("train")] - self.dataset[Split.TRAIN] = concatenate_datasets([self.dataset[name] for name in train_folds]) - for name in train_folds: - self.dataset.pop(name) + train_folds = [split_name for split_name in self.dataset if split_name.startswith(Split.TRAIN)] + self.dataset[Split.TRAIN] = concatenate_datasets([self.dataset.pop(name) for name in train_folds]) self.dataset[f"{Split.TRAIN}_0"], self.dataset[f"{Split.TRAIN}_1"] = split_dataset( self.dataset, From 46ea8598a3dcb9d2ecc2693f0e5be0638b5eda02 Mon Sep 17 00:00:00 2001 From: voorhs Date: Mon, 10 Feb 2025 18:23:06 +0300 Subject: [PATCH 48/74] create `ValidationType` in `autointent.custom_types` --- autointent/_pipeline/_pipeline.py | 6 +++--- autointent/configs/_optimization.py | 5 +++-- autointent/context/_context.py | 5 +++-- autointent/context/data_handler/_data_handler.py | 6 +++--- autointent/custom_types.py | 3 +++ 5 files changed, 15 insertions(+), 10 deletions(-) diff --git a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py index 33c58f5b1..d6e06acbb 100644 --- a/autointent/_pipeline/_pipeline.py +++ b/autointent/_pipeline/_pipeline.py @@ -3,14 +3,14 @@ import json import logging from pathlib import Path -from typing import TYPE_CHECKING, Any, Literal +from typing import TYPE_CHECKING, Any import numpy as np import yaml from autointent import Context, Dataset from autointent.configs import CrossEncoderConfig, EmbedderConfig, InferenceNodeConfig, LoggingConfig, VectorIndexConfig -from autointent.custom_types import ListOfGenericLabels, NodeType +from autointent.custom_types import ListOfGenericLabels, NodeType, ValidationType from autointent.metrics import PREDICTION_METRICS_MULTILABEL from autointent.nodes import InferenceNode, NodeOptimizer from autointent.nodes.schemes import OptimizationConfig @@ -122,7 +122,7 @@ def _is_inference(self) -> bool: """ return isinstance(self.nodes[NodeType.scoring], InferenceNode) - def fit(self, dataset: Dataset, scheme: Literal["ho", "cv"] = "ho", refit_after: bool = False) -> Context: + def fit(self, dataset: Dataset, scheme: ValidationType = "ho", refit_after: bool = False) -> Context: """ Optimize the pipeline from dataset. 
diff --git a/autointent/configs/_optimization.py b/autointent/configs/_optimization.py index 27ecc52a2..23f94159f 100644 --- a/autointent/configs/_optimization.py +++ b/autointent/configs/_optimization.py @@ -1,10 +1,11 @@ """Configuration for the optimization process.""" from pathlib import Path -from typing import Literal from pydantic import BaseModel, Field +from autointent.custom_types import ValidationType + from ._name import get_run_name @@ -13,7 +14,7 @@ class DataConfig(BaseModel): train_path: str | Path """Path to the training data. Can be local path or HF repo.""" - scheme: Literal["ho", "cv"] + scheme: ValidationType """Hold-out or cross-validation.""" diff --git a/autointent/context/_context.py b/autointent/context/_context.py index dd73db842..15ef8e044 100644 --- a/autointent/context/_context.py +++ b/autointent/context/_context.py @@ -3,7 +3,7 @@ import json import logging from pathlib import Path -from typing import Any, Literal +from typing import Any import yaml @@ -16,6 +16,7 @@ LoggingConfig, VectorIndexConfig, ) +from autointent.custom_types import ValidationType from ._utils import NumpyEncoder, load_dataset from .data_handler import DataHandler @@ -84,7 +85,7 @@ def configure_data(self, config: DataConfig) -> None: dataset=load_dataset(config.train_path), random_seed=self.seed, scheme=config.scheme ) - def set_dataset(self, dataset: Dataset, scheme: Literal["ho", "cv"] = "ho") -> None: + def set_dataset(self, dataset: Dataset, scheme: ValidationType = "ho") -> None: """ Set the datasets for training, validation and testing. diff --git a/autointent/context/data_handler/_data_handler.py b/autointent/context/data_handler/_data_handler.py index 7aeb4fd36..ff450724c 100644 --- a/autointent/context/data_handler/_data_handler.py +++ b/autointent/context/data_handler/_data_handler.py @@ -2,13 +2,13 @@ import logging from collections.abc import Generator -from typing import Literal, TypedDict, cast +from typing import TypedDict, cast from datasets import concatenate_datasets from transformers import set_seed from autointent import Dataset -from autointent.custom_types import ListOfGenericLabels, ListOfLabels, Split +from autointent.custom_types import ListOfGenericLabels, ListOfLabels, Split, ValidationType from ._stratification import split_dataset @@ -32,7 +32,7 @@ class DataHandler: # TODO rename to Validator def __init__( self, dataset: Dataset, - scheme: Literal["cv", "ho"] = "ho", + scheme: ValidationType = "ho", split_train: bool = True, random_seed: int = 0, n_folds: int = 3, diff --git a/autointent/custom_types.py b/autointent/custom_types.py index c6fe95350..563d42320 100644 --- a/autointent/custom_types.py +++ b/autointent/custom_types.py @@ -69,3 +69,6 @@ class Split: VALIDATION = "validation" TEST = "test" INTENTS = "intents" + + +ValidationType = Literal["ho", "cv"] From 79e25bfa3cea2acca909b3301d2fbb9a71a54a92 Mon Sep 17 00:00:00 2001 From: voorhs Date: Mon, 10 Feb 2025 18:25:29 +0300 Subject: [PATCH 49/74] fix docstring --- autointent/_pipeline/_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py index d6e06acbb..0ae703295 100644 --- a/autointent/_pipeline/_pipeline.py +++ b/autointent/_pipeline/_pipeline.py @@ -217,7 +217,7 @@ def _refit(self, context: Context) -> None: """ Fit pipeline of already selected modules with all train data. 
- :param utterances: list of utterances + :param context: context object to take data from :return: list of predicted labels """ if not self._is_inference(): From 231124499dc160c441ebd1e5b8e76c54e749b592 Mon Sep 17 00:00:00 2001 From: voorhs Date: Mon, 10 Feb 2025 18:36:25 +0300 Subject: [PATCH 50/74] properly expose `n_folds` argument --- autointent/_pipeline/_pipeline.py | 6 ++++-- autointent/configs/_optimization.py | 2 ++ autointent/context/_context.py | 3 ++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py index 0ae703295..3e07dfbbd 100644 --- a/autointent/_pipeline/_pipeline.py +++ b/autointent/_pipeline/_pipeline.py @@ -122,7 +122,9 @@ def _is_inference(self) -> bool: """ return isinstance(self.nodes[NodeType.scoring], InferenceNode) - def fit(self, dataset: Dataset, scheme: ValidationType = "ho", refit_after: bool = False) -> Context: + def fit( + self, dataset: Dataset, scheme: ValidationType = "ho", n_folds: int = 3, refit_after: bool = False + ) -> Context: """ Optimize the pipeline from dataset. @@ -134,7 +136,7 @@ def fit(self, dataset: Dataset, scheme: ValidationType = "ho", refit_after: bool raise RuntimeError(msg) context = Context() - context.set_dataset(dataset, scheme) + context.set_dataset(dataset, scheme, n_folds) context.configure_logging(self.logging_config) context.configure_vector_index(self.vector_index_config, self.embedder_config) context.configure_cross_encoder(self.cross_encoder_config) diff --git a/autointent/configs/_optimization.py b/autointent/configs/_optimization.py index 23f94159f..4195f9520 100644 --- a/autointent/configs/_optimization.py +++ b/autointent/configs/_optimization.py @@ -16,6 +16,8 @@ class DataConfig(BaseModel): """Path to the training data. Can be local path or HF repo.""" scheme: ValidationType """Hold-out or cross-validation.""" + n_folds: int = 3 + """Number of folds in cross-validation.""" class TaskConfig(BaseModel): diff --git a/autointent/context/_context.py b/autointent/context/_context.py index 15ef8e044..aa99a75a0 100644 --- a/autointent/context/_context.py +++ b/autointent/context/_context.py @@ -85,7 +85,7 @@ def configure_data(self, config: DataConfig) -> None: dataset=load_dataset(config.train_path), random_seed=self.seed, scheme=config.scheme ) - def set_dataset(self, dataset: Dataset, scheme: ValidationType = "ho") -> None: + def set_dataset(self, dataset: Dataset, scheme: ValidationType = "ho", n_folds: int = 3) -> None: """ Set the datasets for training, validation and testing. 
@@ -95,6 +95,7 @@ def set_dataset(self, dataset: Dataset, scheme: ValidationType = "ho") -> None: dataset=dataset, random_seed=self.seed, scheme=scheme, + n_folds=n_folds, ) def get_inference_config(self) -> dict[str, Any]: From 2929423d32935b3376020ff4feac480f95820802 Mon Sep 17 00:00:00 2001 From: voorhs Date: Mon, 10 Feb 2025 20:39:09 +0300 Subject: [PATCH 51/74] implement `_fit_bayes` --- autointent/configs/_optimization.py | 3 +- autointent/custom_types.py | 1 + .../nodes/_optimization/_node_optimizer.py | 104 ++++++++++++++++-- 3 files changed, 100 insertions(+), 8 deletions(-) diff --git a/autointent/configs/_optimization.py b/autointent/configs/_optimization.py index 4195f9520..c7003641b 100644 --- a/autointent/configs/_optimization.py +++ b/autointent/configs/_optimization.py @@ -4,7 +4,7 @@ from pydantic import BaseModel, Field -from autointent.custom_types import ValidationType +from autointent.custom_types import TuningType, ValidationType from ._name import get_run_name @@ -25,6 +25,7 @@ class TaskConfig(BaseModel): search_space_path: Path | None = None """Path to the search space configuration file. If None, the default search space will be used""" + sampler: TuningType = "brute" class LoggingConfig(BaseModel): diff --git a/autointent/custom_types.py b/autointent/custom_types.py index 563d42320..215b3967a 100644 --- a/autointent/custom_types.py +++ b/autointent/custom_types.py @@ -72,3 +72,4 @@ class Split: ValidationType = Literal["ho", "cv"] +TuningType = Literal["brute", "bayes"] diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_optimization/_node_optimizer.py index 2271f9feb..0838cfd08 100644 --- a/autointent/nodes/_optimization/_node_optimizer.py +++ b/autointent/nodes/_optimization/_node_optimizer.py @@ -4,16 +4,24 @@ import itertools as it import logging from copy import deepcopy +from functools import partial from pathlib import Path -from typing import Any +from typing import Any, Literal, TypedDict +import optuna import torch +from optuna.trial import Trial from autointent.context import Context -from autointent.custom_types import NodeType +from autointent.custom_types import NodeType, TuningType from autointent.nodes._nodes_info import NODES_INFO +class ParamSpace(TypedDict): + type: Literal["cat", "int", "float"] + content: list[Any] + + class NodeOptimizer: """Node optimizer class.""" @@ -42,7 +50,7 @@ def __init__( self.modules_search_spaces = search_space # TODO search space validation self._logger = logging.getLogger(__name__) # TODO solve duplicate logging messages problem - def fit(self, context: Context) -> None: + def fit(self, context: Context, tuning: TuningType = "brute") -> None: """ Fit the node optimizer. 
@@ -50,6 +58,17 @@ def fit(self, context: Context) -> None: """ self._logger.info("starting %s node optimization...", self.node_info.node_type) + if tuning == "brute": + self._fit_brute(context) + elif tuning == "bayes": + self._fit_bayes(context) + else: + msg = f"Unexepected tuning type: {tuning}" + raise ValueError(msg) + + self._logger.info("%s node optimization is finished!", self.node_info.node_type) + + def _fit_brute(self, context: Context) -> None: for search_space in deepcopy(self.modules_search_spaces): module_name = search_space.pop("module_name") @@ -57,15 +76,16 @@ def fit(self, context: Context) -> None: module_kwargs = dict(zip(search_space.keys(), params_combination, strict=False)) self._logger.debug("initializing %s module...", module_name) - context.callback_handler.start_module( - module_name=module_name, num=j_combination, module_kwargs=module_kwargs - ) module = self.node_info.modules_available[module_name].from_context(context, **module_kwargs) embedder_name = module.get_embedder_name() if embedder_name is not None: module_kwargs["embedder_name"] = embedder_name + context.callback_handler.start_module( + module_name=module_name, num=j_combination, module_kwargs=module_kwargs + ) + self._logger.debug("scoring %s module...", module_name) metrics_score = module.score(context, metrics=self.metrics) metric_value = metrics_score[self.target_metric] @@ -97,7 +117,77 @@ def fit(self, context: Context) -> None: gc.collect() torch.cuda.empty_cache() - self._logger.info("%s node optimization is finished!", self.node_info.node_type) + def _fit_bayes(self, context: Context, seed: int = 42, n_trials: int = 10) -> None: + self._counter = 0 + for search_space in deepcopy(self.modules_search_spaces): + study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(seed=seed)) + optuna.logging.set_verbosity(optuna.logging.WARNING) + module_name = search_space.pop("module_name") + obj = partial(self.objective, module_name=module_name, search_space=search_space, context=context) + study.optimize(obj, n_trials=n_trials) + + def objective(self, trial: Trial, module_name: str, search_space: dict[str, ParamSpace], context: Context) -> float: + config = self.suggest(trial, search_space) + + self._logger.debug("initializing %s module...", module_name) + module = self.node_info.modules_available[module_name].from_context(context, **config) + + embedder_name = module.get_embedder_name() + if embedder_name is not None: + config["embedder_name"] = embedder_name + + context.callback_handler.start_module(module_name=module_name, num=self._counter, module_kwargs=config) + + self._logger.debug("scoring %s module...", module_name) + all_metrics = module.score(context, metrics=self.metrics) + target_metric = all_metrics[self.target_metric] + + context.callback_handler.log_metrics(all_metrics) + context.callback_handler.end_module() + + dump_dir = context.get_dump_dir() + + if dump_dir is not None: + module_dump_dir = self.get_module_dump_dir(dump_dir, module_name, self._counter) + module.dump(module_dump_dir) + else: + module_dump_dir = None + + context.optimization_info.log_module_optimization( + self.node_info.node_type, + module_name, + config, + target_metric, + self.target_metric, + module.get_assets(), # retriever name / scores / predictions + module_dump_dir, + module=module if not context.is_ram_to_clear() else None, + ) + + if context.is_ram_to_clear(): + module.clear_cache() + gc.collect() + torch.cuda.empty_cache() + + self._counter += 1 + + return target_metric + + def 
suggest(self, trial: Trial, search_space: dict[str, ParamSpace]) -> dict[str, Any]: + res = {} + for param_name, param_space in search_space.items(): + if param_space["type"] == "cat": + res[param_name] = trial.suggest_categorical(param_name, choices=param_space["content"]) + elif param_space["type"] == "int": + low, high, step, log = param_space["content"] + res[param_name] = trial.suggest_int(param_name, low=low, high=high, step=step, log=log) + elif param_space["type"] == "float": + low, high, step, log = param_space["content"] + res[param_name] = trial.suggest_float(param_name, low=low, high=high, step=step, log=log) + else: + msg = f"Unsupported type of param search space: {param_space['type']}" + raise RuntimeError(msg) + return res def get_module_dump_dir(self, dump_dir: Path, module_name: str, j_combination: int) -> str: """ From 43b1910fc3b5305d307e8eaf854568031ee1e620 Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 11 Feb 2025 21:15:23 +0300 Subject: [PATCH 52/74] add typing to param spaces --- .../nodes/_optimization/_node_optimizer.py | 37 ++++++++++++++----- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_optimization/_node_optimizer.py index 0838cfd08..c7b52d7de 100644 --- a/autointent/nodes/_optimization/_node_optimizer.py +++ b/autointent/nodes/_optimization/_node_optimizer.py @@ -17,9 +17,24 @@ from autointent.nodes._nodes_info import NODES_INFO +class ParamSpaceCat(TypedDict): + choices: list[Any] + +class ParamSpaceInt(TypedDict, total=False): + low: int + high: int + step: int + log: bool + +class ParamSpaceFloat(TypedDict, total=False): + low: float + high: float + step: float + log: bool + class ParamSpace(TypedDict): type: Literal["cat", "int", "float"] - content: list[Any] + content: ParamSpaceCat | ParamSpaceInt | ParamSpaceFloat class NodeOptimizer: @@ -126,7 +141,9 @@ def _fit_bayes(self, context: Context, seed: int = 42, n_trials: int = 10) -> No obj = partial(self.objective, module_name=module_name, search_space=search_space, context=context) study.optimize(obj, n_trials=n_trials) - def objective(self, trial: Trial, module_name: str, search_space: dict[str, ParamSpace], context: Context) -> float: + def objective( + self, trial: Trial, module_name: str, search_space: dict[str, ParamSpace | list[Any]], context: Context + ) -> float: config = self.suggest(trial, search_space) self._logger.debug("initializing %s module...", module_name) @@ -173,17 +190,17 @@ def objective(self, trial: Trial, module_name: str, search_space: dict[str, Para return target_metric - def suggest(self, trial: Trial, search_space: dict[str, ParamSpace]) -> dict[str, Any]: - res = {} + def suggest(self, trial: Trial, search_space: dict[str, ParamSpace | list[Any]]) -> dict[str, Any]: + res: dict[str, Any] = {} for param_name, param_space in search_space.items(): - if param_space["type"] == "cat": - res[param_name] = trial.suggest_categorical(param_name, choices=param_space["content"]) + if isinstance(param_space, list): + res[param_name] = trial.suggest_categorical(param_name, choices=param_space) + elif param_space["type"] == "cat": + res[param_name] = trial.suggest_categorical(param_name, **param_space["content"]) elif param_space["type"] == "int": - low, high, step, log = param_space["content"] - res[param_name] = trial.suggest_int(param_name, low=low, high=high, step=step, log=log) + res[param_name] = trial.suggest_int(param_name, **param_space["content"]) elif param_space["type"] == "float": - low, 
high, step, log = param_space["content"] - res[param_name] = trial.suggest_float(param_name, low=low, high=high, step=step, log=log) + res[param_name] = trial.suggest_float(param_name, **param_space["content"]) else: msg = f"Unsupported type of param search space: {param_space['type']}" raise RuntimeError(msg) From a0ef81cec642a9d0b8ef012748449270a1a68334 Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 11 Feb 2025 21:16:08 +0300 Subject: [PATCH 53/74] minor bug fix --- autointent/nodes/_optimization/_node_optimizer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_optimization/_node_optimizer.py index c7b52d7de..5d8980831 100644 --- a/autointent/nodes/_optimization/_node_optimizer.py +++ b/autointent/nodes/_optimization/_node_optimizer.py @@ -202,8 +202,8 @@ def suggest(self, trial: Trial, search_space: dict[str, ParamSpace | list[Any]]) elif param_space["type"] == "float": res[param_name] = trial.suggest_float(param_name, **param_space["content"]) else: - msg = f"Unsupported type of param search space: {param_space['type']}" - raise RuntimeError(msg) + msg = f"Unsupported type of param search space: {param_space}" + raise TypeError(msg) return res def get_module_dump_dir(self, dump_dir: Path, module_name: str, j_combination: int) -> str: From b2f4dc27472fc8a075a093aae93e7749fae4dfc2 Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 11 Feb 2025 21:24:19 +0300 Subject: [PATCH 54/74] minor bug fix --- autointent/nodes/_optimization/_node_optimizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_optimization/_node_optimizer.py index 5d8980831..1337bb585 100644 --- a/autointent/nodes/_optimization/_node_optimizer.py +++ b/autointent/nodes/_optimization/_node_optimizer.py @@ -133,8 +133,8 @@ def _fit_brute(self, context: Context) -> None: torch.cuda.empty_cache() def _fit_bayes(self, context: Context, seed: int = 42, n_trials: int = 10) -> None: - self._counter = 0 for search_space in deepcopy(self.modules_search_spaces): + self._counter = 0 study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(seed=seed)) optuna.logging.set_verbosity(optuna.logging.WARNING) module_name = search_space.pop("module_name") From 203b5ee7f1e82e0aded0d0b0fb8927fd5503595c Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 11 Feb 2025 21:25:51 +0300 Subject: [PATCH 55/74] fix codestyle --- autointent/nodes/_optimization/_node_optimizer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_optimization/_node_optimizer.py index 1337bb585..e1acab6a7 100644 --- a/autointent/nodes/_optimization/_node_optimizer.py +++ b/autointent/nodes/_optimization/_node_optimizer.py @@ -20,18 +20,21 @@ class ParamSpaceCat(TypedDict): choices: list[Any] + class ParamSpaceInt(TypedDict, total=False): low: int high: int step: int log: bool + class ParamSpaceFloat(TypedDict, total=False): low: float high: float step: float log: bool + class ParamSpace(TypedDict): type: Literal["cat", "int", "float"] content: ParamSpaceCat | ParamSpaceInt | ParamSpaceFloat From a78a60c1c0c35fa166ddd73b1d5334ef1f185692 Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 11 Feb 2025 21:56:18 +0300 Subject: [PATCH 56/74] add tuning selection to pipeline --- autointent/_pipeline/_pipeline.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git 
a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py index a6a7905f7..271efc432 100644 --- a/autointent/_pipeline/_pipeline.py +++ b/autointent/_pipeline/_pipeline.py @@ -10,7 +10,7 @@ from autointent import Context, Dataset from autointent.configs import CrossEncoderConfig, EmbedderConfig, InferenceNodeConfig, LoggingConfig, VectorIndexConfig -from autointent.custom_types import ListOfGenericLabels, NodeType, ValidationScheme +from autointent.custom_types import ListOfGenericLabels, NodeType, TuningType, ValidationScheme from autointent.metrics import PREDICTION_METRICS_MULTILABEL from autointent.nodes import InferenceNode, NodeOptimizer from autointent.nodes.schemes import OptimizationConfig @@ -93,7 +93,7 @@ def default_optimizer(cls, multilabel: bool, seed: int = 42) -> "Pipeline": """ return cls.from_search_space(search_space=load_default_search_space(multilabel), seed=seed) - def _fit(self, context: Context) -> None: + def _fit(self, context: Context, tuning: TuningType = "brute") -> None: """ Optimize the pipeline. @@ -108,7 +108,7 @@ def _fit(self, context: Context) -> None: for node_type in NodeType: node_optimizer = self.nodes.get(node_type, None) if node_optimizer is not None: - node_optimizer.fit(context) # type: ignore[union-attr] + node_optimizer.fit(context, tuning) # type: ignore[union-attr] if not context.vector_index_config.save_db: self._logger.info("removing vector database from file system...") # TODO clear cache from appdirs @@ -123,7 +123,12 @@ def _is_inference(self) -> bool: return isinstance(self.nodes[NodeType.scoring], InferenceNode) def fit( - self, dataset: Dataset, scheme: ValidationScheme = "ho", n_folds: int = 3, refit_after: bool = False + self, + dataset: Dataset, + scheme: ValidationScheme = "ho", + n_folds: int = 3, + refit_after: bool = False, + tuning: TuningType = "brute", ) -> Context: """ Optimize the pipeline from dataset. 
@@ -141,7 +146,7 @@ def fit( context.configure_vector_index(self.vector_index_config, self.embedder_config) context.configure_cross_encoder(self.cross_encoder_config) self.validate_modules(dataset) - self._fit(context) + self._fit(context, tuning) if context.is_ram_to_clear(): nodes_configs = context.optimization_info.get_inference_nodes_config() From 5ed5ac32d3dd773634a956ccc6722ad5f47c68e1 Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 11 Feb 2025 21:56:31 +0300 Subject: [PATCH 57/74] add test on bayes --- tests/assets/configs/bayes.yaml | 30 +++++++++++++++++++++++++++++ tests/conftest.py | 2 +- tests/pipeline/test_optimization.py | 14 ++++++++++++-- 3 files changed, 43 insertions(+), 3 deletions(-) create mode 100644 tests/assets/configs/bayes.yaml diff --git a/tests/assets/configs/bayes.yaml b/tests/assets/configs/bayes.yaml new file mode 100644 index 000000000..5a31f4ec4 --- /dev/null +++ b/tests/assets/configs/bayes.yaml @@ -0,0 +1,30 @@ +- node_type: embedding + target_metric: retrieval_hit_rate + search_space: + - module_name: retrieval + k: [10] + embedder_name: + - sentence-transformers/all-MiniLM-L6-v2 + - avsolatorio/GIST-small-Embedding-v0 +- node_type: scoring + target_metric: scoring_roc_auc + search_space: + - module_name: knn + k: + type: "int" + low: 5 + high: 10 + step: 1 + weights: [uniform, distance, closest] + - module_name: linear +- node_type: decision + target_metric: decision_accuracy + search_space: + - module_name: threshold + thresh: + type: float + low: 0.1 + high: 0.9 + - module_name: tunable + - module_name: argmax + - module_name: jinoos diff --git a/tests/conftest.py b/tests/conftest.py index 1945b3426..4d4190d4c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -27,7 +27,7 @@ def dataset_unsplitted(): return Dataset.from_json(path) -TaskType = Literal["multiclass", "multilabel", "description"] +TaskType = Literal["multiclass", "multilabel", "description", "bayes"] def get_search_space_path(task_type: TaskType): diff --git a/tests/pipeline/test_optimization.py b/tests/pipeline/test_optimization.py index 3c4b83893..dbf0b1cb9 100644 --- a/tests/pipeline/test_optimization.py +++ b/tests/pipeline/test_optimization.py @@ -1,5 +1,4 @@ import os -from typing import Literal import pytest @@ -11,7 +10,18 @@ ) from tests.conftest import get_search_space, setup_environment -TaskType = Literal["multiclass", "multilabel", "description"] + +def test_bayes(dataset): + project_dir = setup_environment() + search_space = get_search_space("bayes") + + pipeline_optimizer = Pipeline.from_search_space(search_space) + + pipeline_optimizer.set_config(LoggingConfig(project_dir=project_dir, dump_modules=True, clear_ram=True)) + pipeline_optimizer.set_config(VectorIndexConfig()) + pipeline_optimizer.set_config(EmbedderConfig(batch_size=16, max_length=32, device="cuda")) + + pipeline_optimizer.fit(dataset, scheme="cv", refit_after=True) @pytest.mark.parametrize( From 364e60a830c75187e242a90dadf5443be631a378 Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 11 Feb 2025 22:07:25 +0300 Subject: [PATCH 58/74] disable search space validation for now --- autointent/_pipeline/_pipeline.py | 2 +- tests/assets/configs/bayes.yaml | 12 +++++++----- tests/pipeline/test_optimization.py | 4 ++-- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py index 271efc432..ed2bd1f8d 100644 --- a/autointent/_pipeline/_pipeline.py +++ b/autointent/_pipeline/_pipeline.py @@ -77,7 +77,7 @@ def from_search_space(cls, 
search_space: list[dict[str, Any]] | Path | str, seed """ if isinstance(search_space, Path | str): search_space = load_search_space(search_space) - validated_search_space = OptimizationConfig(search_space).model_dump() # type: ignore[arg-type] + validated_search_space = search_space # OptimizationConfig(search_space).model_dump() # type: ignore[arg-type] nodes = [NodeOptimizer(**node) for node in validated_search_space] return cls(nodes=nodes, seed=seed) diff --git a/tests/assets/configs/bayes.yaml b/tests/assets/configs/bayes.yaml index 5a31f4ec4..a008c7005 100644 --- a/tests/assets/configs/bayes.yaml +++ b/tests/assets/configs/bayes.yaml @@ -12,9 +12,10 @@ - module_name: knn k: type: "int" - low: 5 - high: 10 - step: 1 + content: + low: 5 + high: 10 + step: 1 weights: [uniform, distance, closest] - module_name: linear - node_type: decision @@ -23,8 +24,9 @@ - module_name: threshold thresh: type: float - low: 0.1 - high: 0.9 + content: + low: 0.1 + high: 0.9 - module_name: tunable - module_name: argmax - module_name: jinoos diff --git a/tests/pipeline/test_optimization.py b/tests/pipeline/test_optimization.py index dbf0b1cb9..75104d42e 100644 --- a/tests/pipeline/test_optimization.py +++ b/tests/pipeline/test_optimization.py @@ -19,9 +19,9 @@ def test_bayes(dataset): pipeline_optimizer.set_config(LoggingConfig(project_dir=project_dir, dump_modules=True, clear_ram=True)) pipeline_optimizer.set_config(VectorIndexConfig()) - pipeline_optimizer.set_config(EmbedderConfig(batch_size=16, max_length=32, device="cuda")) + pipeline_optimizer.set_config(EmbedderConfig(batch_size=16, max_length=32, device="cpu")) - pipeline_optimizer.fit(dataset, scheme="cv", refit_after=True) + pipeline_optimizer.fit(dataset, scheme="cv", refit_after=True, tuning="bayes") @pytest.mark.parametrize( From 38475e63fb113b2ac12796192e95885328c6219a Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 11 Feb 2025 22:07:53 +0300 Subject: [PATCH 59/74] fix codestyle --- autointent/_pipeline/_pipeline.py | 1 - 1 file changed, 1 deletion(-) diff --git a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py index ed2bd1f8d..07d25051e 100644 --- a/autointent/_pipeline/_pipeline.py +++ b/autointent/_pipeline/_pipeline.py @@ -13,7 +13,6 @@ from autointent.custom_types import ListOfGenericLabels, NodeType, TuningType, ValidationScheme from autointent.metrics import PREDICTION_METRICS_MULTILABEL from autointent.nodes import InferenceNode, NodeOptimizer -from autointent.nodes.schemes import OptimizationConfig from autointent.utils import load_default_search_space, load_search_space from ._schemas import InferencePipelineOutput, InferencePipelineUtteranceOutput From 9f71c0bb5a528797f4b4367a0f825012bb7c448b Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 11 Feb 2025 22:22:35 +0300 Subject: [PATCH 60/74] remove `ParamSpaceCat` (it's redundant) --- autointent/nodes/_optimization/_node_optimizer.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_optimization/_node_optimizer.py index d784cf115..010444239 100644 --- a/autointent/nodes/_optimization/_node_optimizer.py +++ b/autointent/nodes/_optimization/_node_optimizer.py @@ -18,10 +18,6 @@ from autointent.nodes._nodes_info import NODES_INFO -class ParamSpaceCat(TypedDict): - choices: list[Any] - - class ParamSpaceInt(TypedDict, total=False): low: int high: int @@ -38,7 +34,7 @@ class ParamSpaceFloat(TypedDict, total=False): class ParamSpace(TypedDict): type: Literal["cat", "int", 
"float"] - content: ParamSpaceCat | ParamSpaceInt | ParamSpaceFloat + content: ParamSpaceInt | ParamSpaceFloat class NodeOptimizer: @@ -199,8 +195,6 @@ def suggest(self, trial: Trial, search_space: dict[str, ParamSpace | list[Any]]) for param_name, param_space in search_space.items(): if isinstance(param_space, list): res[param_name] = trial.suggest_categorical(param_name, choices=param_space) - elif param_space["type"] == "cat": - res[param_name] = trial.suggest_categorical(param_name, **param_space["content"]) elif param_space["type"] == "int": res[param_name] = trial.suggest_int(param_name, **param_space["content"]) elif param_space["type"] == "float": From 0c5eef6abbe6e0fb84bfd4bbcc08f2612be4caf2 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 12 Feb 2025 11:24:56 +0300 Subject: [PATCH 61/74] move to optuna entirely --- autointent/custom_types.py | 2 +- .../nodes/_optimization/_node_optimizer.py | 77 ++++--------------- 2 files changed, 16 insertions(+), 63 deletions(-) diff --git a/autointent/custom_types.py b/autointent/custom_types.py index d9697b2f5..da0ee16e8 100644 --- a/autointent/custom_types.py +++ b/autointent/custom_types.py @@ -71,5 +71,5 @@ class Split: INTENTS = "intents" -TuningType = Literal["brute", "bayes"] +TuningType = Literal["brute", "bayes", "random"] ValidationScheme = Literal["ho", "cv"] diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_optimization/_node_optimizer.py index 010444239..d18a48737 100644 --- a/autointent/nodes/_optimization/_node_optimizer.py +++ b/autointent/nodes/_optimization/_node_optimizer.py @@ -1,7 +1,6 @@ """Node optimizer.""" import gc -import itertools as it import logging from copy import deepcopy from functools import partial @@ -73,74 +72,28 @@ def fit(self, context: Context, tuning: TuningType = "brute") -> None: """ self._logger.info("starting %s node optimization...", self.node_info.node_type) - if tuning == "brute": - self._fit_brute(context) - elif tuning == "bayes": - self._fit_bayes(context) - else: - msg = f"Unexepected tuning type: {tuning}" - raise ValueError(msg) - - self._logger.info("%s node optimization is finished!", self.node_info.node_type) - - def _fit_brute(self, context: Context) -> None: - for search_space in deepcopy(self.modules_search_spaces): - module_name = search_space.pop("module_name") - - for j_combination, params_combination in enumerate(it.product(*search_space.values())): - module_kwargs = dict(zip(search_space.keys(), params_combination, strict=False)) - - self._logger.debug("initializing %s module...", module_name) - module = self.node_info.modules_available[module_name].from_context(context, **module_kwargs) - - embedder_name = module.get_embedder_name() - if embedder_name is not None: - module_kwargs["embedder_name"] = embedder_name - - context.callback_handler.start_module( - module_name=module_name, num=j_combination, module_kwargs=module_kwargs - ) - - self._logger.debug("scoring %s module...", module_name) - metrics_score = module.score(context, metrics=self.metrics) - metric_value = metrics_score[self.target_metric] - - context.callback_handler.log_metrics(metrics_score) - context.callback_handler.end_module() - - dump_dir = context.get_dump_dir() - - if dump_dir is not None: - module_dump_dir = self.get_module_dump_dir(dump_dir, module_name, j_combination) - module.dump(module_dump_dir) - else: - module_dump_dir = None - - context.optimization_info.log_module_optimization( - self.node_info.node_type, - module_name, - module_kwargs, - metric_value, - 
self.target_metric, - module.get_assets(), # retriever name / scores / predictions - module_dump_dir, - module=module if not context.is_ram_to_clear() else None, - ) - - if context.is_ram_to_clear(): - module.clear_cache() - gc.collect() - torch.cuda.empty_cache() - - def _fit_bayes(self, context: Context, seed: int = 42, n_trials: int = 10) -> None: for search_space in deepcopy(self.modules_search_spaces): self._counter = 0 - study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(seed=seed)) + if tuning == "bayes": + sampler = optuna.samplers.TPESampler(seed=context.seed) + n_trials = 10 + elif tuning == "brute": + sampler = optuna.samplers.BruteForceSampler(seed=context.seed) # type: ignore[assignment] + n_trials = None + elif tuning == "random": + sampler = optuna.samplers.RandomSampler(seed=context.seed) # type: ignore[assignment] + n_trials = 10 + else: + msg = f"Unexpected sampler: {tuning}" + raise ValueError(msg) + study = optuna.create_study(direction="maximize", sampler=sampler) optuna.logging.set_verbosity(optuna.logging.WARNING) module_name = search_space.pop("module_name") obj = partial(self.objective, module_name=module_name, search_space=search_space, context=context) study.optimize(obj, n_trials=n_trials) + self._logger.info("%s node optimization is finished!", self.node_info.node_type) + def objective( self, trial: Trial, module_name: str, search_space: dict[str, ParamSpace | list[Any]], context: Context ) -> float: From 2fb6ac01cd82a11624ca72accbc439a1658d42b7 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 12 Feb 2025 12:02:05 +0300 Subject: [PATCH 62/74] refactor yaml format a little bit --- .../nodes/_optimization/_node_optimizer.py | 34 +++++++++++-------- tests/assets/configs/bayes.yaml | 15 ++++---- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_optimization/_node_optimizer.py index d18a48737..4ae0e420b 100644 --- a/autointent/nodes/_optimization/_node_optimizer.py +++ b/autointent/nodes/_optimization/_node_optimizer.py @@ -5,7 +5,7 @@ from copy import deepcopy from functools import partial from pathlib import Path -from typing import Any, Literal, TypedDict +from typing import Any, TypedDict import optuna import torch @@ -31,11 +31,6 @@ class ParamSpaceFloat(TypedDict, total=False): log: bool -class ParamSpace(TypedDict): - type: Literal["cat", "int", "float"] - content: ParamSpaceInt | ParamSpaceFloat - - class NodeOptimizer: """Node optimizer class.""" @@ -74,28 +69,35 @@ def fit(self, context: Context, tuning: TuningType = "brute") -> None: for search_space in deepcopy(self.modules_search_spaces): self._counter = 0 + module_name = search_space.pop("module_name") + n_trials = None + if "n_trials" in search_space: + n_trials = search_space.pop("n_trials") if tuning == "bayes": sampler = optuna.samplers.TPESampler(seed=context.seed) - n_trials = 10 + n_trials = n_trials or 10 elif tuning == "brute": sampler = optuna.samplers.BruteForceSampler(seed=context.seed) # type: ignore[assignment] n_trials = None elif tuning == "random": sampler = optuna.samplers.RandomSampler(seed=context.seed) # type: ignore[assignment] - n_trials = 10 + n_trials = n_trials or 10 else: msg = f"Unexpected sampler: {tuning}" raise ValueError(msg) study = optuna.create_study(direction="maximize", sampler=sampler) optuna.logging.set_verbosity(optuna.logging.WARNING) - module_name = search_space.pop("module_name") obj = partial(self.objective, module_name=module_name, 
search_space=search_space, context=context) study.optimize(obj, n_trials=n_trials) self._logger.info("%s node optimization is finished!", self.node_info.node_type) def objective( - self, trial: Trial, module_name: str, search_space: dict[str, ParamSpace | list[Any]], context: Context + self, + trial: Trial, + module_name: str, + search_space: dict[str, ParamSpaceInt | ParamSpaceFloat | list[Any]], + context: Context, ) -> float: config = self.suggest(trial, search_space) @@ -143,15 +145,17 @@ def objective( return target_metric - def suggest(self, trial: Trial, search_space: dict[str, ParamSpace | list[Any]]) -> dict[str, Any]: + def suggest( + self, trial: Trial, search_space: dict[str, ParamSpaceInt | ParamSpaceFloat | list[Any]] + ) -> dict[str, Any]: res: dict[str, Any] = {} for param_name, param_space in search_space.items(): if isinstance(param_space, list): res[param_name] = trial.suggest_categorical(param_name, choices=param_space) - elif param_space["type"] == "int": - res[param_name] = trial.suggest_int(param_name, **param_space["content"]) - elif param_space["type"] == "float": - res[param_name] = trial.suggest_float(param_name, **param_space["content"]) + elif all(isinstance(v, int) for v in param_space.values()): + res[param_name] = trial.suggest_int(param_name, **param_space) + elif all(isinstance(v, float) for v in param_space.values()): + res[param_name] = trial.suggest_float(param_name, **param_space) else: msg = f"Unsupported type of param search space: {param_space}" raise TypeError(msg) diff --git a/tests/assets/configs/bayes.yaml b/tests/assets/configs/bayes.yaml index a008c7005..636e217fc 100644 --- a/tests/assets/configs/bayes.yaml +++ b/tests/assets/configs/bayes.yaml @@ -10,12 +10,11 @@ target_metric: scoring_roc_auc search_space: - module_name: knn + n_trials: 3 k: - type: "int" - content: - low: 5 - high: 10 - step: 1 + low: 5 + high: 10 + step: 1 weights: [uniform, distance, closest] - module_name: linear - node_type: decision @@ -23,10 +22,8 @@ search_space: - module_name: threshold thresh: - type: float - content: - low: 0.1 - high: 0.9 + low: 0.1 + high: 0.9 - module_name: tunable - module_name: argmax - module_name: jinoos From 5072810aea54dc4859d607166374eb977613446c Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 12 Feb 2025 12:12:06 +0300 Subject: [PATCH 63/74] add test for random sampler --- tests/pipeline/test_optimization.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/pipeline/test_optimization.py b/tests/pipeline/test_optimization.py index 75104d42e..9549c6fd8 100644 --- a/tests/pipeline/test_optimization.py +++ b/tests/pipeline/test_optimization.py @@ -11,7 +11,11 @@ from tests.conftest import get_search_space, setup_environment -def test_bayes(dataset): +@pytest.mark.parametrize( + "tuning", + ["bayes", "random"], +) +def test_bayes(dataset, tuning): project_dir = setup_environment() search_space = get_search_space("bayes") @@ -21,7 +25,7 @@ def test_bayes(dataset): pipeline_optimizer.set_config(VectorIndexConfig()) pipeline_optimizer.set_config(EmbedderConfig(batch_size=16, max_length=32, device="cpu")) - pipeline_optimizer.fit(dataset, scheme="cv", refit_after=True, tuning="bayes") + pipeline_optimizer.fit(dataset, scheme="ho", refit_after=False, tuning=tuning) @pytest.mark.parametrize( From b6669ecf4b9da9e2398402237ed139e53fac38b4 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 12 Feb 2025 12:15:55 +0300 Subject: [PATCH 64/74] rename some variables --- autointent/_pipeline/_pipeline.py | 10 +++++----- 
autointent/configs/_optimization.py | 4 ++-- autointent/custom_types.py | 2 +- .../nodes/_optimization/_node_optimizer.py | 20 +++++++++---------- .../configs/{bayes.yaml => optuna.yaml} | 0 tests/conftest.py | 2 +- tests/pipeline/test_optimization.py | 10 +++++----- 7 files changed, 24 insertions(+), 24 deletions(-) rename tests/assets/configs/{bayes.yaml => optuna.yaml} (100%) diff --git a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py index 07d25051e..fe9a12e50 100644 --- a/autointent/_pipeline/_pipeline.py +++ b/autointent/_pipeline/_pipeline.py @@ -10,7 +10,7 @@ from autointent import Context, Dataset from autointent.configs import CrossEncoderConfig, EmbedderConfig, InferenceNodeConfig, LoggingConfig, VectorIndexConfig -from autointent.custom_types import ListOfGenericLabels, NodeType, TuningType, ValidationScheme +from autointent.custom_types import ListOfGenericLabels, NodeType, SamplerType, ValidationScheme from autointent.metrics import PREDICTION_METRICS_MULTILABEL from autointent.nodes import InferenceNode, NodeOptimizer from autointent.utils import load_default_search_space, load_search_space @@ -92,7 +92,7 @@ def default_optimizer(cls, multilabel: bool, seed: int = 42) -> "Pipeline": """ return cls.from_search_space(search_space=load_default_search_space(multilabel), seed=seed) - def _fit(self, context: Context, tuning: TuningType = "brute") -> None: + def _fit(self, context: Context, sampler: SamplerType = "brute") -> None: """ Optimize the pipeline. @@ -107,7 +107,7 @@ def _fit(self, context: Context, tuning: TuningType = "brute") -> None: for node_type in NodeType: node_optimizer = self.nodes.get(node_type, None) if node_optimizer is not None: - node_optimizer.fit(context, tuning) # type: ignore[union-attr] + node_optimizer.fit(context, sampler) # type: ignore[union-attr] if not context.vector_index_config.save_db: self._logger.info("removing vector database from file system...") # TODO clear cache from appdirs @@ -127,7 +127,7 @@ def fit( scheme: ValidationScheme = "ho", n_folds: int = 3, refit_after: bool = False, - tuning: TuningType = "brute", + sampler: SamplerType = "brute", ) -> Context: """ Optimize the pipeline from dataset. @@ -145,7 +145,7 @@ def fit( context.configure_vector_index(self.vector_index_config, self.embedder_config) context.configure_cross_encoder(self.cross_encoder_config) self.validate_modules(dataset) - self._fit(context, tuning) + self._fit(context, sampler) if context.is_ram_to_clear(): nodes_configs = context.optimization_info.get_inference_nodes_config() diff --git a/autointent/configs/_optimization.py b/autointent/configs/_optimization.py index bb0ab278e..fb8e2068b 100644 --- a/autointent/configs/_optimization.py +++ b/autointent/configs/_optimization.py @@ -4,7 +4,7 @@ from pydantic import BaseModel, Field -from autointent.custom_types import TuningType, ValidationScheme +from autointent.custom_types import SamplerType, ValidationScheme from ._name import get_run_name @@ -25,7 +25,7 @@ class TaskConfig(BaseModel): search_space_path: Path | None = None """Path to the search space configuration file. 
If None, the default search space will be used""" - sampler: TuningType = "brute" + sampler: SamplerType = "brute" class LoggingConfig(BaseModel): diff --git a/autointent/custom_types.py b/autointent/custom_types.py index da0ee16e8..ae5f9e6e2 100644 --- a/autointent/custom_types.py +++ b/autointent/custom_types.py @@ -71,5 +71,5 @@ class Split: INTENTS = "intents" -TuningType = Literal["brute", "bayes", "random"] +SamplerType = Literal["brute", "tpe", "random"] ValidationScheme = Literal["ho", "cv"] diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_optimization/_node_optimizer.py index 4ae0e420b..9a4a2493a 100644 --- a/autointent/nodes/_optimization/_node_optimizer.py +++ b/autointent/nodes/_optimization/_node_optimizer.py @@ -13,7 +13,7 @@ from autointent import Dataset from autointent.context import Context -from autointent.custom_types import NodeType, TuningType +from autointent.custom_types import NodeType, SamplerType from autointent.nodes._nodes_info import NODES_INFO @@ -59,7 +59,7 @@ def __init__( self.modules_search_spaces = search_space self._logger = logging.getLogger(__name__) # TODO solve duplicate logging messages problem - def fit(self, context: Context, tuning: TuningType = "brute") -> None: + def fit(self, context: Context, sampler: SamplerType = "brute") -> None: """ Fit the node optimizer. @@ -73,19 +73,19 @@ def fit(self, context: Context, tuning: TuningType = "brute") -> None: n_trials = None if "n_trials" in search_space: n_trials = search_space.pop("n_trials") - if tuning == "bayes": - sampler = optuna.samplers.TPESampler(seed=context.seed) + if sampler == "tpe": + sampler_instance = optuna.samplers.TPESampler(seed=context.seed) n_trials = n_trials or 10 - elif tuning == "brute": - sampler = optuna.samplers.BruteForceSampler(seed=context.seed) # type: ignore[assignment] + elif sampler == "brute": + sampler_instance = optuna.samplers.BruteForceSampler(seed=context.seed) # type: ignore[assignment] n_trials = None - elif tuning == "random": - sampler = optuna.samplers.RandomSampler(seed=context.seed) # type: ignore[assignment] + elif sampler == "random": + sampler_instance = optuna.samplers.RandomSampler(seed=context.seed) # type: ignore[assignment] n_trials = n_trials or 10 else: - msg = f"Unexpected sampler: {tuning}" + msg = f"Unexpected sampler: {sampler}" raise ValueError(msg) - study = optuna.create_study(direction="maximize", sampler=sampler) + study = optuna.create_study(direction="maximize", sampler=sampler_instance) optuna.logging.set_verbosity(optuna.logging.WARNING) obj = partial(self.objective, module_name=module_name, search_space=search_space, context=context) study.optimize(obj, n_trials=n_trials) diff --git a/tests/assets/configs/bayes.yaml b/tests/assets/configs/optuna.yaml similarity index 100% rename from tests/assets/configs/bayes.yaml rename to tests/assets/configs/optuna.yaml diff --git a/tests/conftest.py b/tests/conftest.py index 4d4190d4c..002812907 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -27,7 +27,7 @@ def dataset_unsplitted(): return Dataset.from_json(path) -TaskType = Literal["multiclass", "multilabel", "description", "bayes"] +TaskType = Literal["multiclass", "multilabel", "description", "optuna"] def get_search_space_path(task_type: TaskType): diff --git a/tests/pipeline/test_optimization.py b/tests/pipeline/test_optimization.py index 9549c6fd8..109058d9f 100644 --- a/tests/pipeline/test_optimization.py +++ b/tests/pipeline/test_optimization.py @@ -12,12 +12,12 @@ 
@pytest.mark.parametrize( - "tuning", - ["bayes", "random"], + "sampler", + ["tpe", "random"], ) -def test_bayes(dataset, tuning): +def test_bayes(dataset, sampler): project_dir = setup_environment() - search_space = get_search_space("bayes") + search_space = get_search_space("optuna") pipeline_optimizer = Pipeline.from_search_space(search_space) @@ -25,7 +25,7 @@ def test_bayes(dataset, tuning): pipeline_optimizer.set_config(VectorIndexConfig()) pipeline_optimizer.set_config(EmbedderConfig(batch_size=16, max_length=32, device="cpu")) - pipeline_optimizer.fit(dataset, scheme="ho", refit_after=False, tuning=tuning) + pipeline_optimizer.fit(dataset, scheme="ho", refit_after=False, sampler=sampler) @pytest.mark.parametrize( From 2b4ac0d04291e37bae7057b0295aa4597e32bbb1 Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Fri, 14 Feb 2025 10:56:42 +0300 Subject: [PATCH 65/74] add config validation for optuna (#132) * add config validation * add validation for union types * remove debug code * remove comment * run tests on pr for all branches * fix mlknn * fix type validation --- .github/workflows/test-inference.yaml | 2 - .github/workflows/test-nodes.yaml | 2 - .github/workflows/test-optimization.yaml | 2 - .github/workflows/unit-tests.yaml | 2 - autointent/configs/_optimization.py | 4 +- autointent/custom_types.py | 8 +- autointent/modules/decision/_adaptive.py | 6 +- autointent/modules/decision/_jinoos.py | 6 +- autointent/modules/decision/_threshold.py | 8 +- autointent/modules/decision/_tunable.py | 5 +- autointent/modules/embedding/_logreg.py | 5 +- autointent/modules/embedding/_retrieval.py | 6 +- .../scoring/_description/description.py | 5 +- autointent/modules/scoring/_dnnc/dnnc.py | 5 +- autointent/modules/scoring/_knn/knn.py | 5 +- autointent/modules/scoring/_mlknn/mlknn.py | 9 +- .../nodes/_optimization/_node_optimizer.py | 41 +- autointent/nodes/schemes.py | 63 ++- docs/optimizer_config.schema.json | 532 +++++++++++++++--- 19 files changed, 569 insertions(+), 147 deletions(-) diff --git a/.github/workflows/test-inference.yaml b/.github/workflows/test-inference.yaml index ca4c45ff0..a68ef07e8 100644 --- a/.github/workflows/test-inference.yaml +++ b/.github/workflows/test-inference.yaml @@ -5,8 +5,6 @@ on: branches: - dev pull_request: - branches: - - dev jobs: test: diff --git a/.github/workflows/test-nodes.yaml b/.github/workflows/test-nodes.yaml index 99507571b..b10161724 100644 --- a/.github/workflows/test-nodes.yaml +++ b/.github/workflows/test-nodes.yaml @@ -5,8 +5,6 @@ on: branches: - dev pull_request: - branches: - - dev jobs: test: diff --git a/.github/workflows/test-optimization.yaml b/.github/workflows/test-optimization.yaml index ea1cf861e..4625f39d7 100644 --- a/.github/workflows/test-optimization.yaml +++ b/.github/workflows/test-optimization.yaml @@ -5,8 +5,6 @@ on: branches: - dev pull_request: - branches: - - dev jobs: test: diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml index 3612d561f..5883080eb 100644 --- a/.github/workflows/unit-tests.yaml +++ b/.github/workflows/unit-tests.yaml @@ -5,8 +5,6 @@ on: branches: - dev pull_request: - branches: - - dev jobs: test: diff --git a/autointent/configs/_optimization.py b/autointent/configs/_optimization.py index fb8e2068b..1d85081a4 100644 --- a/autointent/configs/_optimization.py +++ b/autointent/configs/_optimization.py @@ -2,7 +2,7 @@ from pathlib import Path -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, PositiveInt from autointent.custom_types import 
SamplerType, ValidationScheme @@ -16,7 +16,7 @@ class DataConfig(BaseModel): """Path to the training data. Can be local path or HF repo.""" scheme: ValidationScheme """Hold-out or cross-validation.""" - n_folds: int = 3 + n_folds: PositiveInt = 3 """Number of folds in cross-validation.""" diff --git a/autointent/custom_types.py b/autointent/custom_types.py index ae5f9e6e2..b0318ee38 100644 --- a/autointent/custom_types.py +++ b/autointent/custom_types.py @@ -5,7 +5,9 @@ """ from enum import Enum -from typing import Literal, TypeAlias +from typing import Annotated, Literal, TypeAlias + +from annotated_types import Interval class LogLevel(Enum): @@ -73,3 +75,7 @@ class Split: SamplerType = Literal["brute", "tpe", "random"] ValidationScheme = Literal["ho", "cv"] + + +FloatFromZeroToOne = Annotated[float, Interval(ge=0, le=1)] +"""Float value between 0 and 1, inclusive.""" diff --git a/autointent/modules/decision/_adaptive.py b/autointent/modules/decision/_adaptive.py index e0d155a6e..806a35444 100644 --- a/autointent/modules/decision/_adaptive.py +++ b/autointent/modules/decision/_adaptive.py @@ -7,7 +7,7 @@ import numpy.typing as npt from autointent import Context -from autointent.custom_types import ListOfGenericLabels, ListOfLabelsWithOOS, MultiLabel +from autointent.custom_types import FloatFromZeroToOne, ListOfGenericLabels, ListOfLabelsWithOOS, MultiLabel from autointent.exceptions import MismatchNumClassesError from autointent.metrics import decision_f1 from autointent.modules.abc import DecisionModule @@ -59,7 +59,7 @@ class AdaptiveDecision(DecisionModule): supports_oos = False name = "adaptive" - def __init__(self, search_space: list[float] | None = None) -> None: + def __init__(self, search_space: list[FloatFromZeroToOne] | None = None) -> None: """ Initialize the AdaptiveDecision. @@ -69,7 +69,7 @@ def __init__(self, search_space: list[float] | None = None) -> None: self.search_space = search_space if search_space is not None else default_search_space @classmethod - def from_context(cls, context: Context, search_space: list[float] | None = None) -> "AdaptiveDecision": + def from_context(cls, context: Context, search_space: list[FloatFromZeroToOne] | None = None) -> "AdaptiveDecision": """ Create an AdaptiveDecision instance using a Context object. diff --git a/autointent/modules/decision/_jinoos.py b/autointent/modules/decision/_jinoos.py index f266d66ec..14676c2b6 100644 --- a/autointent/modules/decision/_jinoos.py +++ b/autointent/modules/decision/_jinoos.py @@ -6,7 +6,7 @@ import numpy.typing as npt from autointent import Context -from autointent.custom_types import ListOfGenericLabels +from autointent.custom_types import FloatFromZeroToOne, ListOfGenericLabels from autointent.exceptions import MismatchNumClassesError from autointent.modules.abc import DecisionModule from autointent.schemas import Tag @@ -55,7 +55,7 @@ class JinoosDecision(DecisionModule): def __init__( self, - search_space: list[float] | None = None, + search_space: list[FloatFromZeroToOne] | None = None, ) -> None: """ Initialize Jinoos predictor. @@ -65,7 +65,7 @@ def __init__( self.search_space = np.array(search_space) if search_space is not None else default_search_space @classmethod - def from_context(cls, context: Context, search_space: list[float] | None = None) -> "JinoosDecision": + def from_context(cls, context: Context, search_space: list[FloatFromZeroToOne] | None = None) -> "JinoosDecision": """ Initialize from context. 
diff --git a/autointent/modules/decision/_threshold.py b/autointent/modules/decision/_threshold.py index 76bf0f281..42dcbca20 100644 --- a/autointent/modules/decision/_threshold.py +++ b/autointent/modules/decision/_threshold.py @@ -7,7 +7,7 @@ import numpy.typing as npt from autointent import Context -from autointent.custom_types import ListOfGenericLabels, MultiLabel +from autointent.custom_types import FloatFromZeroToOne, ListOfGenericLabels, MultiLabel from autointent.exceptions import MismatchNumClassesError from autointent.modules.abc import DecisionModule from autointent.schemas import Tag @@ -75,7 +75,7 @@ class ThresholdDecision(DecisionModule): def __init__( self, - thresh: float | list[float], + thresh: FloatFromZeroToOne | list[FloatFromZeroToOne], ) -> None: """ Initialize threshold predictor. @@ -85,7 +85,9 @@ def __init__( self.thresh = thresh if isinstance(thresh, float) else np.array(thresh) @classmethod - def from_context(cls, context: Context, thresh: float | list[float] = 0.5) -> "ThresholdDecision": + def from_context( + cls, context: Context, thresh: FloatFromZeroToOne | list[FloatFromZeroToOne] = 0.5 + ) -> "ThresholdDecision": """ Initialize from context. diff --git a/autointent/modules/decision/_tunable.py b/autointent/modules/decision/_tunable.py index 34d0c2ce9..82f348b99 100644 --- a/autointent/modules/decision/_tunable.py +++ b/autointent/modules/decision/_tunable.py @@ -6,6 +6,7 @@ import numpy.typing as npt import optuna from optuna.trial import Trial +from pydantic import PositiveInt from autointent.context import Context from autointent.custom_types import ListOfGenericLabels @@ -77,7 +78,7 @@ class TunableDecision(DecisionModule): def __init__( self, - n_trials: int = 320, + n_trials: PositiveInt = 320, seed: int = 0, tags: list[Tag] | None = None, ) -> None: @@ -93,7 +94,7 @@ def __init__( self.tags = tags @classmethod - def from_context(cls, context: Context, n_trials: int = 320) -> "TunableDecision": + def from_context(cls, context: Context, n_trials: PositiveInt = 320) -> "TunableDecision": """ Initialize from context. diff --git a/autointent/modules/embedding/_logreg.py b/autointent/modules/embedding/_logreg.py index 64283ee9c..496b245ab 100644 --- a/autointent/modules/embedding/_logreg.py +++ b/autointent/modules/embedding/_logreg.py @@ -2,6 +2,7 @@ import numpy as np from numpy.typing import NDArray +from pydantic import PositiveInt from sklearn.linear_model import LogisticRegression, LogisticRegressionCV from sklearn.multioutput import MultiOutputClassifier from sklearn.preprocessing import LabelEncoder @@ -48,7 +49,7 @@ class LogregAimedEmbedding(EmbeddingModule): def __init__( self, embedder_name: str, - cv: int = 3, + cv: PositiveInt = 3, embedder_device: str = "cpu", embedder_batch_size: int = 32, embedder_max_length: int | None = None, @@ -76,7 +77,7 @@ def from_context( cls, context: Context, embedder_name: str, - cv: int = 3, + cv: PositiveInt = 3, ) -> "LogregAimedEmbedding": """ Create a LogregAimedEmbedding instance using a Context object. 
diff --git a/autointent/modules/embedding/_retrieval.py b/autointent/modules/embedding/_retrieval.py index 5063a1b00..2ca62eff1 100644 --- a/autointent/modules/embedding/_retrieval.py +++ b/autointent/modules/embedding/_retrieval.py @@ -1,5 +1,7 @@ """RetrievalAimedEmbedding class for a proxy optimization of embedding.""" +from pydantic import PositiveInt + from autointent import Context, VectorIndex from autointent.context.optimization_info import RetrieverArtifact from autointent.custom_types import ListOfLabels @@ -41,7 +43,7 @@ class RetrievalAimedEmbedding(EmbeddingModule): def __init__( self, - k: int, + k: PositiveInt, embedder_name: str, embedder_device: str = "cpu", embedder_batch_size: int = 32, @@ -69,7 +71,7 @@ def __init__( def from_context( cls, context: Context, - k: int, + k: PositiveInt, embedder_name: str, ) -> "RetrievalAimedEmbedding": """ diff --git a/autointent/modules/scoring/_description/description.py b/autointent/modules/scoring/_description/description.py index 79ade2aa7..ff55ae334 100644 --- a/autointent/modules/scoring/_description/description.py +++ b/autointent/modules/scoring/_description/description.py @@ -5,6 +5,7 @@ import numpy as np import scipy from numpy.typing import NDArray +from pydantic import PositiveFloat from sklearn.metrics.pairwise import cosine_similarity from autointent import Context, Embedder @@ -37,7 +38,7 @@ class DescriptionScorer(ScoringModule): def __init__( self, embedder_name: str, - temperature: float = 1.0, + temperature: PositiveFloat = 1.0, embedder_device: str = "cpu", embedder_batch_size: int = 32, embedder_max_length: int | None = None, @@ -64,7 +65,7 @@ def __init__( def from_context( cls, context: Context, - temperature: float, + temperature: PositiveFloat, embedder_name: str | None = None, ) -> "DescriptionScorer": """ diff --git a/autointent/modules/scoring/_dnnc/dnnc.py b/autointent/modules/scoring/_dnnc/dnnc.py index 0610785ae..84b51a496 100644 --- a/autointent/modules/scoring/_dnnc/dnnc.py +++ b/autointent/modules/scoring/_dnnc/dnnc.py @@ -6,6 +6,7 @@ import numpy as np import numpy.typing as npt +from pydantic import PositiveInt from autointent import Context, Ranker, VectorIndex from autointent.custom_types import ListOfLabels @@ -77,7 +78,7 @@ def __init__( # noqa: PLR0913 self, cross_encoder_name: str, embedder_name: str, - k: int, + k: PositiveInt, embedder_device: str = "cpu", embedder_batch_size: int = 32, embedder_max_length: int | None = None, @@ -118,7 +119,7 @@ def from_context( cls, context: Context, cross_encoder_name: str, - k: int, + k: PositiveInt, embedder_name: str | None = None, train_head: bool = False, ) -> "DNNCScorer": diff --git a/autointent/modules/scoring/_knn/knn.py b/autointent/modules/scoring/_knn/knn.py index ab665dfe7..eaa5ce864 100644 --- a/autointent/modules/scoring/_knn/knn.py +++ b/autointent/modules/scoring/_knn/knn.py @@ -4,6 +4,7 @@ import numpy as np import numpy.typing as npt +from pydantic import PositiveInt from autointent import Context, VectorIndex from autointent.custom_types import WEIGHT_TYPES, ListOfLabels @@ -57,7 +58,7 @@ class KNNScorer(ScoringModule): def __init__( self, embedder_name: str, - k: int, + k: PositiveInt, weights: WEIGHT_TYPES = "distance", embedder_device: str = "cpu", embedder_batch_size: int = 32, @@ -90,7 +91,7 @@ def __init__( def from_context( cls, context: Context, - k: int, + k: PositiveInt, weights: WEIGHT_TYPES, embedder_name: str | None = None, ) -> "KNNScorer": diff --git a/autointent/modules/scoring/_mlknn/mlknn.py 
b/autointent/modules/scoring/_mlknn/mlknn.py index b43763ab0..a25220f70 100644 --- a/autointent/modules/scoring/_mlknn/mlknn.py +++ b/autointent/modules/scoring/_mlknn/mlknn.py @@ -4,6 +4,7 @@ import numpy as np from numpy.typing import NDArray +from pydantic import NonNegativeInt, PositiveFloat, PositiveInt from autointent import Context, VectorIndex from autointent.custom_types import ListOfLabels @@ -57,7 +58,7 @@ class MLKnnScorer(ScoringModule): def __init__( self, - k: int, + k: PositiveInt, embedder_name: str, s: float = 1.0, ignore_first_neighbours: int = 0, @@ -91,9 +92,9 @@ def __init__( def from_context( cls, context: Context, - k: int, - s: float = 1.0, - ignore_first_neighbours: int = 0, + k: PositiveInt, + s: PositiveFloat = 1.0, + ignore_first_neighbours: NonNegativeInt = 0, embedder_name: str | None = None, ) -> "MLKnnScorer": """ diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_optimization/_node_optimizer.py index 9a4a2493a..bca970ebd 100644 --- a/autointent/nodes/_optimization/_node_optimizer.py +++ b/autointent/nodes/_optimization/_node_optimizer.py @@ -5,11 +5,12 @@ from copy import deepcopy from functools import partial from pathlib import Path -from typing import Any, TypedDict +from typing import Any import optuna import torch from optuna.trial import Trial +from pydantic import BaseModel, Field from autointent import Dataset from autointent.context import Context @@ -17,18 +18,18 @@ from autointent.nodes._nodes_info import NODES_INFO -class ParamSpaceInt(TypedDict, total=False): - low: int - high: int - step: int - log: bool +class ParamSpaceInt(BaseModel): + low: int = Field(..., description="Low boundary of the search space.") + high: int = Field(..., description="High boundary of the search space.") + step: int = Field(1, description="Step of the search space.") + log: bool = Field(False, description="Whether to use a logarithmic scale.") -class ParamSpaceFloat(TypedDict, total=False): - low: float - high: float - step: float - log: bool +class ParamSpaceFloat(BaseModel): + low: float = Field(..., description="Low boundary of the search space.") + high: float = Field(..., description="High boundary of the search space.") + step: float = Field(0.1, description="Step of the search space.") + log: bool = Field(False, description="Whether to use a logarithmic scale.") class NodeOptimizer: @@ -145,16 +146,24 @@ def objective( return target_metric - def suggest( - self, trial: Trial, search_space: dict[str, ParamSpaceInt | ParamSpaceFloat | list[Any]] - ) -> dict[str, Any]: + def suggest(self, trial: Trial, search_space: dict[str, Any | list[Any]]) -> dict[str, Any]: res: dict[str, Any] = {} + + def is_valid_param_space( + param_space: dict[str, Any], space_type: type[ParamSpaceInt | ParamSpaceFloat] + ) -> bool: + try: + space_type(**param_space) + return True # noqa: TRY300 + except ValueError: + return False + for param_name, param_space in search_space.items(): if isinstance(param_space, list): res[param_name] = trial.suggest_categorical(param_name, choices=param_space) - elif all(isinstance(v, int) for v in param_space.values()): + elif is_valid_param_space(param_space, ParamSpaceInt): res[param_name] = trial.suggest_int(param_name, **param_space) - elif all(isinstance(v, float) for v in param_space.values()): + elif is_valid_param_space(param_space, ParamSpaceFloat): res[param_name] = trial.suggest_float(param_name, **param_space) else: msg = f"Unsupported type of param search space: {param_space}" diff --git 
a/autointent/nodes/schemes.py b/autointent/nodes/schemes.py index 58cba623b..acfc71fab 100644 --- a/autointent/nodes/schemes.py +++ b/autointent/nodes/schemes.py @@ -2,13 +2,58 @@ import inspect from collections.abc import Iterator -from typing import Any, Literal, TypeAlias, Union, get_type_hints +from typing import Annotated, Any, Literal, TypeAlias, Union, get_args, get_origin, get_type_hints -from pydantic import BaseModel, Field, RootModel +from pydantic import BaseModel, Field, PositiveInt, RootModel from autointent.custom_types import NodeType from autointent.modules.abc import Module from autointent.nodes import DecisionNodeInfo, EmbeddingNodeInfo, ScoringNodeInfo +from autointent.nodes._optimization._node_optimizer import ParamSpaceFloat, ParamSpaceInt + + +def unwrap_annotated(tp: type) -> type: + """ + Unwrap the Annotated type to get the actual type. + + :param tp: Type to unwrap + :return: Unwrapped type + """ + return get_args(tp)[0] if get_origin(tp) is Annotated else tp + + +def type_matches(target: type, tp: type) -> bool: + """ + Recursively check if the target type is present in the given type. + + This function handles union types by unwrapping Annotated types where necessary. + + :param target: Target type + :param tp: Given type + :return: If the target type is present in the given type + """ + origin = get_origin(tp) + + if origin is Union: # float | list[float] + return any(type_matches(target, arg) for arg in get_args(tp)) + return unwrap_annotated(tp) is target + + +def get_optuna_class(param_type: type) -> type[ParamSpaceInt | ParamSpaceFloat] | None: + """ + Get the Optuna class for the given parameter type. + + If the (possibly annotated or union) type includes int or float, this function + returns the corresponding search space class. + + :param param_type: Parameter type (could be a union, annotated type, or container) + :return: ParamSpaceInt if the type matches int, ParamSpaceFloat if it matches float, else None. + """ + if type_matches(int, param_type): + return ParamSpaceInt + if type_matches(float, param_type): + return ParamSpaceFloat + return None def generate_models_and_union_type_for_classes( @@ -20,9 +65,12 @@ def generate_models_and_union_type_for_classes( for cls in classes: init_signature = inspect.signature(cls.from_context) globalns = getattr(cls.from_context, "__globals__", {}) - type_hints = get_type_hints(cls.from_context, globalns, None) # Resolve forward refs + type_hints = get_type_hints(cls.from_context, globalns, None, include_extras=True) # Resolve forward refs - fields = {"module_name": (Literal[cls.name], Field(...))} + fields = { + "module_name": (Literal[cls.name], Field(...)), + "n_trials": (PositiveInt | None, Field(None, description="Number of trials")), + } for param_name, param in init_signature.parameters.items(): if param_name in ("self", "cls", "context"): @@ -30,8 +78,11 @@ def generate_models_and_union_type_for_classes( param_type: TypeAlias = type_hints.get(param_name, Any) # type: ignore[valid-type] # noqa: PYI042 field = Field(default=[param.default]) if param.default is not inspect.Parameter.empty else Field(...) 
- - fields[param_name] = (list[param_type], field) # type: ignore[assignment] + search_type = get_optuna_class(param_type) + if search_type is None: + fields[param_name] = (list[param_type], field) + else: + fields[param_name] = (list[param_type] | search_type, field) model_name = f"{cls.__name__}InitModel" models[cls.__name__] = type( diff --git a/docs/optimizer_config.schema.json b/docs/optimizer_config.schema.json index 73e79757e..6980b460f 100644 --- a/docs/optimizer_config.schema.json +++ b/docs/optimizer_config.schema.json @@ -7,6 +7,20 @@ "title": "Module Name", "type": "string" }, + "n_trials": { + "anyOf": [ + { + "exclusiveMinimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Number of trials", + "title": "N Trials" + }, "search_space": { "default": [ null @@ -15,6 +29,8 @@ "anyOf": [ { "items": { + "maximum": 1.0, + "minimum": 0.0, "type": "number" }, "type": "array" @@ -40,6 +56,20 @@ "const": "argmax", "title": "Module Name", "type": "string" + }, + "n_trials": { + "anyOf": [ + { + "exclusiveMinimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Number of trials", + "title": "N Trials" } }, "required": [ @@ -55,6 +85,20 @@ "title": "Module Name", "type": "string" }, + "n_trials": { + "anyOf": [ + { + "exclusiveMinimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Number of trials", + "title": "N Trials" + }, "cross_encoder_name": { "items": { "type": "string" @@ -63,11 +107,19 @@ "type": "array" }, "k": { - "items": { - "type": "integer" - }, - "title": "K", - "type": "array" + "anyOf": [ + { + "items": { + "exclusiveMinimum": 0, + "type": "integer" + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceInt" + } + ], + "title": "K" }, "embedder_name": { "default": [ @@ -183,12 +235,34 @@ "title": "Module Name", "type": "string" }, + "n_trials": { + "anyOf": [ + { + "exclusiveMinimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Number of trials", + "title": "N Trials" + }, "temperature": { - "items": { - "type": "number" - }, - "title": "Temperature", - "type": "array" + "anyOf": [ + { + "items": { + "exclusiveMinimum": 0.0, + "type": "number" + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceFloat" + } + ], + "title": "Temperature" }, "embedder_name": { "default": [ @@ -324,6 +398,20 @@ "title": "Module Name", "type": "string" }, + "n_trials": { + "anyOf": [ + { + "exclusiveMinimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Number of trials", + "title": "N Trials" + }, "search_space": { "default": [ null @@ -332,6 +420,8 @@ "anyOf": [ { "items": { + "maximum": 1.0, + "minimum": 0.0, "type": "number" }, "type": "array" @@ -358,12 +448,34 @@ "title": "Module Name", "type": "string" }, + "n_trials": { + "anyOf": [ + { + "exclusiveMinimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Number of trials", + "title": "N Trials" + }, "k": { - "items": { - "type": "integer" - }, - "title": "K", - "type": "array" + "anyOf": [ + { + "items": { + "exclusiveMinimum": 0, + "type": "integer" + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceInt" + } + ], + "title": "K" }, "weights": { "items": { @@ -410,6 +522,20 @@ "title": "Module Name", "type": "string" }, + "n_trials": { + "anyOf": [ + { + "exclusiveMinimum": 0, + "type": "integer" + }, + { + "type": "null" 
+ } + ], + "default": null, + "description": "Number of trials", + "title": "N Trials" + }, "embedder_name": { "default": [ null @@ -441,6 +567,20 @@ "title": "Module Name", "type": "string" }, + "n_trials": { + "anyOf": [ + { + "exclusiveMinimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Number of trials", + "title": "N Trials" + }, "embedder_name": { "items": { "type": "string" @@ -449,14 +589,22 @@ "type": "array" }, "cv": { + "anyOf": [ + { + "items": { + "exclusiveMinimum": 0, + "type": "integer" + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceInt" + } + ], "default": [ 3 ], - "items": { - "type": "integer" - }, - "title": "Cv", - "type": "array" + "title": "Cv" } }, "required": [ @@ -473,32 +621,70 @@ "title": "Module Name", "type": "string" }, + "n_trials": { + "anyOf": [ + { + "exclusiveMinimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Number of trials", + "title": "N Trials" + }, "k": { - "items": { - "type": "integer" - }, - "title": "K", - "type": "array" + "anyOf": [ + { + "items": { + "exclusiveMinimum": 0, + "type": "integer" + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceInt" + } + ], + "title": "K" }, "s": { + "anyOf": [ + { + "items": { + "exclusiveMinimum": 0.0, + "type": "number" + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceFloat" + } + ], "default": [ 1.0 ], - "items": { - "type": "number" - }, - "title": "S", - "type": "array" + "title": "S" }, "ignore_first_neighbours": { + "anyOf": [ + { + "items": { + "minimum": 0, + "type": "integer" + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceInt" + } + ], "default": [ 0 ], - "items": { - "type": "integer" - }, - "title": "Ignore First Neighbours", - "type": "array" + "title": "Ignore First Neighbours" }, "embedder_name": { "default": [ @@ -536,6 +722,70 @@ "title": "NodeType", "type": "string" }, + "ParamSpaceFloat": { + "properties": { + "low": { + "description": "Low boundary of the search space.", + "title": "Low", + "type": "number" + }, + "high": { + "description": "High boundary of the search space.", + "title": "High", + "type": "number" + }, + "step": { + "default": 0.1, + "description": "Step of the search space.", + "title": "Step", + "type": "number" + }, + "log": { + "default": false, + "description": "Whether to use a logarithmic scale.", + "title": "Log", + "type": "boolean" + } + }, + "required": [ + "low", + "high" + ], + "title": "ParamSpaceFloat", + "type": "object" + }, + "ParamSpaceInt": { + "properties": { + "low": { + "description": "Low boundary of the search space.", + "title": "Low", + "type": "integer" + }, + "high": { + "description": "High boundary of the search space.", + "title": "High", + "type": "integer" + }, + "step": { + "default": 1, + "description": "Step of the search space.", + "title": "Step", + "type": "integer" + }, + "log": { + "default": false, + "description": "Whether to use a logarithmic scale.", + "title": "Log", + "type": "boolean" + } + }, + "required": [ + "low", + "high" + ], + "title": "ParamSpaceInt", + "type": "object" + }, "RerankScorerInitModel": { "properties": { "module_name": { @@ -543,12 +793,33 @@ "title": "Module Name", "type": "string" }, + "n_trials": { + "anyOf": [ + { + "exclusiveMinimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Number of trials", + "title": "N Trials" + }, "k": { - "items": { - "type": "integer" - }, - "title": "K", - "type": 
"array" + "anyOf": [ + { + "items": { + "type": "integer" + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceInt" + } + ], + "title": "K" }, "weights": { "items": { @@ -597,38 +868,52 @@ "type": "array" }, "m": { + "anyOf": [ + { + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceInt" + } + ], "default": [ null ], - "items": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ] - }, - "title": "M", - "type": "array" + "title": "M" }, "rank_threshold_cutoff": { + "anyOf": [ + { + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceInt" + } + ], "default": [ null ], - "items": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ] - }, - "title": "Rank Threshold Cutoff", - "type": "array" + "title": "Rank Threshold Cutoff" } }, "required": [ @@ -647,12 +932,34 @@ "title": "Module Name", "type": "string" }, + "n_trials": { + "anyOf": [ + { + "exclusiveMinimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Number of trials", + "title": "N Trials" + }, "k": { - "items": { - "type": "integer" - }, - "title": "K", - "type": "array" + "anyOf": [ + { + "items": { + "exclusiveMinimum": 0, + "type": "integer" + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceInt" + } + ], + "title": "K" }, "embedder_name": { "items": { @@ -764,6 +1071,20 @@ "title": "Module Name", "type": "string" }, + "n_trials": { + "anyOf": [ + { + "exclusiveMinimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Number of trials", + "title": "N Trials" + }, "clf_name": { "default": [ "LogisticRegression" @@ -822,25 +1143,50 @@ "title": "Module Name", "type": "string" }, + "n_trials": { + "anyOf": [ + { + "exclusiveMinimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Number of trials", + "title": "N Trials" + }, "thresh": { + "anyOf": [ + { + "items": { + "anyOf": [ + { + "maximum": 1.0, + "minimum": 0.0, + "type": "number" + }, + { + "items": { + "maximum": 1.0, + "minimum": 0.0, + "type": "number" + }, + "type": "array" + } + ] + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceFloat" + } + ], "default": [ 0.5 ], - "items": { - "anyOf": [ - { - "type": "number" - }, - { - "items": { - "type": "number" - }, - "type": "array" - } - ] - }, - "title": "Thresh", - "type": "array" + "title": "Thresh" } }, "required": [ @@ -857,14 +1203,22 @@ "type": "string" }, "n_trials": { + "anyOf": [ + { + "items": { + "exclusiveMinimum": 0, + "type": "integer" + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceInt" + } + ], "default": [ 320 ], - "items": { - "type": "integer" - }, - "title": "N Trials", - "type": "array" + "title": "N Trials" } }, "required": [ From 1459b6077f55ea46d60f74860b87d6c4936b3224 Mon Sep 17 00:00:00 2001 From: voorhs Date: Fri, 14 Feb 2025 10:59:16 +0300 Subject: [PATCH 66/74] return CI config back to normal --- .github/workflows/test-inference.yaml | 2 ++ .github/workflows/test-nodes.yaml | 2 ++ .github/workflows/test-optimization.yaml | 2 ++ .github/workflows/unit-tests.yaml | 2 ++ 4 files changed, 8 insertions(+) diff --git a/.github/workflows/test-inference.yaml b/.github/workflows/test-inference.yaml index a68ef07e8..ca4c45ff0 100644 --- a/.github/workflows/test-inference.yaml +++ 
b/.github/workflows/test-inference.yaml @@ -5,6 +5,8 @@ on: branches: - dev pull_request: + branches: + - dev jobs: test: diff --git a/.github/workflows/test-nodes.yaml b/.github/workflows/test-nodes.yaml index b10161724..99507571b 100644 --- a/.github/workflows/test-nodes.yaml +++ b/.github/workflows/test-nodes.yaml @@ -5,6 +5,8 @@ on: branches: - dev pull_request: + branches: + - dev jobs: test: diff --git a/.github/workflows/test-optimization.yaml b/.github/workflows/test-optimization.yaml index 4625f39d7..ea1cf861e 100644 --- a/.github/workflows/test-optimization.yaml +++ b/.github/workflows/test-optimization.yaml @@ -5,6 +5,8 @@ on: branches: - dev pull_request: + branches: + - dev jobs: test: diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml index 5883080eb..3612d561f 100644 --- a/.github/workflows/unit-tests.yaml +++ b/.github/workflows/unit-tests.yaml @@ -5,6 +5,8 @@ on: branches: - dev pull_request: + branches: + - dev jobs: test: From f7798a1486107add1af0f8ca6c88f9361f4fcac8 Mon Sep 17 00:00:00 2001 From: voorhs Date: Fri, 14 Feb 2025 11:00:40 +0300 Subject: [PATCH 67/74] fix default value for step in `ParamSpaceFloat` --- autointent/nodes/_optimization/_node_optimizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_optimization/_node_optimizer.py index bca970ebd..8ff1255c8 100644 --- a/autointent/nodes/_optimization/_node_optimizer.py +++ b/autointent/nodes/_optimization/_node_optimizer.py @@ -28,7 +28,7 @@ class ParamSpaceInt(BaseModel): class ParamSpaceFloat(BaseModel): low: float = Field(..., description="Low boundary of the search space.") high: float = Field(..., description="High boundary of the search space.") - step: float = Field(0.1, description="Step of the search space.") + step: float | None = Field(None, description="Step of the search space.") log: bool = Field(False, description="Whether to use a logarithmic scale.") From 2adb593bcb737d7c695ec8ac1e721bc3ccd3e5fe Mon Sep 17 00:00:00 2001 From: voorhs Date: Fri, 14 Feb 2025 11:18:45 +0300 Subject: [PATCH 68/74] update schema --- docs/optimizer_config.schema.json | 75 ++++++++++++++----------------- 1 file changed, 34 insertions(+), 41 deletions(-) diff --git a/docs/optimizer_config.schema.json b/docs/optimizer_config.schema.json index 2d2213e7e..a0015d750 100644 --- a/docs/optimizer_config.schema.json +++ b/docs/optimizer_config.schema.json @@ -940,10 +940,17 @@ "type": "number" }, "step": { - "default": 0.1, + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "default": null, "description": "Step of the search space.", - "title": "Step", - "type": "number" + "title": "Step" }, "log": { "default": false, @@ -1073,52 +1080,38 @@ "type": "array" }, "m": { - "anyOf": [ - { - "items": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ] - }, - "type": "array" - }, - { - "$ref": "#/$defs/ParamSpaceInt" - } - ], "default": [ null ], - "title": "M" + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] + }, + "title": "M", + "type": "array" }, "rank_threshold_cutoff": { - "anyOf": [ - { - "items": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ] - }, - "type": "array" - }, - { - "$ref": "#/$defs/ParamSpaceInt" - } - ], "default": [ null ], - "title": "Rank Threshold Cutoff" + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] + }, + "title": "Rank Threshold Cutoff", + "type": 
"array" } }, "required": [ From c32c356146119d24d80fd13640d9798ca7d99976 Mon Sep 17 00:00:00 2001 From: voorhs Date: Fri, 14 Feb 2025 11:22:10 +0300 Subject: [PATCH 69/74] update callback test --- tests/callback/test_callback.py | 67 +++++++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 3 deletions(-) diff --git a/tests/callback/test_callback.py b/tests/callback/test_callback.py index 53b70491c..c9931a6b7 100644 --- a/tests/callback/test_callback.py +++ b/tests/callback/test_callback.py @@ -118,17 +118,78 @@ def test_pipeline_callbacks(dataset): ("end_module", {}), ( "start_module", - {"module_kwargs": {"embedder_config": None, "k": 1, "weights": "uniform"}, "module_name": "knn", "num": 0}, + { + "module_kwargs": { + "embedder_config": { + "batch_size": 32, + "classifier_prompt": None, + "cluster_prompt": None, + "default_prompt": None, + "device": None, + "max_length": None, + "model_name": "sergeyzh/rubert-tiny-turbo", + "passage_prompt": None, + "query_prompt": None, + "sts_prompt": None, + "use_cache": False, + }, + "k": 1, + "weights": "uniform", + }, + "module_name": "knn", + "num": 0, + }, ), ("log_metric", {"metrics": {"scoring_accuracy": 1.0, "scoring_roc_auc": 1.0}}), ("end_module", {}), ( "start_module", - {"module_kwargs": {"embedder_config": None, "k": 1, "weights": "distance"}, "module_name": "knn", "num": 1}, + { + "module_kwargs": { + "embedder_config": { + "batch_size": 32, + "classifier_prompt": None, + "cluster_prompt": None, + "default_prompt": None, + "device": None, + "max_length": None, + "model_name": "sergeyzh/rubert-tiny-turbo", + "passage_prompt": None, + "query_prompt": None, + "sts_prompt": None, + "use_cache": False, + }, + "k": 1, + "weights": "distance", + }, + "module_name": "knn", + "num": 1, + }, ), ("log_metric", {"metrics": {"scoring_accuracy": 1.0, "scoring_roc_auc": 1.0}}), ("end_module", {}), - ("start_module", {"module_kwargs": {"embedder_config": None}, "module_name": "linear", "num": 0}), + ( + "start_module", + { + "module_kwargs": { + "embedder_config": { + "batch_size": 32, + "classifier_prompt": None, + "cluster_prompt": None, + "default_prompt": None, + "device": None, + "max_length": None, + "model_name": "sergeyzh/rubert-tiny-turbo", + "passage_prompt": None, + "query_prompt": None, + "sts_prompt": None, + "use_cache": False, + }, + }, + "module_name": "linear", + "num": 0, + }, + ), ("log_metric", {"metrics": {"scoring_accuracy": 0.75, "scoring_roc_auc": 1.0}}), ("end_module", {}), ("start_module", {"module_kwargs": {"thresh": 0.5}, "module_name": "threshold", "num": 0}), From 70b4079223c9f2d94dccab1b75dedfb4c19ef121 Mon Sep 17 00:00:00 2001 From: voorhs Date: Fri, 14 Feb 2025 11:22:16 +0300 Subject: [PATCH 70/74] change CI config --- .github/workflows/test-inference.yaml | 2 -- .github/workflows/test-nodes.yaml | 2 -- .github/workflows/test-optimization.yaml | 2 -- .github/workflows/unit-tests.yaml | 2 -- 4 files changed, 8 deletions(-) diff --git a/.github/workflows/test-inference.yaml b/.github/workflows/test-inference.yaml index ca4c45ff0..a68ef07e8 100644 --- a/.github/workflows/test-inference.yaml +++ b/.github/workflows/test-inference.yaml @@ -5,8 +5,6 @@ on: branches: - dev pull_request: - branches: - - dev jobs: test: diff --git a/.github/workflows/test-nodes.yaml b/.github/workflows/test-nodes.yaml index 99507571b..b10161724 100644 --- a/.github/workflows/test-nodes.yaml +++ b/.github/workflows/test-nodes.yaml @@ -5,8 +5,6 @@ on: branches: - dev pull_request: - branches: - - dev jobs: test: diff --git 
a/.github/workflows/test-optimization.yaml b/.github/workflows/test-optimization.yaml index ea1cf861e..4625f39d7 100644 --- a/.github/workflows/test-optimization.yaml +++ b/.github/workflows/test-optimization.yaml @@ -5,8 +5,6 @@ on: branches: - dev pull_request: - branches: - - dev jobs: test: diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml index 3612d561f..5883080eb 100644 --- a/.github/workflows/unit-tests.yaml +++ b/.github/workflows/unit-tests.yaml @@ -5,8 +5,6 @@ on: branches: - dev pull_request: - branches: - - dev jobs: test: From d34a4c02902f458d5ea34dec55e8997c2b218d25 Mon Sep 17 00:00:00 2001 From: voorhs Date: Fri, 14 Feb 2025 11:25:31 +0300 Subject: [PATCH 71/74] update search space configs for testing --- tests/assets/configs/multiclass.yaml | 2 +- tests/assets/configs/optuna.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/assets/configs/multiclass.yaml b/tests/assets/configs/multiclass.yaml index 69db79f8b..ea76dda24 100644 --- a/tests/assets/configs/multiclass.yaml +++ b/tests/assets/configs/multiclass.yaml @@ -20,7 +20,7 @@ - avsolatorio/GIST-small-Embedding-v0 k: [1, 3] - module_name: sklearn - embedder_name: + embedder_config: - sergeyzh/rubert-tiny-turbo clf_name: - LogisticRegression diff --git a/tests/assets/configs/optuna.yaml b/tests/assets/configs/optuna.yaml index 636e217fc..b91b463ee 100644 --- a/tests/assets/configs/optuna.yaml +++ b/tests/assets/configs/optuna.yaml @@ -3,7 +3,7 @@ search_space: - module_name: retrieval k: [10] - embedder_name: + embedder_config: - sentence-transformers/all-MiniLM-L6-v2 - avsolatorio/GIST-small-Embedding-v0 - node_type: scoring From d8d7852362f848103218fd80864bf2803fce18fd Mon Sep 17 00:00:00 2001 From: voorhs Date: Fri, 14 Feb 2025 11:28:45 +0300 Subject: [PATCH 72/74] enable validation back --- autointent/_pipeline/_pipeline.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py index 2b4e463fc..3085b90ba 100644 --- a/autointent/_pipeline/_pipeline.py +++ b/autointent/_pipeline/_pipeline.py @@ -13,6 +13,7 @@ from autointent.custom_types import ListOfGenericLabels, NodeType, SamplerType, ValidationScheme from autointent.metrics import PREDICTION_METRICS_MULTILABEL from autointent.nodes import InferenceNode, NodeOptimizer +from autointent.nodes.schemes import OptimizationConfig from autointent.utils import load_default_search_space, load_search_space from ._schemas import InferencePipelineOutput, InferencePipelineUtteranceOutput @@ -70,7 +71,7 @@ def from_search_space(cls, search_space: list[dict[str, Any]] | Path | str, seed """ if isinstance(search_space, Path | str): search_space = load_search_space(search_space) - validated_search_space = search_space # OptimizationConfig(search_space).model_dump() # type: ignore[arg-type] + validated_search_space = OptimizationConfig(search_space).model_dump() # type: ignore[arg-type] nodes = [NodeOptimizer(**node) for node in validated_search_space] return cls(nodes=nodes, seed=seed) From f727bf4af517ad55f8aa78e7051ccf26febc9317 Mon Sep 17 00:00:00 2001 From: voorhs Date: Fri, 14 Feb 2025 13:44:55 +0300 Subject: [PATCH 73/74] remove TunableDecision from search spaces --- tests/assets/configs/multiclass.yaml | 1 - tests/assets/configs/multilabel.yaml | 1 - tests/assets/configs/optuna.yaml | 1 - 3 files changed, 3 deletions(-) diff --git a/tests/assets/configs/multiclass.yaml b/tests/assets/configs/multiclass.yaml index ea76dda24..3fbf8948c 
100644 --- a/tests/assets/configs/multiclass.yaml +++ b/tests/assets/configs/multiclass.yaml @@ -36,6 +36,5 @@ search_space: - module_name: threshold thresh: [0.5, [0.5, 0.5, 0.5, 0.5]] - - module_name: tunable - module_name: argmax - module_name: jinoos diff --git a/tests/assets/configs/multilabel.yaml b/tests/assets/configs/multilabel.yaml index 879c31c6d..91742358a 100644 --- a/tests/assets/configs/multilabel.yaml +++ b/tests/assets/configs/multilabel.yaml @@ -32,5 +32,4 @@ search_space: - module_name: threshold thresh: [0.5, [0.5, 0.5, 0.5, 0.5]] - - module_name: tunable - module_name: adaptive diff --git a/tests/assets/configs/optuna.yaml b/tests/assets/configs/optuna.yaml index b91b463ee..b775ab3f6 100644 --- a/tests/assets/configs/optuna.yaml +++ b/tests/assets/configs/optuna.yaml @@ -24,6 +24,5 @@ thresh: low: 0.1 high: 0.9 - - module_name: tunable - module_name: argmax - module_name: jinoos From fa006f8ae47d2c5b90bcf42d5532922129833fd9 Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Fri, 14 Feb 2025 14:41:28 +0300 Subject: [PATCH 74/74] upd schema --- docs/optimizer_config.schema.json | 62 +++++++++++++++++++------------ 1 file changed, 38 insertions(+), 24 deletions(-) diff --git a/docs/optimizer_config.schema.json b/docs/optimizer_config.schema.json index a0015d750..f51fa93a3 100644 --- a/docs/optimizer_config.schema.json +++ b/docs/optimizer_config.schema.json @@ -1080,38 +1080,52 @@ "type": "array" }, "m": { + "anyOf": [ + { + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceInt" + } + ], "default": [ null ], - "items": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ] - }, - "title": "M", - "type": "array" + "title": "M" }, "rank_threshold_cutoff": { + "anyOf": [ + { + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceInt" + } + ], "default": [ null ], - "items": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ] - }, - "title": "Rank Threshold Cutoff", - "type": "array" + "title": "Rank Threshold Cutoff" } }, "required": [