From 8019153e53e9696a1d67d77065a3201f742f9246 Mon Sep 17 00:00:00 2001 From: voorhs Date: Sat, 1 Feb 2025 12:03:27 +0300 Subject: [PATCH 01/74] define interface --- autointent/modules/abc/_scoring.py | 33 +++++++++++++++--------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/autointent/modules/abc/_scoring.py b/autointent/modules/abc/_scoring.py index f275de9d0..7ac8045ff 100644 --- a/autointent/modules/abc/_scoring.py +++ b/autointent/modules/abc/_scoring.py @@ -1,8 +1,9 @@ """Base class for scoring modules.""" from abc import ABC, abstractmethod -from typing import Any, Literal +from typing import Any +import numpy as np import numpy.typing as npt from autointent import Context @@ -21,7 +22,7 @@ class ScoringModule(Module, ABC): supports_oos = False - def score(self, context: Context, split: Literal["validation", "test"], metrics: list[str]) -> dict[str, float]: + def score(self, context: Context, test: bool, metrics: list[str]) -> dict[str, float]: """ Evaluate the scorer on a test set and compute the specified metric. @@ -29,25 +30,23 @@ def score(self, context: Context, split: Literal["validation", "test"], metrics: :param split: Target split :return: Computed metrics value for the test set or error code of metrics """ - if split == "validation": - utterances = context.data_handler.validation_utterances(0) - labels = context.data_handler.validation_labels(0) - elif split == "test": + metrics_dict = SCORING_METRICS_MULTILABEL if context.is_multilabel() else SCORING_METRICS_MULTICLASS + chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics} + + if test: utterances = context.data_handler.test_utterances() labels = context.data_handler.test_labels() - else: - message = f"Invalid split '{split}' provided. Expected one of 'validation', or 'test'." 
- raise ValueError(message) + scores = self.predict(utterances) + return self.score_metrics((labels, scores), chosen_metrics) - scores = self.predict(utterances) + metrics_values = {name: [] for name in chosen_metrics} + for train_utterances, train_labels, val_utterances, val_labels in context.validation_iterator(0): + self.fit(train_utterances, train_labels) + val_scores = self.predict(val_utterances) + for name, fn in chosen_metrics.items(): + metrics_values[name].append(fn(val_labels, val_scores)) - self._train_scores = self.predict(context.data_handler.train_utterances(1)) - self._validation_scores = self.predict(context.data_handler.validation_utterances(1)) - self._test_scores = self.predict(context.data_handler.test_utterances()) - - metrics_dict = SCORING_METRICS_MULTILABEL if context.is_multilabel() else SCORING_METRICS_MULTICLASS - chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics} - return self.score_metrics((labels, scores), chosen_metrics) + return {name: np.mean(values_list) for name, values_list in metrics_values.items()} def get_assets(self) -> ScorerArtifact: """ From 90af5ce9b2426681fdea5076a76504c7858b07dc Mon Sep 17 00:00:00 2001 From: voorhs Date: Sat, 1 Feb 2025 12:39:49 +0300 Subject: [PATCH 02/74] basic ho iterator --- .../context/data_handler/_data_handler.py | 33 ++++++++++++++++--- autointent/modules/abc/_scoring.py | 2 +- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/autointent/context/data_handler/_data_handler.py b/autointent/context/data_handler/_data_handler.py index 06387b1f1..b0772e4d0 100644 --- a/autointent/context/data_handler/_data_handler.py +++ b/autointent/context/data_handler/_data_handler.py @@ -2,7 +2,7 @@ import logging from pathlib import Path -from typing import TypedDict, cast +from typing import Literal, TypedDict, cast from datasets import concatenate_datasets from transformers import set_seed @@ -26,10 +26,12 @@ class RegexPatterns(TypedDict): """Partial match regex patterns.""" -class DataHandler: +class DataHandler: # TODO rename to Validator """Data handler class.""" - def __init__(self, dataset: Dataset, random_seed: int = 0, split_train: bool = True) -> None: + def __init__( + self, dataset: Dataset, scheme: Literal["cv", "ho"], split_train: bool = True, random_seed: int = 0 + ) -> None: """ Initialize the data handler. @@ -43,8 +45,12 @@ def __init__(self, dataset: Dataset, random_seed: int = 0, split_train: bool = T self.dataset = dataset self.n_classes = self.dataset.n_classes + self.scheme = scheme - self._split(random_seed, split_train) + if scheme == "ho": + self._split_ho(random_seed, split_train) + elif scheme == "cv": + self._split_cv(random_seed) self.regexp_patterns = [ RegexPatterns( @@ -153,6 +159,23 @@ def test_labels(self, idx: int | None = None) -> ListOfGenericLabels: split = f"{Split.TEST}_{idx}" if idx is not None else Split.TEST return cast(ListOfGenericLabels, self.dataset[split][self.dataset.label_feature]) + def validation_iterator(self, idx: int | None = None) -> list[tuple[list, list, list, list]]: + if self.scheme == "ho": + return [ + ( + self.train_utterances(idx), + self.train_labels(idx), + self.validation_utterances(idx), + self.validation_labels(idx), + ) + ] + + if self.scheme == "cv": + raise NotImplementedError + + msg = "something's wrong" + raise RuntimeError(msg) + def dump(self, filepath: str | Path) -> None: """ Save the dataset splits and intents to a JSON file. 
@@ -161,7 +184,7 @@ def dump(self, filepath: str | Path) -> None: """ self.dataset.to_json(filepath) - def _split(self, random_seed: int, split_train: bool) -> None: + def _split_ho(self, random_seed: int, split_train: bool) -> None: has_validation_split = any(split.startswith(Split.VALIDATION) for split in self.dataset) if split_train and Split.TRAIN in self.dataset: diff --git a/autointent/modules/abc/_scoring.py b/autointent/modules/abc/_scoring.py index 7ac8045ff..bac3f8f0e 100644 --- a/autointent/modules/abc/_scoring.py +++ b/autointent/modules/abc/_scoring.py @@ -40,7 +40,7 @@ def score(self, context: Context, test: bool, metrics: list[str]) -> dict[str, f return self.score_metrics((labels, scores), chosen_metrics) metrics_values = {name: [] for name in chosen_metrics} - for train_utterances, train_labels, val_utterances, val_labels in context.validation_iterator(0): + for train_utterances, train_labels, val_utterances, val_labels in context.data_handler.validation_iterator(0): self.fit(train_utterances, train_labels) val_scores = self.predict(val_utterances) for name, fn in chosen_metrics.items(): From 86016782475a9a5ba18bbaa9eae5e977bfe64408 Mon Sep 17 00:00:00 2001 From: voorhs Date: Sat, 1 Feb 2025 13:00:10 +0300 Subject: [PATCH 03/74] move obtaining data for train from node optimizer to modules themselves --- autointent/modules/abc/_base.py | 4 +++ autointent/modules/abc/_decision.py | 4 +++ autointent/modules/abc/_embedding.py | 5 ++++ autointent/modules/abc/_scoring.py | 4 +++ autointent/modules/regexp/_regexp.py | 3 ++ .../scoring/_description/description.py | 7 +++++ .../nodes/_optimization/_node_optimizer.py | 29 ------------------- 7 files changed, 27 insertions(+), 29 deletions(-) diff --git a/autointent/modules/abc/_base.py b/autointent/modules/abc/_base.py index 287b5126d..567405254 100644 --- a/autointent/modules/abc/_base.py +++ b/autointent/modules/abc/_base.py @@ -166,3 +166,7 @@ def _get_task_specs(labels: ListOfGenericLabels) -> tuple[int, bool, bool]: multilabel = isinstance(in_domain_label, list) n_classes = len(labels[0]) if multilabel else len(set(labels).difference([None])) # type: ignore[arg-type] return n_classes, multilabel, contains_oos_samples + + @abstractmethod + def get_train_data(self, context: Context) -> Any: # noqa: ANN401 + ... 
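For illustration, the calling convention these get_train_data hooks enable could look roughly like the sketch below; the fit_from_context helper is hypothetical and only shows the intended call shape, it is not code from this series:

    from autointent import Context
    from autointent.modules.abc import Module

    def fit_from_context(module: Module, context: Context) -> None:
        # Each module now knows which slices of the context it trains on,
        # so the optimizer no longer needs per-node-type branching.
        module.fit(*module.get_train_data(context))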
diff --git a/autointent/modules/abc/_decision.py b/autointent/modules/abc/_decision.py index 750ee05f7..367a2a435 100644 --- a/autointent/modules/abc/_decision.py +++ b/autointent/modules/abc/_decision.py @@ -72,6 +72,10 @@ def _validate_task(self, scores: npt.NDArray[Any], labels: ListOfGenericLabels) ) raise ValueError(msg) + def get_train_data(self, context: Context) -> tuple[npt.NDArray[Any], ListOfGenericLabels, list[Tag]]: + labels, scores = get_decision_evaluation_data(context, "train") + return (scores, labels, context.data_handler.tags) + def get_decision_evaluation_data( context: Context, diff --git a/autointent/modules/abc/_embedding.py b/autointent/modules/abc/_embedding.py index cb3a2f412..708a749eb 100644 --- a/autointent/modules/abc/_embedding.py +++ b/autointent/modules/abc/_embedding.py @@ -2,8 +2,13 @@ from abc import ABC +from autointent import Context +from autointent.custom_types import ListOfLabels from autointent.modules.abc import Module class EmbeddingModule(Module, ABC): """Base class for embedding modules.""" + + def get_train_data(self, context: Context) -> tuple[list[str], ListOfLabels]: + return (context.data_handler.train_utterances(0), context.data_handler.train_labels(0)) diff --git a/autointent/modules/abc/_scoring.py b/autointent/modules/abc/_scoring.py index bac3f8f0e..97004a63d 100644 --- a/autointent/modules/abc/_scoring.py +++ b/autointent/modules/abc/_scoring.py @@ -8,6 +8,7 @@ from autointent import Context from autointent.context.optimization_info import ScorerArtifact +from autointent.custom_types import ListOfLabels from autointent.metrics import SCORING_METRICS_MULTICLASS, SCORING_METRICS_MULTILABEL from autointent.modules.abc import Module @@ -60,6 +61,9 @@ def get_assets(self) -> ScorerArtifact: test_scores=self._test_scores, ) + def get_train_data(self, context: Context) -> tuple[list[str], ListOfLabels]: + return (context.data_handler.train_utterances(0), context.data_handler.train_labels(0)) + @abstractmethod def predict(self, utterances: list[str]) -> npt.NDArray[Any]: """ diff --git a/autointent/modules/regexp/_regexp.py b/autointent/modules/regexp/_regexp.py index 05d0278eb..f486b682b 100644 --- a/autointent/modules/regexp/_regexp.py +++ b/autointent/modules/regexp/_regexp.py @@ -151,3 +151,6 @@ def _compile_regex_patterns(self) -> None: ) for regexp_patterns in self.regexp_patterns ] + + def get_train_data(self, context: Context) -> tuple: + return () diff --git a/autointent/modules/scoring/_description/description.py b/autointent/modules/scoring/_description/description.py index aa6994c8c..071837cf2 100644 --- a/autointent/modules/scoring/_description/description.py +++ b/autointent/modules/scoring/_description/description.py @@ -146,3 +146,10 @@ def predict(self, utterances: list[str]) -> NDArray[np.float64]: def clear_cache(self) -> None: """Clear cached data in memory used by the embedder.""" self._embedder.clear_ram() + + def get_train_data(self, context: Context) -> tuple[list[str], ListOfLabels, list[str]]: + return ( + context.data_handler.train_utterances(0), + context.data_handler.train_labels(0), + context.data_handler.intent_descriptions, + ) diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_optimization/_node_optimizer.py index 868403f9b..f8f29b763 100644 --- a/autointent/nodes/_optimization/_node_optimizer.py +++ b/autointent/nodes/_optimization/_node_optimizer.py @@ -11,8 +11,6 @@ from autointent.context import Context from autointent.custom_types import NodeType -from 
autointent.modules.abc import Module -from autointent.modules.abc._decision import get_decision_evaluation_data from autointent.nodes._nodes_info import NODES_INFO @@ -116,30 +114,3 @@ def get_module_dump_dir(self, dump_dir: Path, module_name: str, j_combination: i dump_dir_ = dump_dir / self.node_info.node_type / module_name / f"comb_{j_combination}" dump_dir_.mkdir(parents=True, exist_ok=True) return str(dump_dir_) - - def module_fit(self, module: Module, context: Context) -> None: - """ - Fit the module. - - :param module: Module to fit - :param context: Context to use - """ - if self.node_info.node_type in ["embedding", "scoring"]: - if module.__class__.__name__ == "DescriptionScorer": - args = ( - context.data_handler.train_utterances(0), - context.data_handler.train_labels(0), - context.data_handler.intent_descriptions, - ) - else: - args = (context.data_handler.train_utterances(0), context.data_handler.train_labels(0)) # type: ignore[assignment] - elif self.node_info.node_type == "decision": - labels, scores = get_decision_evaluation_data(context, "train") - args = (scores, labels, context.data_handler.tags) # type: ignore[assignment] - elif self.node_info.node_type == "regexp": - args = () # type: ignore[assignment] - else: - msg = "something's wrong" - self._logger.error(msg) - raise ValueError(msg) - module.fit(*args) # type: ignore[arg-type] From ccf6e415b6e00d3430457eb6b175eed6160126f7 Mon Sep 17 00:00:00 2001 From: voorhs Date: Sun, 2 Feb 2025 08:01:20 +0300 Subject: [PATCH 04/74] stage progress --- autointent/modules/abc/_base.py | 4 ++-- autointent/modules/abc/_scoring.py | 4 ++++ autointent/nodes/_optimization/_node_optimizer.py | 5 +---- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/autointent/modules/abc/_base.py b/autointent/modules/abc/_base.py index 567405254..61de46241 100644 --- a/autointent/modules/abc/_base.py +++ b/autointent/modules/abc/_base.py @@ -3,7 +3,7 @@ import logging from abc import ABC, abstractmethod from pathlib import Path -from typing import Any, Literal +from typing import Any import numpy.typing as npt @@ -34,7 +34,7 @@ def fit(self, *args: tuple[Any], **kwargs: dict[str, Any]) -> None: """ @abstractmethod - def score(self, context: Context, split: Literal["validation", "test"], metrics: list[str]) -> dict[str, float]: + def score(self, context: Context, test: bool, metrics: list[str]) -> dict[str, float]: """ Calculate metric on test set and return metric value. 
diff --git a/autointent/modules/abc/_scoring.py b/autointent/modules/abc/_scoring.py index 97004a63d..f93c55c2b 100644 --- a/autointent/modules/abc/_scoring.py +++ b/autointent/modules/abc/_scoring.py @@ -41,11 +41,15 @@ def score(self, context: Context, test: bool, metrics: list[str]) -> dict[str, f return self.score_metrics((labels, scores), chosen_metrics) metrics_values = {name: [] for name in chosen_metrics} + all_val_scores = [] for train_utterances, train_labels, val_utterances, val_labels in context.data_handler.validation_iterator(0): self.fit(train_utterances, train_labels) val_scores = self.predict(val_utterances) for name, fn in chosen_metrics.items(): metrics_values[name].append(fn(val_labels, val_scores)) + all_val_scores.append(val_scores) + + self._validation_scores = np.concat(all_val_scores, axis=0) return {name: np.mean(values_list) for name, values_list in metrics_values.items()} diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_optimization/_node_optimizer.py index f8f29b763..2dd481bfa 100644 --- a/autointent/nodes/_optimization/_node_optimizer.py +++ b/autointent/nodes/_optimization/_node_optimizer.py @@ -66,11 +66,8 @@ def fit(self, context: Context) -> None: if embedder_name is not None: module_kwargs["embedder_name"] = embedder_name - self._logger.debug("optimizing %s module...", module_name) - self.module_fit(module, context) - self._logger.debug("scoring %s module...", module_name) - metrics_score = module.score(context, "validation", self.metrics) + metrics_score = module.score(context, test=False, metrics=self.metrics) metric_value = metrics_score[self.target_metric] context.callback_handler.log_metrics(metrics_score) From a307fee35f5060445020414889e7cbf39f20907b Mon Sep 17 00:00:00 2001 From: voorhs Date: Sun, 2 Feb 2025 09:59:13 +0300 Subject: [PATCH 05/74] implement cv iterator --- .../context/data_handler/_data_handler.py | 32 +++++++++++-------- autointent/modules/abc/_decision.py | 19 +++++++++-- autointent/modules/abc/_scoring.py | 32 +++++++++++-------- 3 files changed, 54 insertions(+), 29 deletions(-) diff --git a/autointent/context/data_handler/_data_handler.py b/autointent/context/data_handler/_data_handler.py index b0772e4d0..7fa2a4687 100644 --- a/autointent/context/data_handler/_data_handler.py +++ b/autointent/context/data_handler/_data_handler.py @@ -1,6 +1,7 @@ """Data Handler file.""" import logging +from collections.abc import Generator from pathlib import Path from typing import Literal, TypedDict, cast @@ -30,7 +31,12 @@ class DataHandler: # TODO rename to Validator """Data handler class.""" def __init__( - self, dataset: Dataset, scheme: Literal["cv", "ho"], split_train: bool = True, random_seed: int = 0 + self, + dataset: Dataset, + scheme: Literal["cv", "ho"], + split_train: bool = True, + random_seed: int = 0, + n_folds: int = 3, ) -> None: """ Initialize the data handler. 
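A rough usage sketch of the extended constructor follows; it assumes `dataset` is an already loaded autointent Dataset and that _split_cv has laid the training data out as folds train_0 ... train_{n_folds-1}:

    from autointent.context.data_handler._data_handler import DataHandler

    cv_handler = DataHandler(dataset, scheme="cv", n_folds=3)  # cross-validation over 3 folds
    ho_handler = DataHandler(dataset, scheme="ho")             # classic hold-out split
    for train_x, train_y, val_x, val_y in cv_handler.validation_iterator():
        # each fold serves as validation once; the remaining folds are concatenated for training
        print(len(train_x), "train utterances;", len(val_x), "validation utterances")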
@@ -46,6 +52,7 @@ def __init__( self.n_classes = self.dataset.n_classes self.scheme = scheme + self.n_folds = n_folds if scheme == "ho": self._split_ho(random_seed, split_train) @@ -159,19 +166,18 @@ def test_labels(self, idx: int | None = None) -> ListOfGenericLabels: split = f"{Split.TEST}_{idx}" if idx is not None else Split.TEST return cast(ListOfGenericLabels, self.dataset[split][self.dataset.label_feature]) - def validation_iterator(self, idx: int | None = None) -> list[tuple[list, list, list, list]]: + def validation_iterator(self) -> Generator[tuple[list, list, list, list]]: if self.scheme == "ho": - return [ - ( - self.train_utterances(idx), - self.train_labels(idx), - self.validation_utterances(idx), - self.validation_labels(idx), - ) - ] - - if self.scheme == "cv": - raise NotImplementedError + msg = "Cannot call cross-validation on hold-out DataHandler" + raise RuntimeError(msg) + + for j in range(self.n_folds): + val_utterances = self.train_utterances(j) + val_labels = self.train_labels(j) + train_folds = [i for i in range(self.n_folds) if i != j] + train_utterances = [ut for i_fold in train_folds for ut in self.train_utterances(i_fold)] + train_labels = [ut for i_fold in train_folds for ut in self.train_labels(i_fold)] + yield train_utterances, train_labels, val_utterances, val_labels msg = "something's wrong" raise RuntimeError(msg) diff --git a/autointent/modules/abc/_decision.py b/autointent/modules/abc/_decision.py index 367a2a435..f7501f4eb 100644 --- a/autointent/modules/abc/_decision.py +++ b/autointent/modules/abc/_decision.py @@ -40,7 +40,7 @@ def predict(self, scores: npt.NDArray[Any]) -> ListOfGenericLabels: :param scores: Scores to predict """ - def score(self, context: Context, split: Literal["validation", "test"], metrics: list[str]) -> dict[str, float]: + def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: """ Calculate metric on test set and return metric value. @@ -48,7 +48,20 @@ def score(self, context: Context, split: Literal["validation", "test"], metrics: :param split: Target split :return: Computed metrics value for the test set or error code of metrics """ - labels, scores = get_decision_evaluation_data(context, split) + labels, scores = get_decision_evaluation_data(context, "validation") + self._decisions = self.predict(scores) + chosen_metrics = {name: fn for name, fn in PREDICTION_METRICS_MULTICLASS.items() if name in metrics} + return self.score_metrics((labels, self._decisions), chosen_metrics) + + def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: + """ + Calculate metric on test set and return metric value. + + :param context: Context to score + :param split: Target split + :return: Computed metrics value for the test set or error code of metrics + """ + labels, scores = get_decision_evaluation_data(context, "validation") self._decisions = self.predict(scores) chosen_metrics = {name: fn for name, fn in PREDICTION_METRICS_MULTICLASS.items() if name in metrics} return self.score_metrics((labels, self._decisions), chosen_metrics) @@ -79,7 +92,7 @@ def get_train_data(self, context: Context) -> tuple[npt.NDArray[Any], ListOfGene def get_decision_evaluation_data( context: Context, - split: Literal["train", "validation", "test"], + split: Literal["train", "validation", "test"], # TODO add index to handle both ho and cv ) -> tuple[ListOfGenericLabels, npt.NDArray[np.float64]]: """ Get decision evaluation data. 
diff --git a/autointent/modules/abc/_scoring.py b/autointent/modules/abc/_scoring.py index f93c55c2b..945353868 100644 --- a/autointent/modules/abc/_scoring.py +++ b/autointent/modules/abc/_scoring.py @@ -23,7 +23,22 @@ class ScoringModule(Module, ABC): supports_oos = False - def score(self, context: Context, test: bool, metrics: list[str]) -> dict[str, float]: + def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: + utterances = context.data_handler.validation_utterances(0) + labels = context.data_handler.validation_labels(0) + + scores = self.predict(utterances) + + self._artifact = ScorerArtifact( + train_scores=self.predict(context.data_handler.train_utterances(1)), + validation_scores=self.predict(context.data_handler.validation_utterances(1)), + ) + + metrics_dict = SCORING_METRICS_MULTILABEL if context.is_multilabel() else SCORING_METRICS_MULTICLASS + chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics} + return self.score_metrics((labels, scores), chosen_metrics) + + def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: """ Evaluate the scorer on a test set and compute the specified metric. @@ -34,12 +49,6 @@ def score(self, context: Context, test: bool, metrics: list[str]) -> dict[str, f metrics_dict = SCORING_METRICS_MULTILABEL if context.is_multilabel() else SCORING_METRICS_MULTICLASS chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics} - if test: - utterances = context.data_handler.test_utterances() - labels = context.data_handler.test_labels() - scores = self.predict(utterances) - return self.score_metrics((labels, scores), chosen_metrics) - metrics_values = {name: [] for name in chosen_metrics} all_val_scores = [] for train_utterances, train_labels, val_utterances, val_labels in context.data_handler.validation_iterator(0): @@ -49,7 +58,8 @@ def score(self, context: Context, test: bool, metrics: list[str]) -> dict[str, f metrics_values[name].append(fn(val_labels, val_scores)) all_val_scores.append(val_scores) - self._validation_scores = np.concat(all_val_scores, axis=0) + # save all predictions unbinded to preserve folding + self._artifact = ScorerArtifact(validation_scores=all_val_scores) return {name: np.mean(values_list) for name, values_list in metrics_values.items()} @@ -59,11 +69,7 @@ def get_assets(self) -> ScorerArtifact: :return: ScorerArtifact containing test, validation and test scores. 
""" - return ScorerArtifact( - train_scores=self._train_scores, - validation_scores=self._validation_scores, - test_scores=self._test_scores, - ) + return self._artifact def get_train_data(self, context: Context) -> tuple[list[str], ListOfLabels]: return (context.data_handler.train_utterances(0), context.data_handler.train_labels(0)) From 8649e14cc21436c5a112eba3e40a02894b44cc13 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 06:10:48 +0300 Subject: [PATCH 06/74] minor bug fix --- autointent/context/optimization_info/_data_models.py | 3 +++ autointent/modules/abc/_scoring.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/autointent/context/optimization_info/_data_models.py b/autointent/context/optimization_info/_data_models.py index 49ca6bf83..55ce0937c 100644 --- a/autointent/context/optimization_info/_data_models.py +++ b/autointent/context/optimization_info/_data_models.py @@ -42,6 +42,9 @@ class ScorerArtifact(Artifact): train_scores: NDArray[np.float64] | None = Field(None, description="Scorer outputs for train utterances") validation_scores: NDArray[np.float64] | None = Field(None, description="Scorer outputs for validation utterances") test_scores: NDArray[np.float64] | None = Field(None, description="Scorer outputs for test utterances") + folded_scores: list[NDArray[np.float64]] | None = Field( + None, description="Scores for each fold from cross-validation" + ) class DecisionArtifact(Artifact): diff --git a/autointent/modules/abc/_scoring.py b/autointent/modules/abc/_scoring.py index 945353868..e8916d816 100644 --- a/autointent/modules/abc/_scoring.py +++ b/autointent/modules/abc/_scoring.py @@ -51,7 +51,7 @@ def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: metrics_values = {name: [] for name in chosen_metrics} all_val_scores = [] - for train_utterances, train_labels, val_utterances, val_labels in context.data_handler.validation_iterator(0): + for train_utterances, train_labels, val_utterances, val_labels in context.data_handler.validation_iterator(): self.fit(train_utterances, train_labels) val_scores = self.predict(val_utterances) for name, fn in chosen_metrics.items(): @@ -59,7 +59,7 @@ def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: all_val_scores.append(val_scores) # save all predictions unbinded to preserve folding - self._artifact = ScorerArtifact(validation_scores=all_val_scores) + self._artifact = ScorerArtifact(folded_scores=all_val_scores) return {name: np.mean(values_list) for name, values_list in metrics_values.items()} From a300f19cd354c030251a65286460b8cc82ba9618 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 06:11:43 +0300 Subject: [PATCH 07/74] implement cv iterator for decision node --- .../context/data_handler/_data_handler.py | 3 ++ .../optimization_info/_optimization_info.py | 9 ++++ autointent/modules/abc/_decision.py | 42 +++++++++++++------ 3 files changed, 42 insertions(+), 12 deletions(-) diff --git a/autointent/context/data_handler/_data_handler.py b/autointent/context/data_handler/_data_handler.py index 7fa2a4687..f7ef69483 100644 --- a/autointent/context/data_handler/_data_handler.py +++ b/autointent/context/data_handler/_data_handler.py @@ -110,6 +110,9 @@ def train_labels(self, idx: int | None = None) -> ListOfGenericLabels: split = f"{Split.TRAIN}_{idx}" if idx is not None else Split.TRAIN return cast(ListOfGenericLabels, self.dataset[split][self.dataset.label_feature]) + def train_labels_folded(self) -> list[ListOfGenericLabels]: + return 
[self.train_labels(j) for j in range(self.n_folds)] + def validation_utterances(self, idx: int | None = None) -> list[str]: """ Retrieve validation utterances from the dataset. diff --git a/autointent/context/optimization_info/_optimization_info.py b/autointent/context/optimization_info/_optimization_info.py index 7ec1e5a01..f426d17d4 100644 --- a/autointent/context/optimization_info/_optimization_info.py +++ b/autointent/context/optimization_info/_optimization_info.py @@ -166,6 +166,15 @@ def get_best_validation_scores(self) -> NDArray[np.float64] | None: best_scorer_artifact: ScorerArtifact = self._get_best_artifact(node_type=NodeType.scoring) # type: ignore[assignment] return best_scorer_artifact.validation_scores + def get_best_folded_scores(self) -> list[NDArray[np.float64]] | None: + """ + Retrieve the validation scores from the best scorer node. + + :return: Validation scores as a numpy array. + """ + best_scorer_artifact: ScorerArtifact = self._get_best_artifact(node_type=NodeType.scoring) # type: ignore[assignment] + return best_scorer_artifact.folded_scores + def get_best_test_scores(self) -> NDArray[np.float64] | None: """ Retrieve the test scores from the best scorer node. diff --git a/autointent/modules/abc/_decision.py b/autointent/modules/abc/_decision.py index f7501f4eb..b0603ddb8 100644 --- a/autointent/modules/abc/_decision.py +++ b/autointent/modules/abc/_decision.py @@ -49,9 +49,10 @@ def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: :return: Computed metrics value for the test set or error code of metrics """ labels, scores = get_decision_evaluation_data(context, "validation") - self._decisions = self.predict(scores) + decisions = self.predict(scores) chosen_metrics = {name: fn for name, fn in PREDICTION_METRICS_MULTICLASS.items() if name in metrics} - return self.score_metrics((labels, self._decisions), chosen_metrics) + self._artifact = DecisionArtifact(labels=decisions) + return self.score_metrics((labels, decisions), chosen_metrics) def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: """ @@ -61,14 +62,34 @@ def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: :param split: Target split :return: Computed metrics value for the test set or error code of metrics """ - labels, scores = get_decision_evaluation_data(context, "validation") - self._decisions = self.predict(scores) + labels = context.data_handler.train_labels_folded() + scores = context.optimization_info.get_best_folded_scores() + + if scores is None: + msg = "No folded scores are found." 
+ raise RuntimeError(msg) + chosen_metrics = {name: fn for name, fn in PREDICTION_METRICS_MULTICLASS.items() if name in metrics} - return self.score_metrics((labels, self._decisions), chosen_metrics) + metrics_values = {name: [] for name in chosen_metrics} + all_val_decisions = [] + for j in range(context.data_handler.n_folds): + val_labels = labels[j] + val_scores = scores[j] + train_folds = [i for i in range(context.data_handler.n_folds) if i != j] + train_labels = [ut for i_fold in train_folds for ut in labels[i_fold]] + train_scores = [ut for i_fold in train_folds for ut in scores[i_fold]] + self.fit(train_scores, train_labels, context.data_handler.tags) + val_decisions = self.predict(val_scores) + for name, fn in chosen_metrics.items(): + metrics_values[name].append(fn(val_labels, val_decisions)) + all_val_decisions.append(val_decisions) + + self._artifact = DecisionArtifact(labels=[pred for pred_list in all_val_decisions for pred in pred_list]) + return {name: np.mean(values_list) for name, values_list in metrics_values.items()} def get_assets(self) -> DecisionArtifact: """Return useful assets that represent intermediate data into context.""" - return DecisionArtifact(labels=self._decisions) + return self._artifact def clear_cache(self) -> None: """Clear cache.""" @@ -80,7 +101,7 @@ def _validate_task(self, scores: npt.NDArray[Any], labels: ListOfGenericLabels) if self._n_classes != scores.shape[1]: msg = ( "There is a mismatch between provided labels and scores. " - f"Labels contains {self._n_classes} classes, but scores contain " + f"Labels contain {self._n_classes} classes, but scores contain " f"probabilities for {scores.shape[1]} classes." ) raise ValueError(msg) @@ -92,7 +113,7 @@ def get_train_data(self, context: Context) -> tuple[npt.NDArray[Any], ListOfGene def get_decision_evaluation_data( context: Context, - split: Literal["train", "validation", "test"], # TODO add index to handle both ho and cv + split: Literal["train", "validation"], ) -> tuple[ListOfGenericLabels, npt.NDArray[np.float64]]: """ Get decision evaluation data. @@ -107,11 +128,8 @@ def get_decision_evaluation_data( elif split == "validation": labels = context.data_handler.validation_labels(1) scores = context.optimization_info.get_best_validation_scores() - elif split == "test": - labels = context.data_handler.test_labels() - scores = context.optimization_info.get_best_test_scores() else: - message = f"Invalid split '{split}' provided. Expected one of 'train', 'validation', or 'test'." + message = f"Invalid split '{split}' provided. Expected one of 'train', 'validation'." 
raise ValueError(message) if scores is None: From 815561f1b1e31d832a6228f0971ba67b7e0515df Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 06:42:35 +0300 Subject: [PATCH 08/74] move cv iteration to base module definition --- autointent/modules/abc/_base.py | 20 +++++++++++++++++++- autointent/modules/abc/_scoring.py | 15 ++++----------- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/autointent/modules/abc/_base.py b/autointent/modules/abc/_base.py index 61de46241..4f6635ba6 100644 --- a/autointent/modules/abc/_base.py +++ b/autointent/modules/abc/_base.py @@ -2,9 +2,11 @@ import logging from abc import ABC, abstractmethod +from collections.abc import Iterable from pathlib import Path from typing import Any +import numpy as np import numpy.typing as npt from autointent._dump_tools import Dumper @@ -106,7 +108,7 @@ def get_embedder_name(self) -> str | None: return None @staticmethod - def score_metrics(params: tuple[Any, Any], metrics_dict: dict[str, Any]) -> dict[str, float]: + def score_metrics_ho(params: tuple[Any, Any], metrics_dict: dict[str, Any]) -> dict[str, float]: """ Score metrics on the test set. @@ -119,6 +121,22 @@ def score_metrics(params: tuple[Any, Any], metrics_dict: dict[str, Any]) -> dict metrics[metric_name] = metric_fn(*params) return metrics + def score_metrics_cv( + self, metrics_dict: dict[str, Any], cv_iterator: Iterable[tuple[list, list, list, list]] + ) -> tuple[dict[str, float], list[ListOfGenericLabels] | list[npt.NDArray[Any]]]: + metrics_values = {name: [] for name in metrics_dict} + all_val_preds = [] + + for train_utterances, train_labels, val_utterances, val_labels in cv_iterator: + self.fit(train_utterances, train_labels) + val_preds = self.predict(val_utterances) + for name, fn in metrics_dict.items(): + metrics_values[name].append(fn(val_labels, val_preds)) + all_val_preds.append(val_preds) + + metrics = {name: np.mean(values_list) for name, values_list in metrics_values.items()} + return metrics, all_val_preds + def _validate_multilabel(self, data_is_multilabel: bool) -> None: if data_is_multilabel and not self.supports_multilabel: msg = f'"{self.name}" module is incompatible with multi-label classifiction.' 
diff --git a/autointent/modules/abc/_scoring.py b/autointent/modules/abc/_scoring.py index e8916d816..8d1e42057 100644 --- a/autointent/modules/abc/_scoring.py +++ b/autointent/modules/abc/_scoring.py @@ -3,7 +3,6 @@ from abc import ABC, abstractmethod from typing import Any -import numpy as np import numpy.typing as npt from autointent import Context @@ -49,19 +48,13 @@ def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: metrics_dict = SCORING_METRICS_MULTILABEL if context.is_multilabel() else SCORING_METRICS_MULTICLASS chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics} - metrics_values = {name: [] for name in chosen_metrics} - all_val_scores = [] - for train_utterances, train_labels, val_utterances, val_labels in context.data_handler.validation_iterator(): - self.fit(train_utterances, train_labels) - val_scores = self.predict(val_utterances) - for name, fn in chosen_metrics.items(): - metrics_values[name].append(fn(val_labels, val_scores)) - all_val_scores.append(val_scores) + metrics_calculated, all_val_scores = self.score_metrics_cv( + chosen_metrics, context.data_handler.validation_iterator() + ) - # save all predictions unbinded to preserve folding self._artifact = ScorerArtifact(folded_scores=all_val_scores) - return {name: np.mean(values_list) for name, values_list in metrics_values.items()} + return metrics_calculated def get_assets(self) -> ScorerArtifact: """ From f3ef812f3dddc52a61a0c932ec199f268411ff60 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 06:43:55 +0300 Subject: [PATCH 09/74] implement cv iterator for embedding node --- autointent/modules/embedding/_logreg.py | 30 +++++++++------- autointent/modules/embedding/_retrieval.py | 40 +++++++++------------- 2 files changed, 33 insertions(+), 37 deletions(-) diff --git a/autointent/modules/embedding/_logreg.py b/autointent/modules/embedding/_logreg.py index d729b8741..e9f15b8e4 100644 --- a/autointent/modules/embedding/_logreg.py +++ b/autointent/modules/embedding/_logreg.py @@ -1,7 +1,5 @@ """LogregAimedEmbedding class for a proxy optimzation of embedding.""" -from typing import Literal - import numpy as np from numpy.typing import NDArray from sklearn.linear_model import LogisticRegression, LogisticRegressionCV @@ -129,29 +127,35 @@ def fit(self, utterances: list[str], labels: ListOfLabels) -> None: self._classifier.fit(embeddings, labels) - def score(self, context: Context, split: Literal["validation", "test"], metrics: list[str]) -> dict[str, float]: + def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: """ Evaluate the embedding model using a specified metric function. :param context: The context containing test data and labels. - :param split: Target split :return: Computed metrics value for the test set or error code of metrics """ - if split == "validation": - utterances = context.data_handler.validation_utterances(0) - labels = context.data_handler.validation_labels(0) - elif split == "test": - utterances = context.data_handler.test_utterances() - labels = context.data_handler.test_labels() - else: - message = f"Invalid split '{split}' provided. Expected one of 'validation', or 'test'." 
- raise ValueError(message) + utterances = context.data_handler.validation_utterances(0) + labels = context.data_handler.validation_labels(0) probas = self.predict(utterances) metrics_dict = SCORING_METRICS_MULTILABEL if context.is_multilabel() else SCORING_METRICS_MULTICLASS chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics} + return self.score_metrics((labels, probas), chosen_metrics) + def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: + """ + Evaluate the embedding model using a specified metric function. + + :param context: The context containing test data and labels. + :return: Computed metrics value for the test set or error code of metrics + """ + metrics_dict = SCORING_METRICS_MULTILABEL if context.is_multilabel() else SCORING_METRICS_MULTICLASS + chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics} + + metrics_calculated, _ = self.score_metrics_cv(chosen_metrics, context.data_handler.validation_iterator()) + return metrics_calculated + def get_assets(self) -> RetrieverArtifact: """ Get the classifier artifacts for this module. diff --git a/autointent/modules/embedding/_retrieval.py b/autointent/modules/embedding/_retrieval.py index b7d919add..2b166aacb 100644 --- a/autointent/modules/embedding/_retrieval.py +++ b/autointent/modules/embedding/_retrieval.py @@ -1,7 +1,5 @@ """RetrievalAimedEmbedding class for a proxy optimization of embedding.""" -from typing import Literal - from autointent import Context, VectorIndex from autointent.context.optimization_info import RetrieverArtifact from autointent.custom_types import ListOfLabels @@ -109,28 +107,27 @@ def fit(self, utterances: list[str], labels: ListOfLabels) -> None: ) self._vector_index.add(utterances, labels) - def score(self, context: Context, split: Literal["validation", "test"], metrics: list[str]) -> dict[str, float]: + def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: """ Evaluate the embedding model using a specified metric function. :param context: The context containing test data and labels. - :param split: Target split :return: Computed metrics value for the test set or error code of metrics """ - if split == "validation": - utterances = context.data_handler.validation_utterances(0) - labels = context.data_handler.validation_labels(0) - elif split == "test": - utterances = context.data_handler.test_utterances() - labels = context.data_handler.test_labels() - else: - message = f"Invalid split '{split}' provided. Expected one of 'validation', or 'test'." 
- raise ValueError(message) - predictions, _, _ = self._vector_index.query(utterances, self.k) + utterances = context.data_handler.validation_utterances(0) + labels = context.data_handler.validation_labels(0) + predictions = self.predict(utterances) + + metrics_dict = RETRIEVAL_METRICS_MULTILABEL if context.is_multilabel() else RETRIEVAL_METRICS_MULTICLASS + chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics} + return self.score_metrics_ho((labels, predictions), chosen_metrics) + def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: metrics_dict = RETRIEVAL_METRICS_MULTILABEL if context.is_multilabel() else RETRIEVAL_METRICS_MULTICLASS chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics} - return self.score_metrics((labels, predictions), chosen_metrics) + + metrics_calculated, _ = self.score_metrics_cv(chosen_metrics, context.data_handler.validation_iterator()) + return metrics_calculated def get_assets(self) -> RetrieverArtifact: """ @@ -144,17 +141,12 @@ def clear_cache(self) -> None: """Clear cached data in memory used by the vector index.""" self._vector_index.clear_ram() - def predict(self, utterances: list[str]) -> tuple[list[ListOfLabels], list[list[float]], list[list[str]]]: + def predict(self, utterances: list[str]) -> list[ListOfLabels]: """ Predict the nearest neighbors for a list of utterances. :param utterances: List of utterances for which nearest neighbors are to be retrieved. - :return: A tuple containing: - - labels: List of retrieved labels for each utterance. - - distances: List of distances to the nearest neighbors. - - texts: List of retrieved text data corresponding to the neighbors. + :return: List of labels for each retrieved utterance. """ - return self._vector_index.query( - utterances, - self.k, - ) + predictions, _, _ = self._vector_index.query(utterances, self.k) + return predictions From 593744ae9155787aae290eee876d7e216ba4839c Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 06:57:46 +0300 Subject: [PATCH 10/74] add training to `score_ho` of each node --- autointent/modules/abc/_decision.py | 9 ++++++--- autointent/modules/abc/_scoring.py | 11 +++++++---- autointent/modules/embedding/_logreg.py | 11 +++++++---- autointent/modules/embedding/_retrieval.py | 11 +++++++---- 4 files changed, 27 insertions(+), 15 deletions(-) diff --git a/autointent/modules/abc/_decision.py b/autointent/modules/abc/_decision.py index b0603ddb8..391a23675 100644 --- a/autointent/modules/abc/_decision.py +++ b/autointent/modules/abc/_decision.py @@ -48,11 +48,14 @@ def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: :param split: Target split :return: Computed metrics value for the test set or error code of metrics """ - labels, scores = get_decision_evaluation_data(context, "validation") - decisions = self.predict(scores) + train_scores, train_labels = self.get_train_data(context) + self.fit(train_scores, train_labels, context.data_handler.tags) + + val_labels, val_scores = get_decision_evaluation_data(context, "validation") + decisions = self.predict(val_scores) chosen_metrics = {name: fn for name, fn in PREDICTION_METRICS_MULTICLASS.items() if name in metrics} self._artifact = DecisionArtifact(labels=decisions) - return self.score_metrics((labels, decisions), chosen_metrics) + return self.score_metrics_ho((val_labels, decisions), chosen_metrics) def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: """ diff --git 
a/autointent/modules/abc/_scoring.py b/autointent/modules/abc/_scoring.py index 8d1e42057..603ea73af 100644 --- a/autointent/modules/abc/_scoring.py +++ b/autointent/modules/abc/_scoring.py @@ -23,10 +23,13 @@ class ScoringModule(Module, ABC): supports_oos = False def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: - utterances = context.data_handler.validation_utterances(0) - labels = context.data_handler.validation_labels(0) + train_utterances, train_labels = self.get_train_data(context) + self.fit(train_utterances, train_labels) - scores = self.predict(utterances) + val_utterances = context.data_handler.validation_utterances(0) + val_labels = context.data_handler.validation_labels(0) + + scores = self.predict(val_utterances) self._artifact = ScorerArtifact( train_scores=self.predict(context.data_handler.train_utterances(1)), @@ -35,7 +38,7 @@ def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: metrics_dict = SCORING_METRICS_MULTILABEL if context.is_multilabel() else SCORING_METRICS_MULTICLASS chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics} - return self.score_metrics((labels, scores), chosen_metrics) + return self.score_metrics_ho((val_labels, scores), chosen_metrics) def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: """ diff --git a/autointent/modules/embedding/_logreg.py b/autointent/modules/embedding/_logreg.py index e9f15b8e4..c50dab19d 100644 --- a/autointent/modules/embedding/_logreg.py +++ b/autointent/modules/embedding/_logreg.py @@ -134,14 +134,17 @@ def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: :param context: The context containing test data and labels. :return: Computed metrics value for the test set or error code of metrics """ - utterances = context.data_handler.validation_utterances(0) - labels = context.data_handler.validation_labels(0) + train_utterances, train_labels = self.get_train_data(context) + self.fit(train_utterances, train_labels) - probas = self.predict(utterances) + val_utterances = context.data_handler.validation_utterances(0) + val_labels = context.data_handler.validation_labels(0) + + probas = self.predict(val_utterances) metrics_dict = SCORING_METRICS_MULTILABEL if context.is_multilabel() else SCORING_METRICS_MULTICLASS chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics} - return self.score_metrics((labels, probas), chosen_metrics) + return self.score_metrics_ho((val_labels, probas), chosen_metrics) def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: """ diff --git a/autointent/modules/embedding/_retrieval.py b/autointent/modules/embedding/_retrieval.py index 2b166aacb..c68e62af0 100644 --- a/autointent/modules/embedding/_retrieval.py +++ b/autointent/modules/embedding/_retrieval.py @@ -114,13 +114,16 @@ def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: :param context: The context containing test data and labels. 
:return: Computed metrics value for the test set or error code of metrics """ - utterances = context.data_handler.validation_utterances(0) - labels = context.data_handler.validation_labels(0) - predictions = self.predict(utterances) + train_utterances, train_labels = self.get_train_data(context) + self.fit(train_utterances, train_labels) + + val_utterances = context.data_handler.validation_utterances(0) + val_labels = context.data_handler.validation_labels(0) + predictions = self.predict(val_utterances) metrics_dict = RETRIEVAL_METRICS_MULTILABEL if context.is_multilabel() else RETRIEVAL_METRICS_MULTICLASS chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics} - return self.score_metrics_ho((labels, predictions), chosen_metrics) + return self.score_metrics_ho((val_labels, predictions), chosen_metrics) def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: metrics_dict = RETRIEVAL_METRICS_MULTILABEL if context.is_multilabel() else RETRIEVAL_METRICS_MULTICLASS From 6fa0b24c20ee4c2159df542e4f27f99a2ef5cfe9 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 06:58:28 +0300 Subject: [PATCH 11/74] properly define base module --- autointent/modules/abc/_base.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/autointent/modules/abc/_base.py b/autointent/modules/abc/_base.py index 4f6635ba6..b6359f863 100644 --- a/autointent/modules/abc/_base.py +++ b/autointent/modules/abc/_base.py @@ -35,8 +35,7 @@ def fit(self, *args: tuple[Any], **kwargs: dict[str, Any]) -> None: :param kwargs: Kwargs to fit """ - @abstractmethod - def score(self, context: Context, test: bool, metrics: list[str]) -> dict[str, float]: + def score(self, context: Context, metrics: list[str]) -> dict[str, float]: """ Calculate metric on test set and return metric value. @@ -44,6 +43,20 @@ def score(self, context: Context, test: bool, metrics: list[str]) -> dict[str, f :param split: Split to score on :return: Computed metrics value for the test set or error code of metrics """ + if context.data_handler.scheme == "ho": + return self.score_ho(context, metrics) + if context.data_handler.scheme == "cv": + return self.score_cv(context, metrics) + msg = "Something's wrong with validation schemas" + raise RuntimeError(msg) + + @abstractmethod + def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: + ... + + @abstractmethod + def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: + ... @abstractmethod def get_assets(self) -> Artifact: From 278855c8460adf1a694c764dd28031fa74abc948 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 06:58:56 +0300 Subject: [PATCH 12/74] fix codestyle --- autointent/modules/abc/_base.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/autointent/modules/abc/_base.py b/autointent/modules/abc/_base.py index b6359f863..ec368bc6d 100644 --- a/autointent/modules/abc/_base.py +++ b/autointent/modules/abc/_base.py @@ -51,12 +51,10 @@ def score(self, context: Context, metrics: list[str]) -> dict[str, float]: raise RuntimeError(msg) @abstractmethod - def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: - ... + def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: ... @abstractmethod - def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: - ... + def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: ... 
@abstractmethod def get_assets(self) -> Artifact: From bf9074f8100468e8618504877796439db9bb5e03 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 07:08:29 +0300 Subject: [PATCH 13/74] remove regexp node --- autointent/modules/regexp/__init__.py | 5 - autointent/modules/regexp/_regexp.py | 156 ----------------------- autointent/nodes/__init__.py | 3 +- autointent/nodes/_nodes_info/__init__.py | 3 - autointent/nodes/_nodes_info/_regexp.py | 22 ---- 5 files changed, 1 insertion(+), 188 deletions(-) delete mode 100644 autointent/modules/regexp/__init__.py delete mode 100644 autointent/modules/regexp/_regexp.py delete mode 100644 autointent/nodes/_nodes_info/_regexp.py diff --git a/autointent/modules/regexp/__init__.py b/autointent/modules/regexp/__init__.py deleted file mode 100644 index b074482ff..000000000 --- a/autointent/modules/regexp/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -"""Weakly supported module for now.""" - -from ._regexp import RegExp - -__all__ = ["RegExp"] diff --git a/autointent/modules/regexp/_regexp.py b/autointent/modules/regexp/_regexp.py deleted file mode 100644 index f486b682b..000000000 --- a/autointent/modules/regexp/_regexp.py +++ /dev/null @@ -1,156 +0,0 @@ -"""Module for regular expressions based intent detection.""" - -import re -from typing import Any, Literal, TypedDict - -from autointent import Context -from autointent.context.data_handler._data_handler import RegexPatterns -from autointent.context.optimization_info import Artifact -from autointent.custom_types import LabelType -from autointent.metrics import REGEXP_METRICS -from autointent.modules.abc import Module -from autointent.schemas import Intent - - -class RegexPatternsCompiled(TypedDict): - """Compiled regex patterns.""" - - id: int - """Intent ID.""" - regexp_full_match: list[re.Pattern[str]] - """Compiled regex patterns for full match.""" - regexp_partial_match: list[re.Pattern[str]] - """Compiled regex patterns for partial match.""" - - -class RegExp(Module): - """Regular expressions based intent detection module.""" - - name = "regexp" - - @classmethod - def from_context(cls, context: Context) -> "RegExp": - """Initialize from context.""" - return cls() - - def fit(self, intents: list[dict[str, Any]]) -> None: - """ - Fit the model. - - :param intents: Intents to fit - """ - intents_parsed = [Intent(**dct) for dct in intents] - self.regexp_patterns = [ - RegexPatterns( - id=intent.id, - regexp_full_match=intent.regexp_full_match, - regexp_partial_match=intent.regexp_partial_match, - ) - for intent in intents_parsed - ] - self._compile_regex_patterns() - - def predict(self, utterances: list[str]) -> list[LabelType]: - """ - Predict intents for utterances. - - :param utterances: Utterances to predict - """ - return [self._predict_single(utterance)[0] for utterance in utterances] - - def predict_with_metadata( - self, - utterances: list[str], - ) -> tuple[list[LabelType], list[dict[str, Any]] | None]: - """ - Predict intents for utterances with metadata. - - :param utterances: Utterances to predict - """ - predictions, metadata = [], [] - for utterance in utterances: - prediction, matches = self._predict_single(utterance) - predictions.append(prediction) - metadata.append(matches) - return predictions, metadata - - def _match(self, utterance: str, intent_record: RegexPatternsCompiled) -> dict[str, list[str]]: - """ - Match utterance with intent record. 
- - :param utterance: Utterance to match - :param intent_record: Intent record to match - """ - full_matches = [ - pattern.pattern - for pattern in intent_record["regexp_full_match"] - if pattern.fullmatch(utterance) is not None - ] - partial_matches = [ - pattern.pattern - for pattern in intent_record["regexp_partial_match"] - if pattern.search(utterance) is not None - ] - return {"full_matches": full_matches, "partial_matches": partial_matches} - - def _predict_single(self, utterance: str) -> tuple[LabelType, dict[str, list[str]]]: - """ - Predict intent for a single utterance. - - :param utterance: Utterance to predict - """ - # todo test this - prediction = set() - matches: dict[str, list[str]] = {"full_matches": [], "partial_matches": []} - for intent_record in self.regexp_patterns_compiled: - intent_matches = self._match(utterance, intent_record) - if intent_matches["full_matches"] or intent_matches["partial_matches"]: - prediction.add(intent_record["id"]) - matches["full_matches"].extend(intent_matches["full_matches"]) - matches["partial_matches"].extend(intent_matches["partial_matches"]) - return list(prediction), matches - - def score(self, context: Context, split: Literal["validation", "test"], metrics: list[str]) -> dict[str, float]: - """ - Calculate metric on test set and return metric value. - - :param context: Context to score - :param split: Split to score on - :return: Computed metrics value for the test set or error code of metrics - """ - # TODO add parameter to a whole pipeline (or just to regexp module): - # whether or not to omit utterances on next stages if they were detected with regexp module - assets = { - "test_matches": list(self.predict(context.data_handler.test_utterances())), - } - if assets["test_matches"] is None: - msg = "no matches found" - raise ValueError(msg) - chosen_metrics = {name: fn for name, fn in REGEXP_METRICS.items() if name in metrics} - return self.score_metrics((context.data_handler.test_labels(), assets["test_matches"]), chosen_metrics) - - def clear_cache(self) -> None: - """Clear cache.""" - del self.regexp_patterns - - def get_assets(self) -> Artifact: - """Get assets.""" - return Artifact() - - def _compile_regex_patterns(self) -> None: - """Compile regex patterns.""" - self.regexp_patterns_compiled = [ - RegexPatternsCompiled( - id=regexp_patterns["id"], - regexp_full_match=[ - re.compile(pattern, flags=re.IGNORECASE) for pattern in regexp_patterns["regexp_full_match"] - ], - regexp_partial_match=[ - re.compile(ptn, flags=re.IGNORECASE) for ptn in regexp_patterns["regexp_partial_match"] - ], - ) - for regexp_patterns in self.regexp_patterns - ] - - def get_train_data(self, context: Context) -> tuple: - return () diff --git a/autointent/nodes/__init__.py b/autointent/nodes/__init__.py index f92cd4ef1..0257257a4 100644 --- a/autointent/nodes/__init__.py +++ b/autointent/nodes/__init__.py @@ -1,7 +1,7 @@ """Some core components used in AutoIntent behind the scenes.""" from ._inference_node import InferenceNode -from ._nodes_info import DecisionNodeInfo, EmbeddingNodeInfo, NodeInfo, RegExpNodeInfo, ScoringNodeInfo +from ._nodes_info import DecisionNodeInfo, EmbeddingNodeInfo, NodeInfo, ScoringNodeInfo from ._optimization import NodeOptimizer from .schemes import OptimizationConfig @@ -12,6 +12,5 @@ "NodeInfo", "NodeOptimizer", "OptimizationConfig", - "RegExpNodeInfo", "ScoringNodeInfo", ] diff --git a/autointent/nodes/_nodes_info/__init__.py b/autointent/nodes/_nodes_info/__init__.py index 39209acb2..ed1cf9a9b 100644 --- 
a/autointent/nodes/_nodes_info/__init__.py +++ b/autointent/nodes/_nodes_info/__init__.py @@ -3,14 +3,12 @@ from ._base import NodeInfo from ._decision import DecisionNodeInfo from ._embedding import EmbeddingNodeInfo -from ._regexp import RegExpNodeInfo from ._scoring import ScoringNodeInfo NODES_INFO: dict[str, NodeInfo] = { NodeType.embedding: EmbeddingNodeInfo(), NodeType.scoring: ScoringNodeInfo(), NodeType.decision: DecisionNodeInfo(), - NodeType.regexp: RegExpNodeInfo(), } __all__ = [ @@ -18,6 +16,5 @@ "DecisionNodeInfo", "EmbeddingNodeInfo", "NodeInfo", - "RegExpNodeInfo", "ScoringNodeInfo", ] diff --git a/autointent/nodes/_nodes_info/_regexp.py b/autointent/nodes/_nodes_info/_regexp.py deleted file mode 100644 index 57f98d102..000000000 --- a/autointent/nodes/_nodes_info/_regexp.py +++ /dev/null @@ -1,22 +0,0 @@ -"""Regexp node info.""" - -from collections.abc import Mapping -from typing import ClassVar - -from autointent.custom_types import NodeType -from autointent.metrics import REGEXP_METRICS -from autointent.metrics.regexp import RegexpMetricFn -from autointent.modules.abc import Module -from autointent.modules.regexp import RegExp - -from ._base import NodeInfo - - -class RegExpNodeInfo(NodeInfo): - """Regexp node info.""" - - metrics_available: ClassVar[Mapping[str, RegexpMetricFn]] = REGEXP_METRICS - - modules_available: ClassVar[Mapping[str, type[Module]]] = {NodeType.regexp: RegExp} - - node_type = NodeType.regexp From 8f30ec905b691a93d0060c62dff2e7751c50c8aa Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 07:09:26 +0300 Subject: [PATCH 14/74] remove regexp validator --- autointent/nodes/schemes.py | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/autointent/nodes/schemes.py b/autointent/nodes/schemes.py index 8cd5a5ca9..58cba623b 100644 --- a/autointent/nodes/schemes.py +++ b/autointent/nodes/schemes.py @@ -8,7 +8,7 @@ from autointent.custom_types import NodeType from autointent.modules.abc import Module -from autointent.nodes import DecisionNodeInfo, EmbeddingNodeInfo, RegExpNodeInfo, ScoringNodeInfo +from autointent.nodes import DecisionNodeInfo, EmbeddingNodeInfo, ScoringNodeInfo def generate_models_and_union_type_for_classes( @@ -91,22 +91,7 @@ class ScoringNodeValidator(BaseModel): search_space: list[ScoringSearchSpaceType] -RegexpSearchSpaceType: TypeAlias = generate_models_and_union_type_for_classes( # type: ignore[valid-type] - list(RegExpNodeInfo.modules_available.values()) -) -RegexpMetrics: TypeAlias = Literal[tuple(RegExpNodeInfo.metrics_available.keys())] # type: ignore[valid-type] - - -class RegexNodeValidator(BaseModel): - """Search space configuration for the Regexp node.""" - - node_type: NodeType = NodeType.regexp - target_metric: RegexpMetrics - metrics: list[RegexpMetrics] | None = None - search_space: list[RegexpSearchSpaceType] - - -SearchSpaceTypes: TypeAlias = RegexNodeValidator | EmbeddingNodeValidator | ScoringNodeValidator | DecisionNodeValidator +SearchSpaceTypes: TypeAlias = EmbeddingNodeValidator | ScoringNodeValidator | DecisionNodeValidator class OptimizationConfig(RootModel[list[SearchSpaceTypes]]): From b2c8986b73e5c860aeffb826e5b770e26d1a7613 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 07:28:47 +0300 Subject: [PATCH 15/74] fix typing problems (except `DataHandler._split_cv`) --- autointent/_pipeline/_pipeline.py | 6 +++--- autointent/configs/_optimization.py | 3 +++ autointent/context/_context.py | 6 ++++-- autointent/context/data_handler/_data_handler.py | 6 +++--- 
autointent/modules/abc/_base.py | 14 ++++++++------ autointent/modules/abc/_decision.py | 10 +++++----- autointent/modules/abc/_embedding.py | 2 +- autointent/modules/abc/_scoring.py | 10 +++++++++- .../modules/scoring/_description/description.py | 2 +- autointent/nodes/_optimization/_node_optimizer.py | 2 +- 10 files changed, 38 insertions(+), 23 deletions(-) diff --git a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py index 2201575b0..42a93e415 100644 --- a/autointent/_pipeline/_pipeline.py +++ b/autointent/_pipeline/_pipeline.py @@ -3,7 +3,7 @@ import json import logging from pathlib import Path -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Literal import numpy as np import yaml @@ -122,7 +122,7 @@ def _is_inference(self) -> bool: """ return isinstance(self.nodes[NodeType.scoring], InferenceNode) - def fit(self, dataset: Dataset) -> Context: + def fit(self, dataset: Dataset, scheme: Literal["ho", "cv"] = "ho") -> Context: """ Optimize the pipeline from dataset. @@ -134,7 +134,7 @@ def fit(self, dataset: Dataset) -> Context: raise RuntimeError(msg) context = Context() - context.set_dataset(dataset) + context.set_dataset(dataset, scheme) context.configure_logging(self.logging_config) context.configure_vector_index(self.vector_index_config, self.embedder_config) context.configure_cross_encoder(self.cross_encoder_config) diff --git a/autointent/configs/_optimization.py b/autointent/configs/_optimization.py index 610eece5b..27ecc52a2 100644 --- a/autointent/configs/_optimization.py +++ b/autointent/configs/_optimization.py @@ -1,6 +1,7 @@ """Configuration for the optimization process.""" from pathlib import Path +from typing import Literal from pydantic import BaseModel, Field @@ -12,6 +13,8 @@ class DataConfig(BaseModel): train_path: str | Path """Path to the training data. Can be local path or HF repo.""" + scheme: Literal["ho", "cv"] + """Hold-out or cross-validation.""" class TaskConfig(BaseModel): diff --git a/autointent/context/_context.py b/autointent/context/_context.py index 1081ea3a1..76baf70c1 100644 --- a/autointent/context/_context.py +++ b/autointent/context/_context.py @@ -3,7 +3,7 @@ import json import logging from pathlib import Path -from typing import Any +from typing import Any, Literal import yaml @@ -83,9 +83,10 @@ def configure_data(self, config: DataConfig) -> None: self.data_handler = DataHandler( dataset=load_dataset(config.train_path), random_seed=self.seed, + scheme=config.scheme ) - def set_dataset(self, dataset: Dataset) -> None: + def set_dataset(self, dataset: Dataset, scheme: Literal["ho", "cv"]) -> None: """ Set the datasets for training, validation and testing. 
@@ -94,6 +95,7 @@ def set_dataset(self, dataset: Dataset) -> None: self.data_handler = DataHandler( dataset=dataset, random_seed=self.seed, + scheme=scheme, ) def get_inference_config(self) -> dict[str, Any]: diff --git a/autointent/context/data_handler/_data_handler.py b/autointent/context/data_handler/_data_handler.py index f7ef69483..e5f9ba361 100644 --- a/autointent/context/data_handler/_data_handler.py +++ b/autointent/context/data_handler/_data_handler.py @@ -9,7 +9,7 @@ from transformers import set_seed from autointent import Dataset -from autointent.custom_types import ListOfGenericLabels, Split +from autointent.custom_types import ListOfGenericLabels, ListOfLabels, Split from ._stratification import split_dataset @@ -169,7 +169,7 @@ def test_labels(self, idx: int | None = None) -> ListOfGenericLabels: split = f"{Split.TEST}_{idx}" if idx is not None else Split.TEST return cast(ListOfGenericLabels, self.dataset[split][self.dataset.label_feature]) - def validation_iterator(self) -> Generator[tuple[list, list, list, list]]: + def validation_iterator(self) -> Generator[tuple[list[str], ListOfLabels, list[str], ListOfLabels]]: if self.scheme == "ho": msg = "Cannot call cross-validation on hold-out DataHandler" raise RuntimeError(msg) @@ -180,7 +180,7 @@ def validation_iterator(self) -> Generator[tuple[list, list, list, list]]: train_folds = [i for i in range(self.n_folds) if i != j] train_utterances = [ut for i_fold in train_folds for ut in self.train_utterances(i_fold)] train_labels = [ut for i_fold in train_folds for ut in self.train_labels(i_fold)] - yield train_utterances, train_labels, val_utterances, val_labels + yield train_utterances, train_labels, val_utterances, val_labels # type: ignore[misc] msg = "something's wrong" raise RuntimeError(msg) diff --git a/autointent/modules/abc/_base.py b/autointent/modules/abc/_base.py index ec368bc6d..2674510eb 100644 --- a/autointent/modules/abc/_base.py +++ b/autointent/modules/abc/_base.py @@ -12,7 +12,7 @@ from autointent._dump_tools import Dumper from autointent.context import Context from autointent.context.optimization_info import Artifact -from autointent.custom_types import ListOfGenericLabels +from autointent.custom_types import ListOfGenericLabels, ListOfLabels from autointent.exceptions import WrongClassificationError logger = logging.getLogger(__name__) @@ -133,20 +133,22 @@ def score_metrics_ho(params: tuple[Any, Any], metrics_dict: dict[str, Any]) -> d return metrics def score_metrics_cv( - self, metrics_dict: dict[str, Any], cv_iterator: Iterable[tuple[list, list, list, list]] + self, + metrics_dict: dict[str, Any], + cv_iterator: Iterable[tuple[list[str], ListOfLabels, list[str], ListOfLabels]], ) -> tuple[dict[str, float], list[ListOfGenericLabels] | list[npt.NDArray[Any]]]: - metrics_values = {name: [] for name in metrics_dict} + metrics_values: dict[str, list[float]] = {name: [] for name in metrics_dict} all_val_preds = [] for train_utterances, train_labels, val_utterances, val_labels in cv_iterator: - self.fit(train_utterances, train_labels) + self.fit(train_utterances, train_labels) # type: ignore[arg-type] val_preds = self.predict(val_utterances) for name, fn in metrics_dict.items(): metrics_values[name].append(fn(val_labels, val_preds)) all_val_preds.append(val_preds) - metrics = {name: np.mean(values_list) for name, values_list in metrics_values.items()} - return metrics, all_val_preds + metrics = {name: float(np.mean(values_list)) for name, values_list in metrics_values.items()} + return metrics, all_val_preds # 
type: ignore[return-value] def _validate_multilabel(self, data_is_multilabel: bool) -> None: if data_is_multilabel and not self.supports_multilabel: diff --git a/autointent/modules/abc/_decision.py b/autointent/modules/abc/_decision.py index 391a23675..6d78d050a 100644 --- a/autointent/modules/abc/_decision.py +++ b/autointent/modules/abc/_decision.py @@ -48,8 +48,8 @@ def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: :param split: Target split :return: Computed metrics value for the test set or error code of metrics """ - train_scores, train_labels = self.get_train_data(context) - self.fit(train_scores, train_labels, context.data_handler.tags) + train_scores, train_labels, tags = self.get_train_data(context) + self.fit(train_scores, train_labels, tags) val_labels, val_scores = get_decision_evaluation_data(context, "validation") decisions = self.predict(val_scores) @@ -73,7 +73,7 @@ def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: raise RuntimeError(msg) chosen_metrics = {name: fn for name, fn in PREDICTION_METRICS_MULTICLASS.items() if name in metrics} - metrics_values = {name: [] for name in chosen_metrics} + metrics_values: dict[str, list[float]] = {name: [] for name in chosen_metrics} all_val_decisions = [] for j in range(context.data_handler.n_folds): val_labels = labels[j] @@ -81,14 +81,14 @@ def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: train_folds = [i for i in range(context.data_handler.n_folds) if i != j] train_labels = [ut for i_fold in train_folds for ut in labels[i_fold]] train_scores = [ut for i_fold in train_folds for ut in scores[i_fold]] - self.fit(train_scores, train_labels, context.data_handler.tags) + self.fit(train_scores, train_labels, context.data_handler.tags) # type: ignore[arg-type] val_decisions = self.predict(val_scores) for name, fn in chosen_metrics.items(): metrics_values[name].append(fn(val_labels, val_decisions)) all_val_decisions.append(val_decisions) self._artifact = DecisionArtifact(labels=[pred for pred_list in all_val_decisions for pred in pred_list]) - return {name: np.mean(values_list) for name, values_list in metrics_values.items()} + return {name: float(np.mean(values_list)) for name, values_list in metrics_values.items()} def get_assets(self) -> DecisionArtifact: """Return useful assets that represent intermediate data into context.""" diff --git a/autointent/modules/abc/_embedding.py b/autointent/modules/abc/_embedding.py index 708a749eb..5a30222b6 100644 --- a/autointent/modules/abc/_embedding.py +++ b/autointent/modules/abc/_embedding.py @@ -11,4 +11,4 @@ class EmbeddingModule(Module, ABC): """Base class for embedding modules.""" def get_train_data(self, context: Context) -> tuple[list[str], ListOfLabels]: - return (context.data_handler.train_utterances(0), context.data_handler.train_labels(0)) + return (context.data_handler.train_utterances(0), context.data_handler.train_labels(0)) # type: ignore[return-value] diff --git a/autointent/modules/abc/_scoring.py b/autointent/modules/abc/_scoring.py index 603ea73af..5005e4662 100644 --- a/autointent/modules/abc/_scoring.py +++ b/autointent/modules/abc/_scoring.py @@ -22,6 +22,14 @@ class ScoringModule(Module, ABC): supports_oos = False + @abstractmethod + def fit( + self, + utterances: list[str], + labels: ListOfLabels, + ) -> None: + ... 
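# [Editor's illustration, not part of this patch] The abstract `fit` above, together with
# the `predict` declared further down in this module, is what the hold-out and
# cross-validation scoring paths call on each scorer. A standalone toy object with the
# same shape; the class name and its frequency-based logic are assumptions for
# illustration only, not AutoIntent code:

from typing import Any

import numpy as np
import numpy.typing as npt


class ToyPriorScorer:
    """Scores every utterance with the label distribution observed in `fit`."""

    def fit(self, utterances: list[str], labels: list[int]) -> None:
        counts = np.bincount(np.asarray(labels))
        self._prior = counts / counts.sum()

    def predict(self, utterances: list[str]) -> npt.NDArray[Any]:
        # one identical probability row per utterance
        return np.tile(self._prior, (len(utterances), 1))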
+ def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: train_utterances, train_labels = self.get_train_data(context) self.fit(train_utterances, train_labels) @@ -68,7 +76,7 @@ def get_assets(self) -> ScorerArtifact: return self._artifact def get_train_data(self, context: Context) -> tuple[list[str], ListOfLabels]: - return (context.data_handler.train_utterances(0), context.data_handler.train_labels(0)) + return (context.data_handler.train_utterances(0), context.data_handler.train_labels(0)) # type: ignore[return-value] @abstractmethod def predict(self, utterances: list[str]) -> npt.NDArray[Any]: diff --git a/autointent/modules/scoring/_description/description.py b/autointent/modules/scoring/_description/description.py index 071837cf2..d84b7d6a7 100644 --- a/autointent/modules/scoring/_description/description.py +++ b/autointent/modules/scoring/_description/description.py @@ -148,7 +148,7 @@ def clear_cache(self) -> None: self._embedder.clear_ram() def get_train_data(self, context: Context) -> tuple[list[str], ListOfLabels, list[str]]: - return ( + return ( # type: ignore[return-value] context.data_handler.train_utterances(0), context.data_handler.train_labels(0), context.data_handler.intent_descriptions, diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_optimization/_node_optimizer.py index 2dd481bfa..2271f9feb 100644 --- a/autointent/nodes/_optimization/_node_optimizer.py +++ b/autointent/nodes/_optimization/_node_optimizer.py @@ -67,7 +67,7 @@ def fit(self, context: Context) -> None: module_kwargs["embedder_name"] = embedder_name self._logger.debug("scoring %s module...", module_name) - metrics_score = module.score(context, test=False, metrics=self.metrics) + metrics_score = module.score(context, metrics=self.metrics) metric_value = metrics_score[self.target_metric] context.callback_handler.log_metrics(metrics_score) From e24bde4c6cebca9432ec3e90db24dc72f370abc3 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 09:23:15 +0300 Subject: [PATCH 16/74] add ingore oos decorator --- autointent/metrics/retrieval.py | 134 +++++++------------------------- autointent/metrics/scoring.py | 29 ++++++- 2 files changed, 58 insertions(+), 105 deletions(-) diff --git a/autointent/metrics/retrieval.py b/autointent/metrics/retrieval.py index c9512a96a..567ae60d1 100644 --- a/autointent/metrics/retrieval.py +++ b/autointent/metrics/retrieval.py @@ -1,5 +1,6 @@ """Retrieval metrics.""" +from functools import wraps from typing import Any, Protocol import numpy as np @@ -109,6 +110,21 @@ def _average_precision(query_label: int, candidate_labels: npt.NDArray[np.int64] return sum_precision / num_relevant if num_relevant > 0 else 0.0 +def ignore_oos(func: RetrievalMetricFn) -> RetrievalMetricFn: + """Ignore OOS in metrics calculation (decorator).""" + + @wraps(func) + def wrapper(query_labels: list[Any | None], candidates_labels: list[Any]) -> float: + query_labels_filtered = [lab for lab in query_labels if lab is not None] + candidates_labels_filtered = [ + cand for cand, lab in zip(candidates_labels, query_labels, strict=True) if lab is not None + ] + return func(query_labels_filtered, candidates_labels_filtered) + + return wrapper + + +@ignore_oos def retrieval_map(query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, k: int | None = None) -> float: r""" Calculate the mean average precision at position k. 
@@ -180,6 +196,7 @@ class of the query :math:`q`, return sum_precision / num_relevant if num_relevant > 0 else 0.0 +@ignore_oos def retrieval_map_intersecting( query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, @@ -215,6 +232,7 @@ def retrieval_map_intersecting( return sum(ap_list) / len(ap_list) +@ignore_oos def retrieval_map_macro( query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, @@ -235,47 +253,7 @@ def retrieval_map_macro( return _macrofy(retrieval_map, query_labels, candidates_labels, k) -def _retrieval_map_numpy(query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, k: int) -> float: - r""" - Calculate mean average precision at position k. - - The mean average precision (MAP) at position :math:`k` is calculated as follows: - - .. math:: - - \text{AP}_q = \frac{1}{|R_q|} \sum_{i=1}^{k} P_q(i) \cdot \mathbb{1}(y_{\text{true},q} = y_{\text{pred},i}) - - \text{MAP}@k = \frac{1}{|Q|} \sum_{q=1}^{Q} \text{AP}_q - - where: - - :math:`\text{AP}_q` is the average precision for query :math:`q`, - - :math:`P_q(i)` is the precision at the :math:`i`-th position for query :math:`q`, - - :math:`\mathbb{1}(y_{\text{true},q} = y_{\text{pred},i})` is the indicator function that equals - 1 if the true label of the query matches the predicted label at position :math:`i` and 0 otherwise, - - :math:`|R_q|` is the total number of relevant items for query :math:`q`, - - :math:`|Q|` is the total number of queries. - - :param query_labels: For each query, this list contains its class labels - :param candidates_labels: For each query, these lists contain class labels of items ranked by a retrieval model (from most to least relevant) - :param k: Number of top items to consider for each query - :return: Score of the retrieval metric - """ # noqa: E501 - query_label_, candidates_labels_ = transform(query_labels, candidates_labels) - candidates_labels_ = candidates_labels_[:, :k] - relevance_mask = candidates_labels_ == query_label_[:, None] - cumulative_relevant = np.cumsum(relevance_mask, axis=1) - precision_at_k = cumulative_relevant * relevance_mask / np.arange(1, k + 1) - sum_precision = np.sum(precision_at_k, axis=1) - num_relevant = np.sum(relevance_mask, axis=1) - average_precision = np.divide( - sum_precision, - num_relevant, - out=np.zeros_like(sum_precision), - where=num_relevant != 0, - ) - return np.mean(average_precision) # type: ignore[no-any-return] - - +@ignore_oos def retrieval_hit_rate( query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, @@ -315,6 +293,7 @@ def retrieval_hit_rate( return float(hit_count / num_queries) +@ignore_oos def retrieval_hit_rate_intersecting( query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, @@ -360,6 +339,7 @@ def retrieval_hit_rate_intersecting( return float(hit_count / num_queries) +@ignore_oos def retrieval_hit_rate_macro( query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, @@ -380,34 +360,7 @@ def retrieval_hit_rate_macro( return _macrofy(retrieval_hit_rate, query_labels, candidates_labels, k) -def _retrieval_hit_rate_numpy(query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, k: int) -> float: - r""" - Calculate the hit rate at position k. - - The hit rate is calculated as: - - .. 
math:: - - \text{Hit Rate} = \frac{\sum_{i=1}^N \mathbb{1}(y_{\text{query},i} \in y_{\text{candidates},i}^{(1:k)})}{N} - - where: - - :math:`N` is the total number of queries, - - :math:`y_{\text{query},i}` is the true label for the :math:`i`-th query, - - :math:`y_{\text{candidates},i}^{(1:k)}` is the set of top-k predicted labels for the :math:`i`-th query, - - :math:`\mathbb{1}(\text{condition})` is the indicator function that equals 1 if the condition - is true and 0 otherwise. - - :param query_labels: For each query, this list contains its class labels - :param candidates_labels: For each query, these lists contain class labels of items ranked by a retrieval model (from most to least relevant) - :param k: Number of top items to consider for each query - :return: Score of the retrieval metric - """ # noqa: E501 - query_label_, candidates_labels_ = transform(query_labels, candidates_labels) - truncated_candidates = candidates_labels_[:, :k] - hit_mask = np.isin(query_label_[:, None], truncated_candidates).any(axis=1) - return hit_mask.mean() # type: ignore[no-any-return] - - +@ignore_oos def retrieval_precision( query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, @@ -449,6 +402,7 @@ def retrieval_precision( return float(total_precision / num_queries) +@ignore_oos def retrieval_precision_intersecting( query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, @@ -496,6 +450,7 @@ def retrieval_precision_intersecting( return float(total_precision / num_queries) +@ignore_oos def retrieval_precision_macro( query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, @@ -516,41 +471,6 @@ def retrieval_precision_macro( return _macrofy(retrieval_precision, query_labels, candidates_labels, k) -def _retrieval_precision_numpy( - query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, k: int | None = None -) -> float: - r""" - Calculate the precision at position k. - - Precision at position :math:`k` is calculated as: - - .. math:: - - \text{Precision@k} = \frac{1}{N} \sum_{i=1}^N \frac{\sum_{j=1}^k - \mathbb{1}(y_{\text{query},i} = y_{\text{candidates},i,j})}{k} - - where: - - :math:`N` is the total number of queries, - - :math:`y_{\text{query},i}` is the true label for the :math:`i`-th query, - - :math:`y_{\text{candidates},i,j}` is the :math:`j`-th predicted label for the :math:`i`-th query, - - :math:`\mathbb{1}(\text{condition})` is the indicator function that equals 1 if the - condition is true and 0 otherwise, - - :math:`k` is the number of top candidates considered. - - :param query_labels: For each query, this list contains its class labels - :param candidates_labels: For each query, these lists contain class labels of items ranked by a retrieval model - (from most to least relevant) - :param k: Number of top items to consider for each query - :return: Score of the retrieval metric - """ - query_label_, candidates_labels_ = transform(query_labels, candidates_labels) - top_k_candidates = candidates_labels_[:, :k] - matches = (top_k_candidates == query_label_[:, None]).astype(int) - relevant_counts = np.sum(matches, axis=1) - precision_at_k = relevant_counts / k - return np.mean(precision_at_k) # type: ignore[no-any-return] - - def _dcg(relevance_scores: npt.NDArray[Any], k: int | None = None) -> float: r""" Calculate the Discounted Cumulative Gain (DCG) at position k. 
@@ -597,6 +517,7 @@ def _idcg(relevance_scores: npt.NDArray[Any], k: int | None = None) -> float: return _dcg(ideal_scores, k) +@ignore_oos def retrieval_ndcg(query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, k: int | None = None) -> float: r""" Calculate the Normalized Discounted Cumulative Gain (NDCG) at position k. @@ -632,6 +553,7 @@ def retrieval_ndcg(query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE return float(np.mean(ndcg_scores)) +@ignore_oos def retrieval_ndcg_intersecting( query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, @@ -674,6 +596,7 @@ def retrieval_ndcg_intersecting( return np.mean(ndcg_scores) # type: ignore[return-value] +@ignore_oos def retrieval_ndcg_macro( query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, @@ -692,6 +615,7 @@ def retrieval_ndcg_macro( return _macrofy(retrieval_ndcg, query_labels, candidates_labels, k) +@ignore_oos def retrieval_mrr(query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, k: int | None = None) -> float: r""" Calculate the Mean Reciprocal Rank (MRR) at position k. @@ -726,6 +650,7 @@ def retrieval_mrr(query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_ return float(mrr_sum / num_queries) +@ignore_oos def retrieval_mrr_intersecting( query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, @@ -766,6 +691,7 @@ def retrieval_mrr_intersecting( return float(mrr_sum / num_queries) +@ignore_oos def retrieval_mrr_macro( query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, diff --git a/autointent/metrics/scoring.py b/autointent/metrics/scoring.py index 5a978544e..0c0bc9bb2 100644 --- a/autointent/metrics/scoring.py +++ b/autointent/metrics/scoring.py @@ -1,7 +1,8 @@ """Scoring metrics for multiclass and multilabel classification tasks.""" import logging -from typing import Protocol +from functools import wraps +from typing import Any, Protocol import numpy as np from sklearn.metrics import coverage_error, label_ranking_average_precision_score, label_ranking_loss, roc_auc_score @@ -29,6 +30,23 @@ def __call__(self, labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> floa ... + + +def ignore_oos(func: ScoringMetricFn) -> ScoringMetricFn: + """Ignore OOS in metrics calculation (decorator).""" + + @wraps(func) + def wrapper(labels: list[Any | None], scores: list[Any]) -> float: + labels_filtered = [lab for lab in labels if lab is not None] + scores_filtered = [ + score for score, lab in zip(scores, labels, strict=True) if lab is not None + ] + return func(labels_filtered, scores_filtered) + + return wrapper + + +@ignore_oos def scoring_log_likelihood(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE, eps: float = 1e-10) -> float: r""" Supports multiclass and multilabel cases. @@ -75,6 +93,7 @@ def scoring_log_likelihood(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE, return round(float(res), 6) +@ignore_oos def scoring_roc_auc(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> float: r""" Supports multiclass and multilabel cases. @@ -126,6 +145,7 @@ def _calculate_decision_metric( return res +@ignore_oos def scoring_accuracy(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> float: r""" Calculate accuracy for multiclass and multilabel classification. 
@@ -140,6 +160,7 @@ def scoring_accuracy(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> fl return _calculate_decision_metric(decision_accuracy, labels, scores) +@ignore_oos def scoring_f1(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> float: r""" Calculate the F1 score for multiclass and multilabel classification. @@ -154,6 +175,7 @@ def scoring_f1(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> float: return _calculate_decision_metric(decision_f1, labels, scores) +@ignore_oos def scoring_precision(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> float: r""" Calculate precision for multiclass and multilabel classification. @@ -168,6 +190,7 @@ def scoring_precision(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> f return _calculate_decision_metric(decision_precision, labels, scores) +@ignore_oos def scoring_recall(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> float: r""" Calculate recall for multiclass and multilabel classification. @@ -182,6 +205,7 @@ def scoring_recall(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> floa return _calculate_decision_metric(decision_recall, labels, scores) +@ignore_oos def scoring_hit_rate(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> float: r""" Calculate the hit rate for multilabel classification. @@ -210,6 +234,7 @@ def scoring_hit_rate(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> fl return float(np.mean(is_in)) +@ignore_oos def scoring_neg_coverage(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> float: """ Supports multilabel classification. @@ -246,6 +271,7 @@ def scoring_neg_coverage(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) - return float(1 - (coverage_error(labels, scores) - 1) / (n_classes - 1)) +@ignore_oos def scoring_neg_ranking_loss(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> float: """ Supports multilabel. @@ -262,6 +288,7 @@ def scoring_neg_ranking_loss(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYP return float(-label_ranking_loss(labels, scores)) +@ignore_oos def scoring_map(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> float: r""" Calculate the mean average precision (MAP) score for multilabel classification. From a74e5dd6542210968670cec17fae804b34dcc49c Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 09:23:33 +0300 Subject: [PATCH 17/74] fix codestyle --- autointent/context/_context.py | 4 +--- autointent/metrics/scoring.py | 6 +----- autointent/modules/abc/_scoring.py | 3 +-- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/autointent/context/_context.py b/autointent/context/_context.py index 76baf70c1..09810d0d6 100644 --- a/autointent/context/_context.py +++ b/autointent/context/_context.py @@ -81,9 +81,7 @@ def configure_data(self, config: DataConfig) -> None: :param config: Configuration for the data handling process. """ self.data_handler = DataHandler( - dataset=load_dataset(config.train_path), - random_seed=self.seed, - scheme=config.scheme + dataset=load_dataset(config.train_path), random_seed=self.seed, scheme=config.scheme ) def set_dataset(self, dataset: Dataset, scheme: Literal["ho", "cv"]) -> None: diff --git a/autointent/metrics/scoring.py b/autointent/metrics/scoring.py index 0c0bc9bb2..773f12ad6 100644 --- a/autointent/metrics/scoring.py +++ b/autointent/metrics/scoring.py @@ -30,17 +30,13 @@ def __call__(self, labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> floa ... 
- - def ignore_oos(func: ScoringMetricFn) -> ScoringMetricFn: """Ignore OOS in metrics calculation (decorator).""" @wraps(func) def wrapper(labels: list[Any | None], scores: list[Any]) -> float: labels_filtered = [lab for lab in labels if lab is not None] - scores_filtered = [ - score for score, lab in zip(scores, labels, strict=True) if lab is not None - ] + scores_filtered = [score for score, lab in zip(scores, labels, strict=True) if lab is not None] return func(labels_filtered, scores_filtered) return wrapper diff --git a/autointent/modules/abc/_scoring.py b/autointent/modules/abc/_scoring.py index 5005e4662..bb20b37e1 100644 --- a/autointent/modules/abc/_scoring.py +++ b/autointent/modules/abc/_scoring.py @@ -27,8 +27,7 @@ def fit( self, utterances: list[str], labels: ListOfLabels, - ) -> None: - ... + ) -> None: ... def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: train_utterances, train_labels = self.get_train_data(context) From 8b94741f99a6c106e2d8aaabe6382443d9f68780 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 09:31:14 +0300 Subject: [PATCH 18/74] fix typing --- autointent/metrics/retrieval.py | 6 +++--- autointent/metrics/scoring.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/autointent/metrics/retrieval.py b/autointent/metrics/retrieval.py index 567ae60d1..fd622aa87 100644 --- a/autointent/metrics/retrieval.py +++ b/autointent/metrics/retrieval.py @@ -114,14 +114,14 @@ def ignore_oos(func: RetrievalMetricFn) -> RetrievalMetricFn: """Ignore OOS in metrics calculation (decorator).""" @wraps(func) - def wrapper(query_labels: list[Any | None], candidates_labels: list[Any]) -> float: + def wrapper(query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE) -> float: query_labels_filtered = [lab for lab in query_labels if lab is not None] candidates_labels_filtered = [ cand for cand, lab in zip(candidates_labels, query_labels, strict=True) if lab is not None ] - return func(query_labels_filtered, candidates_labels_filtered) + return func(query_labels_filtered, candidates_labels_filtered) # type: ignore[arg-type] - return wrapper + return wrapper # type: ignore[return-value] @ignore_oos diff --git a/autointent/metrics/scoring.py b/autointent/metrics/scoring.py index 773f12ad6..ab5023be1 100644 --- a/autointent/metrics/scoring.py +++ b/autointent/metrics/scoring.py @@ -2,7 +2,7 @@ import logging from functools import wraps -from typing import Any, Protocol +from typing import Protocol import numpy as np from sklearn.metrics import coverage_error, label_ranking_average_precision_score, label_ranking_loss, roc_auc_score @@ -34,10 +34,10 @@ def ignore_oos(func: ScoringMetricFn) -> ScoringMetricFn: """Ignore OOS in metrics calculation (decorator).""" @wraps(func) - def wrapper(labels: list[Any | None], scores: list[Any]) -> float: + def wrapper(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> float: labels_filtered = [lab for lab in labels if lab is not None] scores_filtered = [score for score, lab in zip(scores, labels, strict=True) if lab is not None] - return func(labels_filtered, scores_filtered) + return func(labels_filtered, scores_filtered) # type: ignore[arg-type] return wrapper From cfb2f25444e0627a63e766419444a50578eb369a Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 09:43:11 +0300 Subject: [PATCH 19/74] add oos handling to cv iterator --- autointent/context/data_handler/_data_handler.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git 
a/autointent/context/data_handler/_data_handler.py b/autointent/context/data_handler/_data_handler.py index e5f9ba361..27e3b37d7 100644 --- a/autointent/context/data_handler/_data_handler.py +++ b/autointent/context/data_handler/_data_handler.py @@ -169,7 +169,9 @@ def test_labels(self, idx: int | None = None) -> ListOfGenericLabels: split = f"{Split.TEST}_{idx}" if idx is not None else Split.TEST return cast(ListOfGenericLabels, self.dataset[split][self.dataset.label_feature]) - def validation_iterator(self) -> Generator[tuple[list[str], ListOfLabels, list[str], ListOfLabels]]: + def validation_iterator( + self + ) -> Generator[tuple[list[str], ListOfLabels, list[str], ListOfLabels]]: if self.scheme == "ho": msg = "Cannot call cross-validation on hold-out DataHandler" raise RuntimeError(msg) @@ -179,11 +181,14 @@ def validation_iterator(self) -> Generator[tuple[list[str], ListOfLabels, list[s val_labels = self.train_labels(j) train_folds = [i for i in range(self.n_folds) if i != j] train_utterances = [ut for i_fold in train_folds for ut in self.train_utterances(i_fold)] - train_labels = [ut for i_fold in train_folds for ut in self.train_labels(i_fold)] - yield train_utterances, train_labels, val_utterances, val_labels # type: ignore[misc] + train_labels = [lab for i_fold in train_folds for lab in self.train_labels(i_fold)] - msg = "something's wrong" - raise RuntimeError(msg) + # filter out all OOS samples from train + train_utterances = [ + ut for ut, lab in zip(train_utterances, train_labels, strict=True) if lab is not None + ] + train_labels = [lab for lab in train_labels if lab is not None] + yield train_utterances, train_labels, val_utterances, val_labels # type: ignore[misc] def dump(self, filepath: str | Path) -> None: """ From e1d41a9cd02fec08c3f6b5f18ce97e6e49782b11 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 09:43:41 +0300 Subject: [PATCH 20/74] remove `DataHandler.dump()` --- autointent/context/data_handler/_data_handler.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/autointent/context/data_handler/_data_handler.py b/autointent/context/data_handler/_data_handler.py index 27e3b37d7..b16838246 100644 --- a/autointent/context/data_handler/_data_handler.py +++ b/autointent/context/data_handler/_data_handler.py @@ -2,7 +2,6 @@ import logging from collections.abc import Generator -from pathlib import Path from typing import Literal, TypedDict, cast from datasets import concatenate_datasets @@ -190,14 +189,6 @@ def validation_iterator( train_labels = [lab for lab in train_labels if lab is not None] yield train_utterances, train_labels, val_utterances, val_labels # type: ignore[misc] - def dump(self, filepath: str | Path) -> None: - """ - Save the dataset splits and intents to a JSON file. - - :param filepath: The path to the file where the JSON data will be saved. 
- """ - self.dataset.to_json(filepath) - def _split_ho(self, random_seed: int, split_train: bool) -> None: has_validation_split = any(split.startswith(Split.VALIDATION) for split in self.dataset) From 9668dc69e0617fde01dbe300cbc9cc8afb820596 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 09:44:17 +0300 Subject: [PATCH 21/74] minor bug fix --- autointent/context/_context.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autointent/context/_context.py b/autointent/context/_context.py index 09810d0d6..3e83db5a7 100644 --- a/autointent/context/_context.py +++ b/autointent/context/_context.py @@ -137,7 +137,7 @@ def dump(self) -> None: # self._logger.info(make_report(optimization_results, nodes=nodes)) # dump train and test data splits - self.data_handler.dump(logs_dir / "dataset.json") + self.data_handler.dataset.to_json(logs_dir / "dataset.json") self._logger.info("logs and other assets are saved to %s", logs_dir) From 61130bba1d16ab17eea539654f0a441e2d114127 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 10:06:27 +0300 Subject: [PATCH 22/74] implement splitting to cv folds --- .../context/data_handler/_data_handler.py | 28 +++++++++++++++---- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/autointent/context/data_handler/_data_handler.py b/autointent/context/data_handler/_data_handler.py index b16838246..58fb3a494 100644 --- a/autointent/context/data_handler/_data_handler.py +++ b/autointent/context/data_handler/_data_handler.py @@ -168,9 +168,7 @@ def test_labels(self, idx: int | None = None) -> ListOfGenericLabels: split = f"{Split.TEST}_{idx}" if idx is not None else Split.TEST return cast(ListOfGenericLabels, self.dataset[split][self.dataset.label_feature]) - def validation_iterator( - self - ) -> Generator[tuple[list[str], ListOfLabels, list[str], ListOfLabels]]: + def validation_iterator(self) -> Generator[tuple[list[str], ListOfLabels, list[str], ListOfLabels]]: if self.scheme == "ho": msg = "Cannot call cross-validation on hold-out DataHandler" raise RuntimeError(msg) @@ -183,9 +181,7 @@ def validation_iterator( train_labels = [lab for i_fold in train_folds for lab in self.train_labels(i_fold)] # filter out all OOS samples from train - train_utterances = [ - ut for ut, lab in zip(train_utterances, train_labels, strict=True) if lab is not None - ] + train_utterances = [ut for ut, lab in zip(train_utterances, train_labels, strict=True) if lab is not None] train_labels = [lab for lab in train_labels if lab is not None] yield train_utterances, train_labels, val_utterances, val_labels # type: ignore[misc] @@ -252,6 +248,26 @@ def _split_validation_from_test(self, random_seed: int) -> None: allow_oos_in_train=True, # both test and validation splits can contain OOS ) + def _split_cv(self, random_seed: int) -> None: + self.dataset[Split.TRAIN] = concatenate_datasets([ + self.dataset[split_name] for split_name in self.dataset if split_name not in [Split.TRAIN, Split.TEST] + ]) + + if Split.TEST not in self.dataset: + self.dataset[Split.TRAIN], self.dataset[Split.TEST] = split_dataset( + self.dataset, split=Split.TRAIN, test_size=0.2, random_seed=random_seed, allow_oos_in_train=True + ) + + for j in range(self.n_folds - 1): + self.dataset[Split.TRAIN], self.dataset[f"{Split.TRAIN}_{j}"] = split_dataset( + self.dataset, + split=Split.TRAIN, + test_size=1 / (self.n_folds - j), + random_seed=random_seed, + allow_oos_in_train=True, + ) + self.dataset[f"{Split.TRAIN}_{self.n_folds-1}"] = self.dataset.pop(Split.TRAIN) + def 
_split_validation_from_train(self, random_seed: int) -> None: if Split.TRAIN in self.dataset: self.dataset[Split.TRAIN], self.dataset[Split.VALIDATION] = split_dataset( From 30847185bad87848dc422844ea4f953cb7a7aa23 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 10:06:52 +0300 Subject: [PATCH 23/74] fix codestyle --- autointent/context/data_handler/_data_handler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/autointent/context/data_handler/_data_handler.py b/autointent/context/data_handler/_data_handler.py index 58fb3a494..fbb7c6987 100644 --- a/autointent/context/data_handler/_data_handler.py +++ b/autointent/context/data_handler/_data_handler.py @@ -249,9 +249,9 @@ def _split_validation_from_test(self, random_seed: int) -> None: ) def _split_cv(self, random_seed: int) -> None: - self.dataset[Split.TRAIN] = concatenate_datasets([ - self.dataset[split_name] for split_name in self.dataset if split_name not in [Split.TRAIN, Split.TEST] - ]) + self.dataset[Split.TRAIN] = concatenate_datasets( + [self.dataset[split_name] for split_name in self.dataset if split_name not in [Split.TRAIN, Split.TEST]] + ) if Split.TEST not in self.dataset: self.dataset[Split.TRAIN], self.dataset[Split.TEST] = split_dataset( From ae1383419dba768fb4b4603a893f19711cba6238 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 10:35:01 +0300 Subject: [PATCH 24/74] remove regex tests --- tests/configs/test_regex.py | 45 ------------------------------------- tests/modules/test_regex.py | 42 ---------------------------------- 2 files changed, 87 deletions(-) delete mode 100644 tests/configs/test_regex.py delete mode 100644 tests/modules/test_regex.py diff --git a/tests/configs/test_regex.py b/tests/configs/test_regex.py deleted file mode 100644 index 0e24bb11e..000000000 --- a/tests/configs/test_regex.py +++ /dev/null @@ -1,45 +0,0 @@ -import pytest -from pydantic import ValidationError - -from autointent.nodes import OptimizationConfig - - -@pytest.fixture -def valid_regexp_config(): - """Fixture for a valid RegExp node configuration.""" - return [ - {"node_type": "regexp", "target_metric": "regexp_partial_accuracy", "search_space": [{"module_name": "regexp"}]} - ] - - -def test_valid_regexp_config(valid_regexp_config): - """Test that a valid RegExp config passes validation.""" - config = OptimizationConfig(valid_regexp_config) - assert config[0].node_type == "regexp" - assert config[0].target_metric == "regexp_partial_accuracy" - assert isinstance(config[0].search_space, list) - assert config[0].search_space[0].module_name == "regexp" - - -def test_invalid_regexp_config_missing_field(): - """Test that a missing required field raises ValidationError.""" - invalid_config = { - "node_type": "regexp", - # Missing "target_metric" - "search_space": [{"module_name": "regexp"}], - } - - with pytest.raises(ValidationError): - OptimizationConfig(invalid_config) - - -def test_invalid_regexp_config_wrong_type(): - """Test that an invalid field type raises ValidationError.""" - invalid_config = { - "node_type": "regexp", - "target_metric": "regexp_partial_accuracy", - "search_space": "should_be_a_list", # Should be a list of dicts - } - - with pytest.raises(ValidationError): - OptimizationConfig(invalid_config) diff --git a/tests/modules/test_regex.py b/tests/modules/test_regex.py deleted file mode 100644 index 83912fe13..000000000 --- a/tests/modules/test_regex.py +++ /dev/null @@ -1,42 +0,0 @@ -import pytest - -from autointent.modules.regexp import RegExp - - 
-@pytest.mark.parametrize( - ("partial_match", "expected_predictions"), - [(".*", [[0, 1], [0, 1], [0, 1], [0, 1], [0, 1]]), ("frozen", [[0], [0], [0], [0], [0, 1]])], -) -def test_base_regex(partial_match, expected_predictions): - train_data = [ - { - "id": 0, - "name": "accept_reservations", - "regexp_full_match": [".*"], - "regexp_partial_match": [".*"], - }, - { - "id": 1, - "name": "account_blocked", - "regexp_partial_match": [partial_match], - }, - ] - - matcher = RegExp() - matcher.fit(train_data) - - test_data = [ - "why is there a hold on my american saving bank account", - "i am nost sure why my account is blocked", - "why is there a hold on my capital one checking account", - "i think my account is blocked but i do not know the reason", - "can you tell me why is my bank account frozen", - ] - predictions = matcher.predict(test_data) - assert predictions == expected_predictions - - predictions, metadata = matcher.predict_with_metadata(test_data) - assert len(predictions) == len(test_data) == len(metadata) - - assert "partial_matches" in metadata[0] - assert "full_matches" in metadata[0] From 4c49809fc1fd30a14386ed498a504240a86b95bb Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 10:36:04 +0300 Subject: [PATCH 25/74] bug fix --- autointent/context/_context.py | 2 +- autointent/context/data_handler/_data_handler.py | 2 +- autointent/metrics/retrieval.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/autointent/context/_context.py b/autointent/context/_context.py index 3e83db5a7..dd73db842 100644 --- a/autointent/context/_context.py +++ b/autointent/context/_context.py @@ -84,7 +84,7 @@ def configure_data(self, config: DataConfig) -> None: dataset=load_dataset(config.train_path), random_seed=self.seed, scheme=config.scheme ) - def set_dataset(self, dataset: Dataset, scheme: Literal["ho", "cv"]) -> None: + def set_dataset(self, dataset: Dataset, scheme: Literal["ho", "cv"] = "ho") -> None: """ Set the datasets for training, validation and testing. 
diff --git a/autointent/context/data_handler/_data_handler.py b/autointent/context/data_handler/_data_handler.py index fbb7c6987..ca455f213 100644 --- a/autointent/context/data_handler/_data_handler.py +++ b/autointent/context/data_handler/_data_handler.py @@ -32,7 +32,7 @@ class DataHandler: # TODO rename to Validator def __init__( self, dataset: Dataset, - scheme: Literal["cv", "ho"], + scheme: Literal["cv", "ho"] = "ho", split_train: bool = True, random_seed: int = 0, n_folds: int = 3, diff --git a/autointent/metrics/retrieval.py b/autointent/metrics/retrieval.py index fd622aa87..d208482c7 100644 --- a/autointent/metrics/retrieval.py +++ b/autointent/metrics/retrieval.py @@ -114,12 +114,12 @@ def ignore_oos(func: RetrievalMetricFn) -> RetrievalMetricFn: """Ignore OOS in metrics calculation (decorator).""" @wraps(func) - def wrapper(query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE) -> float: + def wrapper(query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, k: int | None = None) -> float: query_labels_filtered = [lab for lab in query_labels if lab is not None] candidates_labels_filtered = [ cand for cand, lab in zip(candidates_labels, query_labels, strict=True) if lab is not None ] - return func(query_labels_filtered, candidates_labels_filtered) # type: ignore[arg-type] + return func(query_labels_filtered, candidates_labels_filtered, k) # type: ignore[arg-type] return wrapper # type: ignore[return-value] From 2f8642f4370895e99d23aab3ec021410a28563e3 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 10:46:32 +0300 Subject: [PATCH 26/74] bug fix --- autointent/modules/abc/_scoring.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/autointent/modules/abc/_scoring.py b/autointent/modules/abc/_scoring.py index bb20b37e1..4871de547 100644 --- a/autointent/modules/abc/_scoring.py +++ b/autointent/modules/abc/_scoring.py @@ -30,8 +30,7 @@ def fit( ) -> None: ... 
def score_ho(self, context: Context, metrics: list[str]) -> dict[str, float]: - train_utterances, train_labels = self.get_train_data(context) - self.fit(train_utterances, train_labels) + self.fit(*self.get_train_data(context)) val_utterances = context.data_handler.validation_utterances(0) val_labels = context.data_handler.validation_labels(0) @@ -75,7 +74,7 @@ def get_assets(self) -> ScorerArtifact: return self._artifact def get_train_data(self, context: Context) -> tuple[list[str], ListOfLabels]: - return (context.data_handler.train_utterances(0), context.data_handler.train_labels(0)) # type: ignore[return-value] + return context.data_handler.train_utterances(0), context.data_handler.train_labels(0) # type: ignore[return-value] @abstractmethod def predict(self, utterances: list[str]) -> npt.NDArray[Any]: From 13e63d17b9624de6ef43862eb141bd12adab4d7d Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 10:46:37 +0300 Subject: [PATCH 27/74] update tests --- tests/nodes/test_logreg.py | 4 ++-- tests/nodes/test_retrieval.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/nodes/test_logreg.py b/tests/nodes/test_logreg.py index 162ef7af5..4f4dd4337 100644 --- a/tests/nodes/test_logreg.py +++ b/tests/nodes/test_logreg.py @@ -24,7 +24,7 @@ def test_embedding_multiclass(): load_path=trial.module_dump_dir, ) node = InferenceNode.from_config(config) - scores = node.module.score(context, "validation", [metric]) + scores = node.module.score(context, [metric]) assert isinstance(scores, dict) node.module.clear_cache() gc.collect() @@ -44,7 +44,7 @@ def test_embedding_multilabel(): load_path=trial.module_dump_dir, ) node = InferenceNode.from_config(config) - scores = node.module.score(context, "validation", [metric]) + scores = node.module.score(context, [metric]) assert isinstance(scores, dict) node.module.clear_cache() gc.collect() diff --git a/tests/nodes/test_retrieval.py b/tests/nodes/test_retrieval.py index 43156de06..987b58b5a 100644 --- a/tests/nodes/test_retrieval.py +++ b/tests/nodes/test_retrieval.py @@ -24,7 +24,7 @@ def test_embedding_multiclass(): load_path=trial.module_dump_dir, ) node = InferenceNode.from_config(config) - labels, distances, texts = node.module.predict(["hello", "card"]) + node.module.predict(["hello", "card"]) node.module.clear_cache() gc.collect() torch.cuda.empty_cache() @@ -43,7 +43,7 @@ def test_embedding_multilabel(): load_path=trial.module_dump_dir, ) node = InferenceNode.from_config(config) - labels, distances, texts = node.module.predict(["hello", "card"]) + node.module.predict(["hello", "card"]) node.module.clear_cache() gc.collect() torch.cuda.empty_cache() From 5ac89db77e48847ba8cc625d066ae7e02a0ff341 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 10:47:09 +0300 Subject: [PATCH 28/74] fix typing --- autointent/metrics/retrieval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autointent/metrics/retrieval.py b/autointent/metrics/retrieval.py index d208482c7..a9546ce9a 100644 --- a/autointent/metrics/retrieval.py +++ b/autointent/metrics/retrieval.py @@ -121,7 +121,7 @@ def wrapper(query_labels: LABELS_VALUE_TYPE, candidates_labels: CANDIDATE_TYPE, ] return func(query_labels_filtered, candidates_labels_filtered, k) # type: ignore[arg-type] - return wrapper # type: ignore[return-value] + return wrapper @ignore_oos From d4bf50f015f9350bee3d80608e5e31266642c0f2 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 11:38:36 +0300 Subject: [PATCH 29/74] big fix --- 
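[Editor's note, not part of the original commit] The change below guards the concatenation of extra splits in `DataHandler._split_cv` (only non-train/test splits get merged and popped), which makes `scheme="cv"` usable end to end; the fold tests added in the following patches rely on it. Because each pass carves off `1 / (n_folds - j)` of the remaining training data, the folds come out equally sized (for 3 folds: one third, then half of the rest, then the remainder). A minimal usage sketch, assuming `dataset` is an already-loaded `autointent.Dataset` and that `DataHandler` is re-exported from `autointent.context.data_handler`:

    from autointent.context.data_handler import DataHandler

    handler = DataHandler(dataset, scheme="cv", n_folds=3)
    for train_texts, train_labels, val_texts, val_labels in handler.validation_iterator():
        ...  # fit a module on the merged train folds, evaluate it on the held-out fold

Each iteration yields the concatenated training folds with OOS samples filtered out, while the held-out fold keeps its OOS samples so that OOS-aware metrics can still see them.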
autointent/context/data_handler/_data_handler.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/autointent/context/data_handler/_data_handler.py b/autointent/context/data_handler/_data_handler.py index ca455f213..fce8970a7 100644 --- a/autointent/context/data_handler/_data_handler.py +++ b/autointent/context/data_handler/_data_handler.py @@ -249,9 +249,11 @@ def _split_validation_from_test(self, random_seed: int) -> None: ) def _split_cv(self, random_seed: int) -> None: - self.dataset[Split.TRAIN] = concatenate_datasets( - [self.dataset[split_name] for split_name in self.dataset if split_name not in [Split.TRAIN, Split.TEST]] - ) + extra_splits = [split_name for split_name in self.dataset if split_name not in [Split.TRAIN, Split.TEST]] + if extra_splits: + self.dataset[Split.TRAIN] = concatenate_datasets( + [self.dataset.pop(split_name) for split_name in extra_splits] + ) if Split.TEST not in self.dataset: self.dataset[Split.TRAIN], self.dataset[Split.TEST] = split_dataset( From 74dcb9839f889ff5763ec0b3be5ae828d6a2fe62 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 11:38:45 +0300 Subject: [PATCH 30/74] basic test on cv folding --- tests/data/test_data_handler.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/data/test_data_handler.py b/tests/data/test_data_handler.py index e93dba075..ad6f22cd4 100644 --- a/tests/data/test_data_handler.py +++ b/tests/data/test_data_handler.py @@ -173,3 +173,22 @@ def test_dataset_validation(mapping): def test_intents_validation(mapping): with pytest.raises(ValueError): # noqa: PT011 Dataset.from_dict(mapping) + + +def count_oos(split): + return len(split.filter(lambda sample: sample["label"] is None)) + + +def test_cv_folding(dataset): + DataHandler(dataset, scheme="cv", n_folds=3) + + desired_specs = { + "test": {"total": 12, "oos": 4}, + "train_0": {"total": 16, "oos": 5}, + "train_1": {"total": 16, "oos": 5}, + "train_2": {"total": 16, "oos": 6}, + } + + for split_name in dataset: + assert len(dataset[split_name]) == desired_specs[split_name]["total"] + assert count_oos(dataset[split_name]) == desired_specs[split_name]["oos"] From f13592b52f265883c9acbe6a2809a23fbd5e117b Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 11:51:16 +0300 Subject: [PATCH 31/74] add tests for metrics to ignore oos samples --- tests/metrics/test_retrieval_metrics.py | 11 +++++++++++ tests/metrics/test_retrieval_metrics_intersecting.py | 10 ++++++++++ 2 files changed, 21 insertions(+) diff --git a/tests/metrics/test_retrieval_metrics.py b/tests/metrics/test_retrieval_metrics.py index 84a55818f..59860831b 100644 --- a/tests/metrics/test_retrieval_metrics.py +++ b/tests/metrics/test_retrieval_metrics.py @@ -89,3 +89,14 @@ def test_ndcg(query_labels, candidates_labels, k, ground_truth): def test_mrr(query_labels, candidates_labels, k, ground_truth): output = retrieval_mrr(query_labels, candidates_labels, k) np.testing.assert_almost_equal(output, ground_truth) + + +@pytest.mark.parametrize( + ("query_labels", "candidates_labels", "ground_truth"), + [ + ([0,1,2,3], [[0,1,2], [0,1,2], [0,1,2], [0,1,2]], 0.75), + ([0,1,2,None], [[0,1,2], [0,1,2], [0,1,2], [0,1,2]], 1), + ] +) +def test_oos_ignoring(query_labels, candidates_labels, ground_truth): + assert ground_truth == retrieval_hit_rate(query_labels, candidates_labels) diff --git a/tests/metrics/test_retrieval_metrics_intersecting.py b/tests/metrics/test_retrieval_metrics_intersecting.py index b4d912878..279687204 100644 --- 
a/tests/metrics/test_retrieval_metrics_intersecting.py +++ b/tests/metrics/test_retrieval_metrics_intersecting.py @@ -121,3 +121,13 @@ def test_ndcg(query_labels, candidates_labels, k, ground_truth): def test_mrr(query_labels, candidates_labels, k, ground_truth): output = retrieval_mrr_intersecting(query_labels, candidates_labels, k) np.testing.assert_almost_equal(output, ground_truth) + +@pytest.mark.parametrize( + ("query_labels", "candidates_labels", "ground_truth"), + [ + ([[0,1],[0,1],[0,1],[0,1]], [[[0,1],[0,1]], [[0,1],[0,1]], [[0,1],[0,1]], [[1,0],[1,0]]], 0.75), + ([[0,1],[0,1],[0,1],None], [[[0,1],[0,1]], [[0,1],[0,1]], [[0,1],[0,1]], [[1,0],[1,0]]], 1.0), + ] +) +def test_oos_ignoring(query_labels, candidates_labels, ground_truth): + assert ground_truth == retrieval_hit_rate_intersecting(query_labels, candidates_labels) From 080d7cc1ae1065676944d31238a7ced3c88de456 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 12:00:58 +0300 Subject: [PATCH 32/74] add tests for cv iterator --- tests/data/test_data_handler.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/data/test_data_handler.py b/tests/data/test_data_handler.py index ad6f22cd4..1df3bdae4 100644 --- a/tests/data/test_data_handler.py +++ b/tests/data/test_data_handler.py @@ -192,3 +192,31 @@ def test_cv_folding(dataset): for split_name in dataset: assert len(dataset[split_name]) == desired_specs[split_name]["total"] assert count_oos(dataset[split_name]) == desired_specs[split_name]["oos"] + + +def count_oos_labels(split): + return sum(sample is None for sample in split) + +def test_cv_iterator(dataset): + dh = DataHandler(dataset, scheme="cv", n_folds=3) + + desired_specs = [{ + "train": {"total": 21, "oos": 0}, + "val": {"total": 16, "oos": 5}, + }, + { + "train": {"total": 21, "oos": 0}, + "val": {"total": 16, "oos": 5}, + }, + { + "train": {"total": 22, "oos": 0}, + "val": {"total": 16, "oos": 6}, + }, + ] + + for i, (x_train, y_train, x_val, y_val) in enumerate(dh.validation_iterator()): + specs = desired_specs[i] + assert len(x_train) == len(y_train) == specs["train"]["total"] + assert count_oos_labels(y_train) == specs["train"]["oos"] + assert len(x_val) == len(y_val) == specs["val"]["total"] + assert count_oos_labels(y_val) == specs["val"]["oos"] From bee4e73b8b0d0b01b2ef0f782eaf1163e3b98032 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 5 Feb 2025 12:01:18 +0300 Subject: [PATCH 33/74] fix codestyle --- tests/data/test_data_handler.py | 26 ++++++++++--------- tests/metrics/test_retrieval_metrics.py | 10 +++---- .../test_retrieval_metrics_intersecting.py | 15 +++++++---- 3 files changed, 29 insertions(+), 22 deletions(-) diff --git a/tests/data/test_data_handler.py b/tests/data/test_data_handler.py index 1df3bdae4..8e4f60cd7 100644 --- a/tests/data/test_data_handler.py +++ b/tests/data/test_data_handler.py @@ -197,21 +197,23 @@ def test_cv_folding(dataset): def count_oos_labels(split): return sum(sample is None for sample in split) + def test_cv_iterator(dataset): dh = DataHandler(dataset, scheme="cv", n_folds=3) - desired_specs = [{ - "train": {"total": 21, "oos": 0}, - "val": {"total": 16, "oos": 5}, - }, - { - "train": {"total": 21, "oos": 0}, - "val": {"total": 16, "oos": 5}, - }, - { - "train": {"total": 22, "oos": 0}, - "val": {"total": 16, "oos": 6}, - }, + desired_specs = [ + { + "train": {"total": 21, "oos": 0}, + "val": {"total": 16, "oos": 5}, + }, + { + "train": {"total": 21, "oos": 0}, + "val": {"total": 16, "oos": 5}, + }, + { + "train": {"total": 22, 
"oos": 0}, + "val": {"total": 16, "oos": 6}, + }, ] for i, (x_train, y_train, x_val, y_val) in enumerate(dh.validation_iterator()): diff --git a/tests/metrics/test_retrieval_metrics.py b/tests/metrics/test_retrieval_metrics.py index 59860831b..86e4a3740 100644 --- a/tests/metrics/test_retrieval_metrics.py +++ b/tests/metrics/test_retrieval_metrics.py @@ -92,11 +92,11 @@ def test_mrr(query_labels, candidates_labels, k, ground_truth): @pytest.mark.parametrize( - ("query_labels", "candidates_labels", "ground_truth"), - [ - ([0,1,2,3], [[0,1,2], [0,1,2], [0,1,2], [0,1,2]], 0.75), - ([0,1,2,None], [[0,1,2], [0,1,2], [0,1,2], [0,1,2]], 1), - ] + ("query_labels", "candidates_labels", "ground_truth"), + [ + ([0, 1, 2, 3], [[0, 1, 2], [0, 1, 2], [0, 1, 2], [0, 1, 2]], 0.75), + ([0, 1, 2, None], [[0, 1, 2], [0, 1, 2], [0, 1, 2], [0, 1, 2]], 1), + ], ) def test_oos_ignoring(query_labels, candidates_labels, ground_truth): assert ground_truth == retrieval_hit_rate(query_labels, candidates_labels) diff --git a/tests/metrics/test_retrieval_metrics_intersecting.py b/tests/metrics/test_retrieval_metrics_intersecting.py index 279687204..5ecca6603 100644 --- a/tests/metrics/test_retrieval_metrics_intersecting.py +++ b/tests/metrics/test_retrieval_metrics_intersecting.py @@ -122,12 +122,17 @@ def test_mrr(query_labels, candidates_labels, k, ground_truth): output = retrieval_mrr_intersecting(query_labels, candidates_labels, k) np.testing.assert_almost_equal(output, ground_truth) + @pytest.mark.parametrize( - ("query_labels", "candidates_labels", "ground_truth"), - [ - ([[0,1],[0,1],[0,1],[0,1]], [[[0,1],[0,1]], [[0,1],[0,1]], [[0,1],[0,1]], [[1,0],[1,0]]], 0.75), - ([[0,1],[0,1],[0,1],None], [[[0,1],[0,1]], [[0,1],[0,1]], [[0,1],[0,1]], [[1,0],[1,0]]], 1.0), - ] + ("query_labels", "candidates_labels", "ground_truth"), + [ + ( + [[0, 1], [0, 1], [0, 1], [0, 1]], + [[[0, 1], [0, 1]], [[0, 1], [0, 1]], [[0, 1], [0, 1]], [[1, 0], [1, 0]]], + 0.75, + ), + ([[0, 1], [0, 1], [0, 1], None], [[[0, 1], [0, 1]], [[0, 1], [0, 1]], [[0, 1], [0, 1]], [[1, 0], [1, 0]]], 1.0), + ], ) def test_oos_ignoring(query_labels, candidates_labels, ground_truth): assert ground_truth == retrieval_hit_rate_intersecting(query_labels, candidates_labels) From 75e47c8b826adfe5d79ff500b7c92748a9dd7659 Mon Sep 17 00:00:00 2001 From: voorhs Date: Thu, 6 Feb 2025 10:51:07 +0300 Subject: [PATCH 34/74] minor bug fix --- autointent/modules/abc/_decision.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autointent/modules/abc/_decision.py b/autointent/modules/abc/_decision.py index 6d78d050a..dc128a11f 100644 --- a/autointent/modules/abc/_decision.py +++ b/autointent/modules/abc/_decision.py @@ -80,7 +80,7 @@ def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: val_scores = scores[j] train_folds = [i for i in range(context.data_handler.n_folds) if i != j] train_labels = [ut for i_fold in train_folds for ut in labels[i_fold]] - train_scores = [ut for i_fold in train_folds for ut in scores[i_fold]] + train_scores = np.array([sc for i_fold in train_folds for sc in scores[i_fold]]) self.fit(train_scores, train_labels, context.data_handler.tags) # type: ignore[arg-type] val_decisions = self.predict(val_scores) for name, fn in chosen_metrics.items(): From f64ee3b3f450db408fff4f00960437c359ceb8fc Mon Sep 17 00:00:00 2001 From: voorhs Date: Thu, 6 Feb 2025 12:02:12 +0300 Subject: [PATCH 35/74] fix codestyle --- autointent/generation/utterances/__init__.py | 42 +++++++++---------- 
.../utterances/evolution/__init__.py | 34 +++++++-------- .../evolution/chat_templates/concrete.py | 5 +-- .../evolution/chat_templates/goofy.py | 3 +- 4 files changed, 40 insertions(+), 44 deletions(-) diff --git a/autointent/generation/utterances/__init__.py b/autointent/generation/utterances/__init__.py index 7534eb1a0..006a163df 100644 --- a/autointent/generation/utterances/__init__.py +++ b/autointent/generation/utterances/__init__.py @@ -1,28 +1,28 @@ from .basic import SynthesizerChatTemplate, UtteranceGenerator from .evolution import ( - AbstractEvolution, - ConcreteEvolution, - EvolutionChatTemplate, - FormalEvolution, - FunnyEvolution, - GoofyEvolution, - InformalEvolution, - ReasoningEvolution, - UtteranceEvolver, + AbstractEvolution, + ConcreteEvolution, + EvolutionChatTemplate, + FormalEvolution, + FunnyEvolution, + GoofyEvolution, + InformalEvolution, + ReasoningEvolution, + UtteranceEvolver, ) from .generator import Generator __all__ = [ - "AbstractEvolution", - "ConcreteEvolution", - "EvolutionChatTemplate", - "FormalEvolution", - "FunnyEvolution", - "Generator", - "GoofyEvolution", - "InformalEvolution", - "ReasoningEvolution", - "SynthesizerChatTemplate", - "UtteranceEvolver", - "UtteranceGenerator", + "AbstractEvolution", + "ConcreteEvolution", + "EvolutionChatTemplate", + "FormalEvolution", + "FunnyEvolution", + "Generator", + "GoofyEvolution", + "InformalEvolution", + "ReasoningEvolution", + "SynthesizerChatTemplate", + "UtteranceEvolver", + "UtteranceGenerator", ] diff --git a/autointent/generation/utterances/evolution/__init__.py b/autointent/generation/utterances/evolution/__init__.py index 7e352bd86..596d83a3f 100644 --- a/autointent/generation/utterances/evolution/__init__.py +++ b/autointent/generation/utterances/evolution/__init__.py @@ -1,23 +1,23 @@ from .chat_templates import ( - AbstractEvolution, - ConcreteEvolution, - EvolutionChatTemplate, - FormalEvolution, - FunnyEvolution, - GoofyEvolution, - InformalEvolution, - ReasoningEvolution, + AbstractEvolution, + ConcreteEvolution, + EvolutionChatTemplate, + FormalEvolution, + FunnyEvolution, + GoofyEvolution, + InformalEvolution, + ReasoningEvolution, ) from .evolver import UtteranceEvolver __all__ = [ - "AbstractEvolution", - "ConcreteEvolution", - "EvolutionChatTemplate", - "FormalEvolution", - "FunnyEvolution", - "GoofyEvolution", - "InformalEvolution", - "ReasoningEvolution", - "UtteranceEvolver", + "AbstractEvolution", + "ConcreteEvolution", + "EvolutionChatTemplate", + "FormalEvolution", + "FunnyEvolution", + "GoofyEvolution", + "InformalEvolution", + "ReasoningEvolution", + "UtteranceEvolver", ] diff --git a/autointent/generation/utterances/evolution/chat_templates/concrete.py b/autointent/generation/utterances/evolution/chat_templates/concrete.py index 4a7ab52f2..dcca78bac 100644 --- a/autointent/generation/utterances/evolution/chat_templates/concrete.py +++ b/autointent/generation/utterances/evolution/chat_templates/concrete.py @@ -29,10 +29,7 @@ class ConcreteEvolution(EvolutionChatTemplate): Message(role=Role.ASSISTANT, content="I want to reserve a table for 4 persons at 9 pm."), Message( role=Role.USER, - content=( - "Intent name: requesting technical support\n" - "Utterance: I'm having trouble with my laptop." 
- ), + content=("Intent name: requesting technical support\n" "Utterance: I'm having trouble with my laptop."), ), Message(role=Role.ASSISTANT, content="My laptop is constantly rebooting and overheating."), ] diff --git a/autointent/generation/utterances/evolution/chat_templates/goofy.py b/autointent/generation/utterances/evolution/chat_templates/goofy.py index 15a6fcb17..c53156054 100644 --- a/autointent/generation/utterances/evolution/chat_templates/goofy.py +++ b/autointent/generation/utterances/evolution/chat_templates/goofy.py @@ -36,8 +36,7 @@ class GoofyEvolution(EvolutionChatTemplate): ), ), Message( - role=Role.ASSISTANT, - content="My laptop's having an existential crisis—keeps rebooting and melting. Help!" + role=Role.ASSISTANT, content="My laptop's having an existential crisis—keeps rebooting and melting. Help!" ), ] From 39e69d83682570c316963e88b39137384f299e17 Mon Sep 17 00:00:00 2001 From: voorhs Date: Thu, 6 Feb 2025 12:30:23 +0300 Subject: [PATCH 36/74] add test for cv --- tests/pipeline/test_optimization.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/pipeline/test_optimization.py b/tests/pipeline/test_optimization.py index 050eca742..6dd35b690 100644 --- a/tests/pipeline/test_optimization.py +++ b/tests/pipeline/test_optimization.py @@ -18,35 +18,37 @@ "task_type", ["multiclass", "multilabel", "description"], ) -def test_no_context_optimization(dataset, task_type): +def test_cv(dataset, task_type): project_dir = setup_environment() search_space = get_search_space(task_type) pipeline_optimizer = Pipeline.from_search_space(search_space) - pipeline_optimizer.set_config(LoggingConfig(project_dir=project_dir, dump_modules=False)) + pipeline_optimizer.set_config(LoggingConfig(project_dir=project_dir, dump_modules=True, clear_ram=True)) pipeline_optimizer.set_config(VectorIndexConfig()) pipeline_optimizer.set_config(EmbedderConfig(batch_size=16, max_length=32, device="cpu")) if task_type == "multilabel": dataset = dataset.to_multilabel() - context = pipeline_optimizer.fit(dataset) + context = pipeline_optimizer.fit(dataset, scheme="cv") context.dump() + assert os.listdir(pipeline_optimizer.logging_config.dump_dir) + @pytest.mark.parametrize( "task_type", ["multiclass", "multilabel", "description"], ) -def test_save_db(dataset, task_type): +def test_no_context_optimization(dataset, task_type): project_dir = setup_environment() search_space = get_search_space(task_type) pipeline_optimizer = Pipeline.from_search_space(search_space) - pipeline_optimizer.set_config(LoggingConfig(project_dir=project_dir, dump_modules=False)) - pipeline_optimizer.set_config(VectorIndexConfig(save_db=True)) + pipeline_optimizer.set_config(LoggingConfig(project_dir=project_dir, dump_modules=False, clear_ram=False)) + pipeline_optimizer.set_config(VectorIndexConfig()) pipeline_optimizer.set_config(EmbedderConfig(batch_size=16, max_length=32, device="cpu")) if task_type == "multilabel": @@ -66,7 +68,7 @@ def test_dump_modules(dataset, task_type): pipeline_optimizer = Pipeline.from_search_space(search_space) - pipeline_optimizer.set_config(LoggingConfig(project_dir=project_dir, dump_modules=True)) + pipeline_optimizer.set_config(LoggingConfig(project_dir=project_dir, dump_modules=True, clear_ram=True)) pipeline_optimizer.set_config(VectorIndexConfig()) pipeline_optimizer.set_config(EmbedderConfig(batch_size=16, max_length=32, device="cpu")) From ef11594efac8370b442c2f573202bb6049265c89 Mon Sep 17 00:00:00 2001 From: voorhs Date: Thu, 6 Feb 2025 12:30:28 +0300 
Subject: [PATCH 37/74] bug fix --- autointent/modules/abc/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autointent/modules/abc/_base.py b/autointent/modules/abc/_base.py index 2674510eb..d06fe1018 100644 --- a/autointent/modules/abc/_base.py +++ b/autointent/modules/abc/_base.py @@ -195,7 +195,7 @@ def _get_task_specs(labels: ListOfGenericLabels) -> tuple[int, bool, bool]: contains_oos_samples = any(label is None for label in labels) in_domain_label = next(lab for lab in labels if lab is not None) multilabel = isinstance(in_domain_label, list) - n_classes = len(labels[0]) if multilabel else len(set(labels).difference([None])) # type: ignore[arg-type] + n_classes = len(in_domain_label) if multilabel else len(set(labels).difference([None])) # type: ignore[arg-type] return n_classes, multilabel, contains_oos_samples @abstractmethod From bffffb1b72d9aa6fb7eba9002a52c8835baaf13d Mon Sep 17 00:00:00 2001 From: voorhs Date: Thu, 6 Feb 2025 13:25:03 +0300 Subject: [PATCH 38/74] implement cv iterator for description scorer --- .../scoring/_description/description.py | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/autointent/modules/scoring/_description/description.py b/autointent/modules/scoring/_description/description.py index d84b7d6a7..28cd84d9a 100644 --- a/autointent/modules/scoring/_description/description.py +++ b/autointent/modules/scoring/_description/description.py @@ -8,7 +8,9 @@ from sklearn.metrics.pairwise import cosine_similarity from autointent import Context, Embedder +from autointent.context.optimization_info import ScorerArtifact from autointent.custom_types import ListOfLabels +from autointent.metrics import SCORING_METRICS_MULTICLASS, SCORING_METRICS_MULTILABEL from autointent.modules.abc import ScoringModule @@ -153,3 +155,31 @@ def get_train_data(self, context: Context) -> tuple[list[str], ListOfLabels, lis context.data_handler.train_labels(0), context.data_handler.intent_descriptions, ) + + def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: + metrics_dict = SCORING_METRICS_MULTILABEL if context.is_multilabel() else SCORING_METRICS_MULTICLASS + chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics} + + all_val_scores = [] + metrics_values: dict[str, list[float]] = {name: [] for name in chosen_metrics} + for j in range(context.data_handler.n_folds): + val_labels = context.data_handler.train_labels(j) + val_utterances = context.data_handler.train_utterances(j) + + train_folds = [i for i in range(context.data_handler.n_folds) if i != j] + train_labels = [lab for i_fold in train_folds for lab in context.data_handler.train_labels(i_fold)] + train_utterances = [ut for i_fold in train_folds for ut in context.data_handler.train_utterances(i_fold)] + + # filter out all OOS samples from train + train_utterances = [ut for ut, lab in zip(train_utterances, train_labels, strict=True) if lab is not None] + train_labels = [lab for lab in train_labels if lab is not None] + + self.fit(train_utterances, train_labels, context.data_handler.intent_descriptions) # type: ignore[arg-type] + + val_scores = self.predict(val_utterances) + for name, fn in chosen_metrics.items(): + metrics_values[name].append(fn(val_labels, val_scores)) + all_val_scores.append(val_scores) + + self._artifact = ScorerArtifact(folded_scores=all_val_scores) + return {name: float(np.mean(values_list)) for name, values_list in metrics_values.items()} From 9602af840c05da9e2d4f380c13f0e6aecb7541bd Mon Sep 17 00:00:00 2001 
From: voorhs Date: Thu, 6 Feb 2025 13:28:30 +0300 Subject: [PATCH 39/74] refactor cv iterator for description node --- autointent/modules/abc/_base.py | 3 +- .../scoring/_description/description.py | 35 ++++++++----------- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/autointent/modules/abc/_base.py b/autointent/modules/abc/_base.py index d06fe1018..4ebac0af2 100644 --- a/autointent/modules/abc/_base.py +++ b/autointent/modules/abc/_base.py @@ -136,12 +136,13 @@ def score_metrics_cv( self, metrics_dict: dict[str, Any], cv_iterator: Iterable[tuple[list[str], ListOfLabels, list[str], ListOfLabels]], + **fit_kwargs: dict[str, Any], ) -> tuple[dict[str, float], list[ListOfGenericLabels] | list[npt.NDArray[Any]]]: metrics_values: dict[str, list[float]] = {name: [] for name in metrics_dict} all_val_preds = [] for train_utterances, train_labels, val_utterances, val_labels in cv_iterator: - self.fit(train_utterances, train_labels) # type: ignore[arg-type] + self.fit(train_utterances, train_labels, **fit_kwargs) # type: ignore[arg-type] val_preds = self.predict(val_utterances) for name, fn in metrics_dict.items(): metrics_values[name].append(fn(val_labels, val_preds)) diff --git a/autointent/modules/scoring/_description/description.py b/autointent/modules/scoring/_description/description.py index 28cd84d9a..d16b8c18f 100644 --- a/autointent/modules/scoring/_description/description.py +++ b/autointent/modules/scoring/_description/description.py @@ -157,29 +157,22 @@ def get_train_data(self, context: Context) -> tuple[list[str], ListOfLabels, lis ) def score_cv(self, context: Context, metrics: list[str]) -> dict[str, float]: + """ + Evaluate the scorer on a test set and compute the specified metric. + + :param context: Context containing test set and other data. 
+ :param split: Target split + :return: Computed metrics value for the test set or error code of metrics + """ metrics_dict = SCORING_METRICS_MULTILABEL if context.is_multilabel() else SCORING_METRICS_MULTICLASS chosen_metrics = {name: fn for name, fn in metrics_dict.items() if name in metrics} - all_val_scores = [] - metrics_values: dict[str, list[float]] = {name: [] for name in chosen_metrics} - for j in range(context.data_handler.n_folds): - val_labels = context.data_handler.train_labels(j) - val_utterances = context.data_handler.train_utterances(j) - - train_folds = [i for i in range(context.data_handler.n_folds) if i != j] - train_labels = [lab for i_fold in train_folds for lab in context.data_handler.train_labels(i_fold)] - train_utterances = [ut for i_fold in train_folds for ut in context.data_handler.train_utterances(i_fold)] - - # filter out all OOS samples from train - train_utterances = [ut for ut, lab in zip(train_utterances, train_labels, strict=True) if lab is not None] - train_labels = [lab for lab in train_labels if lab is not None] - - self.fit(train_utterances, train_labels, context.data_handler.intent_descriptions) # type: ignore[arg-type] - - val_scores = self.predict(val_utterances) - for name, fn in chosen_metrics.items(): - metrics_values[name].append(fn(val_labels, val_scores)) - all_val_scores.append(val_scores) + metrics_calculated, all_val_scores = self.score_metrics_cv( + chosen_metrics, + context.data_handler.validation_iterator(), + descriptions=context.data_handler.intent_descriptions, + ) self._artifact = ScorerArtifact(folded_scores=all_val_scores) - return {name: float(np.mean(values_list)) for name, values_list in metrics_values.items()} + + return metrics_calculated From ea39b36b1f9ec8d84e6ec66e551a6f09bc814338 Mon Sep 17 00:00:00 2001 From: voorhs Date: Thu, 6 Feb 2025 13:30:22 +0300 Subject: [PATCH 40/74] fix typing --- autointent/modules/abc/_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/autointent/modules/abc/_base.py b/autointent/modules/abc/_base.py index 4ebac0af2..cef5976df 100644 --- a/autointent/modules/abc/_base.py +++ b/autointent/modules/abc/_base.py @@ -132,11 +132,11 @@ def score_metrics_ho(params: tuple[Any, Any], metrics_dict: dict[str, Any]) -> d metrics[metric_name] = metric_fn(*params) return metrics - def score_metrics_cv( + def score_metrics_cv( # type: ignore[no-untyped-def] self, metrics_dict: dict[str, Any], cv_iterator: Iterable[tuple[list[str], ListOfLabels, list[str], ListOfLabels]], - **fit_kwargs: dict[str, Any], + **fit_kwargs, # noqa: ANN003 ) -> tuple[dict[str, float], list[ListOfGenericLabels] | list[npt.NDArray[Any]]]: metrics_values: dict[str, list[float]] = {name: [] for name in metrics_dict} all_val_preds = [] From 903dfa7785287b76c8ce2287a67ec5ab637c6dac Mon Sep 17 00:00:00 2001 From: voorhs Date: Thu, 6 Feb 2025 13:55:54 +0300 Subject: [PATCH 41/74] add cache cleaning before refitting --- autointent/_ranker.py | 5 +++++ autointent/modules/embedding/_logreg.py | 5 ++++- autointent/modules/embedding/_retrieval.py | 3 +++ autointent/modules/scoring/_description/description.py | 3 +++ autointent/modules/scoring/_dnnc/dnnc.py | 3 +++ autointent/modules/scoring/_knn/knn.py | 3 +++ autointent/modules/scoring/_knn/rerank_scorer.py | 6 ++++++ autointent/modules/scoring/_linear.py | 3 +++ autointent/modules/scoring/_mlknn/mlknn.py | 3 +++ autointent/modules/scoring/_sklearn/sklearn_scorer.py | 3 +++ 10 files changed, 36 insertions(+), 1 deletion(-) diff --git a/autointent/_ranker.py 
b/autointent/_ranker.py index 7d5663557..879c1b8ee 100644 --- a/autointent/_ranker.py +++ b/autointent/_ranker.py @@ -272,3 +272,8 @@ def load(cls, path: Path) -> "Ranker": metadata: CrossEncoderMetadata = json.load(file) return cls(**metadata, classifier_head=clf) + + def clear_ram(self) -> None: + self.cross_encoder.cpu() + del self.cross_encoder + torch.cuda.empty_cache() diff --git a/autointent/modules/embedding/_logreg.py b/autointent/modules/embedding/_logreg.py index c50dab19d..89138242d 100644 --- a/autointent/modules/embedding/_logreg.py +++ b/autointent/modules/embedding/_logreg.py @@ -96,7 +96,7 @@ def from_context( ) def clear_cache(self) -> None: - pass + self._embedder.clear_ram() def fit(self, utterances: list[str], labels: ListOfLabels) -> None: """ @@ -105,6 +105,9 @@ def fit(self, utterances: list[str], labels: ListOfLabels) -> None: :param utterances: List of text data to index. :param labels: List of corresponding labels for the utterances. """ + if hasattr(self, "_embedder"): + self.clear_cache() + self._validate_task(labels) self._embedder = Embedder( diff --git a/autointent/modules/embedding/_retrieval.py b/autointent/modules/embedding/_retrieval.py index c68e62af0..5063a1b00 100644 --- a/autointent/modules/embedding/_retrieval.py +++ b/autointent/modules/embedding/_retrieval.py @@ -96,6 +96,9 @@ def fit(self, utterances: list[str], labels: ListOfLabels) -> None: :param utterances: List of text data to index. :param labels: List of corresponding labels for the utterances. """ + if hasattr(self, "_vector_index"): + self.clear_cache() + self._validate_task(labels) self._vector_index = VectorIndex( diff --git a/autointent/modules/scoring/_description/description.py b/autointent/modules/scoring/_description/description.py index d16b8c18f..79ade2aa7 100644 --- a/autointent/modules/scoring/_description/description.py +++ b/autointent/modules/scoring/_description/description.py @@ -109,6 +109,9 @@ def fit( :param descriptions: List of intent descriptions. :raises ValueError: If descriptions contain None values or embeddings mismatch utterances. """ + if hasattr(self, "_embedder"): + self._embedder.clear_ram() + self._validate_task(labels) if any(description is None for description in descriptions): diff --git a/autointent/modules/scoring/_dnnc/dnnc.py b/autointent/modules/scoring/_dnnc/dnnc.py index 6d4411f52..0610785ae 100644 --- a/autointent/modules/scoring/_dnnc/dnnc.py +++ b/autointent/modules/scoring/_dnnc/dnnc.py @@ -157,6 +157,9 @@ def fit(self, utterances: list[str], labels: ListOfLabels) -> None: :param labels: List of labels corresponding to the utterances. :raises ValueError: If the vector index mismatches the provided utterances. """ + if hasattr(self, "_vector_index"): + self.clear_cache() + self._validate_task(labels) self._vector_index = VectorIndex( diff --git a/autointent/modules/scoring/_knn/knn.py b/autointent/modules/scoring/_knn/knn.py index cda70bd9a..9b9eed8fc 100644 --- a/autointent/modules/scoring/_knn/knn.py +++ b/autointent/modules/scoring/_knn/knn.py @@ -132,6 +132,9 @@ def fit(self, utterances: list[str], labels: ListOfLabels) -> None: :param labels: List of labels corresponding to the utterances. :raises ValueError: If the vector index mismatches the provided utterances. 
""" + if hasattr(self, "_vector_index"): + self.clear_cache() + self._validate_task(labels) self._vector_index = VectorIndex( diff --git a/autointent/modules/scoring/_knn/rerank_scorer.py b/autointent/modules/scoring/_knn/rerank_scorer.py index 546f0737a..671a2a2d7 100644 --- a/autointent/modules/scoring/_knn/rerank_scorer.py +++ b/autointent/modules/scoring/_knn/rerank_scorer.py @@ -127,6 +127,9 @@ def fit(self, utterances: list[str], labels: ListOfLabels) -> None: :param utterances: List of utterances to fit the scorer. :param labels: List of labels corresponding to the utterances. """ + if hasattr(self, "_scorer"): + self.clear_cache() + self._scorer = Ranker( self.cross_encoder_name, device=self.cross_encoder_device, @@ -138,6 +141,9 @@ def fit(self, utterances: list[str], labels: ListOfLabels) -> None: super().fit(utterances, labels) + def clear_cache(self) -> None: + self._scorer.clear_ram() + def _predict(self, utterances: list[str]) -> tuple[npt.NDArray[Any], list[list[str]]]: """ Predict the scores and neighbors for given utterances. diff --git a/autointent/modules/scoring/_linear.py b/autointent/modules/scoring/_linear.py index b07bd0f89..e0279b31a 100644 --- a/autointent/modules/scoring/_linear.py +++ b/autointent/modules/scoring/_linear.py @@ -127,6 +127,9 @@ def fit( :param labels: List of labels corresponding to the utterances. :raises ValueError: If the vector index mismatches the provided utterances. """ + if hasattr(self, "_clf"): + self.clear_cache() + self._validate_task(labels) embedder = Embedder( diff --git a/autointent/modules/scoring/_mlknn/mlknn.py b/autointent/modules/scoring/_mlknn/mlknn.py index d0622306a..b43763ab0 100644 --- a/autointent/modules/scoring/_mlknn/mlknn.py +++ b/autointent/modules/scoring/_mlknn/mlknn.py @@ -137,6 +137,9 @@ def fit(self, utterances: list[str], labels: ListOfLabels) -> None: :raises TypeError: If the labels are not multi-label. :raises ValueError: If the vector index mismatches the provided utterances. """ + if hasattr(self, "_vector_index"): + self.clear_cache() + self._validate_task(labels) self._vector_index = VectorIndex( diff --git a/autointent/modules/scoring/_sklearn/sklearn_scorer.py b/autointent/modules/scoring/_sklearn/sklearn_scorer.py index a4d9753b4..a54c200bf 100644 --- a/autointent/modules/scoring/_sklearn/sklearn_scorer.py +++ b/autointent/modules/scoring/_sklearn/sklearn_scorer.py @@ -112,6 +112,9 @@ def fit( :param labels: List of labels corresponding to the utterances. :raises ValueError: If the vector index mismatches the provided utterances. """ + if hasattr(self, "_clf"): + self.clear_cache() + self._validate_task(labels) embedder = Embedder( From 5cbf83ee03749229ae50dd8b5ef93c05c3d3f765 Mon Sep 17 00:00:00 2001 From: voorhs Date: Thu, 6 Feb 2025 14:25:19 +0300 Subject: [PATCH 42/74] bug fix --- autointent/_ranker.py | 4 +++- autointent/modules/scoring/_knn/knn.py | 4 ++-- autointent/modules/scoring/_knn/rerank_scorer.py | 3 ++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/autointent/_ranker.py b/autointent/_ranker.py index 879c1b8ee..a19b11d17 100644 --- a/autointent/_ranker.py +++ b/autointent/_ranker.py @@ -3,6 +3,7 @@ Can be used to rank retrieved sentences by meaning closeness to provided utterance. 
""" +import gc import itertools as it import json import logging @@ -274,6 +275,7 @@ def load(cls, path: Path) -> "Ranker": return cls(**metadata, classifier_head=clf) def clear_ram(self) -> None: - self.cross_encoder.cpu() + self.cross_encoder.model.cpu() del self.cross_encoder + gc.collect() torch.cuda.empty_cache() diff --git a/autointent/modules/scoring/_knn/knn.py b/autointent/modules/scoring/_knn/knn.py index 9b9eed8fc..ab665dfe7 100644 --- a/autointent/modules/scoring/_knn/knn.py +++ b/autointent/modules/scoring/_knn/knn.py @@ -124,7 +124,7 @@ def get_embedder_name(self) -> str: """ return self.embedder_name - def fit(self, utterances: list[str], labels: ListOfLabels) -> None: + def fit(self, utterances: list[str], labels: ListOfLabels, clear_cache: bool = False) -> None: """ Fit the scorer by training or loading the vector index. @@ -132,7 +132,7 @@ def fit(self, utterances: list[str], labels: ListOfLabels) -> None: :param labels: List of labels corresponding to the utterances. :raises ValueError: If the vector index mismatches the provided utterances. """ - if hasattr(self, "_vector_index"): + if hasattr(self, "_vector_index") and clear_cache: self.clear_cache() self._validate_task(labels) diff --git a/autointent/modules/scoring/_knn/rerank_scorer.py b/autointent/modules/scoring/_knn/rerank_scorer.py index 671a2a2d7..152f147ca 100644 --- a/autointent/modules/scoring/_knn/rerank_scorer.py +++ b/autointent/modules/scoring/_knn/rerank_scorer.py @@ -139,10 +139,11 @@ def fit(self, utterances: list[str], labels: ListOfLabels) -> None: ) self._scorer.fit(utterances, labels) - super().fit(utterances, labels) + super().fit(utterances, labels, clear_cache=False) def clear_cache(self) -> None: self._scorer.clear_ram() + super().clear_cache() def _predict(self, utterances: list[str]) -> tuple[npt.NDArray[Any], list[list[str]]]: """ From 89c64065f7bfcd324c175433daa65f40a4fd956e Mon Sep 17 00:00:00 2001 From: voorhs Date: Thu, 6 Feb 2025 15:10:25 +0300 Subject: [PATCH 43/74] implement refitting the whole pipeline with all train data --- autointent/_pipeline/_pipeline.py | 26 +++++++- .../context/data_handler/_data_handler.py | 64 ++++++++++++------- tests/pipeline/test_optimization.py | 2 +- 3 files changed, 68 insertions(+), 24 deletions(-) diff --git a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py index 42a93e415..acabe02bd 100644 --- a/autointent/_pipeline/_pipeline.py +++ b/autointent/_pipeline/_pipeline.py @@ -122,7 +122,7 @@ def _is_inference(self) -> bool: """ return isinstance(self.nodes[NodeType.scoring], InferenceNode) - def fit(self, dataset: Dataset, scheme: Literal["ho", "cv"] = "ho") -> Context: + def fit(self, dataset: Dataset, scheme: Literal["ho", "cv"] = "ho", refit_after: bool = False) -> Context: """ Optimize the pipeline from dataset. @@ -150,6 +150,9 @@ def fit(self, dataset: Dataset, scheme: Literal["ho", "cv"] = "ho") -> Context: self.nodes = {node.node_type: node for node in nodes_list} + if refit_after: + self._refit(context) + predictions = self.predict(context.data_handler.test_utterances()) for metric_name, metric in PREDICTION_METRICS_MULTILABEL.items(): context.optimization_info.pipeline_metrics[metric_name] = metric( @@ -210,6 +213,27 @@ def predict(self, utterances: list[str]) -> ListOfGenericLabels: scores = scoring_module.predict(utterances) return decision_module.predict(scores) + def _refit(self, context: Context) -> None: + """ + Fit pipeline of already selected modules with all train data. 
+ + :param utterances: list of utterances + :return: list of predicted labels + """ + if not self._is_inference(): + msg = "Pipeline in optimization mode cannot perform inference" + raise RuntimeError(msg) + + scoring_module: ScoringModule = self.nodes[NodeType.scoring].module # type: ignore[assignment,union-attr] + decision_module: DecisionModule = self.nodes[NodeType.decision].module # type: ignore[assignment,union-attr] + + context.data_handler.prepare_for_refit() + + scoring_module.fit(context.data_handler.train_utterances(0), context.data_handler.train_labels(0)) + scores = scoring_module.predict(context.data_handler.train_utterances(1)) + + decision_module.fit(scores, context.data_handler.train_labels(1)) + def predict_with_metadata(self, utterances: list[str]) -> InferencePipelineOutput: """ Predict the labels for the utterances with metadata. diff --git a/autointent/context/data_handler/_data_handler.py b/autointent/context/data_handler/_data_handler.py index fce8970a7..6a1d4fa59 100644 --- a/autointent/context/data_handler/_data_handler.py +++ b/autointent/context/data_handler/_data_handler.py @@ -46,6 +46,7 @@ def __init__( threshold search). """ set_seed(random_seed) + self.random_seed = random_seed self.dataset = dataset @@ -54,9 +55,9 @@ def __init__( self.n_folds = n_folds if scheme == "ho": - self._split_ho(random_seed, split_train) + self._split_ho(split_train) elif scheme == "cv": - self._split_cv(random_seed) + self._split_cv() self.regexp_patterns = [ RegexPatterns( @@ -185,20 +186,20 @@ def validation_iterator(self) -> Generator[tuple[list[str], ListOfLabels, list[s train_labels = [lab for lab in train_labels if lab is not None] yield train_utterances, train_labels, val_utterances, val_labels # type: ignore[misc] - def _split_ho(self, random_seed: int, split_train: bool) -> None: + def _split_ho(self, split_train: bool) -> None: has_validation_split = any(split.startswith(Split.VALIDATION) for split in self.dataset) if split_train and Split.TRAIN in self.dataset: - self._split_train(random_seed) + self._split_train() if Split.TEST not in self.dataset: test_size = 0.1 if has_validation_split else 0.2 - self._split_test(test_size, random_seed) + self._split_test(test_size) if not has_validation_split: - self._split_validation_from_train(random_seed) + self._split_validation_from_train() elif Split.VALIDATION in self.dataset: - self._split_validation(random_seed) + self._split_validation() for split in self.dataset: n_classes_split = self.dataset.get_n_classes(split) @@ -209,7 +210,7 @@ def _split_ho(self, random_seed: int, split_train: bool) -> None: ) raise ValueError(message) - def _split_train(self, random_seed: int) -> None: + def _split_train(self) -> None: """ Split on two sets. @@ -219,12 +220,12 @@ def _split_train(self, random_seed: int) -> None: self.dataset, split=Split.TRAIN, test_size=0.5, - random_seed=random_seed, + random_seed=self.random_seed, allow_oos_in_train=False, # only train data for decision node should contain OOS ) self.dataset.pop(Split.TRAIN) - def _split_validation(self, random_seed: int) -> None: + def _split_validation(self) -> None: """ Split on two sets. 
@@ -234,21 +235,21 @@ def _split_validation(self, random_seed: int) -> None: self.dataset, split=Split.VALIDATION, test_size=0.5, - random_seed=random_seed, + random_seed=self.random_seed, allow_oos_in_train=False, # only val data for decision node should contain OOS ) self.dataset.pop(Split.VALIDATION) - def _split_validation_from_test(self, random_seed: int) -> None: + def _split_validation_from_test(self) -> None: self.dataset[Split.TEST], self.dataset[Split.VALIDATION] = split_dataset( self.dataset, split=Split.TEST, test_size=0.5, - random_seed=random_seed, + random_seed=self.random_seed, allow_oos_in_train=True, # both test and validation splits can contain OOS ) - def _split_cv(self, random_seed: int) -> None: + def _split_cv(self) -> None: extra_splits = [split_name for split_name in self.dataset if split_name not in [Split.TRAIN, Split.TEST]] if extra_splits: self.dataset[Split.TRAIN] = concatenate_datasets( @@ -257,7 +258,7 @@ def _split_cv(self, random_seed: int) -> None: if Split.TEST not in self.dataset: self.dataset[Split.TRAIN], self.dataset[Split.TEST] = split_dataset( - self.dataset, split=Split.TRAIN, test_size=0.2, random_seed=random_seed, allow_oos_in_train=True + self.dataset, split=Split.TRAIN, test_size=0.2, random_seed=self.random_seed, allow_oos_in_train=True ) for j in range(self.n_folds - 1): @@ -265,18 +266,18 @@ def _split_cv(self, random_seed: int) -> None: self.dataset, split=Split.TRAIN, test_size=1 / (self.n_folds - j), - random_seed=random_seed, + random_seed=self.random_seed, allow_oos_in_train=True, ) self.dataset[f"{Split.TRAIN}_{self.n_folds-1}"] = self.dataset.pop(Split.TRAIN) - def _split_validation_from_train(self, random_seed: int) -> None: + def _split_validation_from_train(self) -> None: if Split.TRAIN in self.dataset: self.dataset[Split.TRAIN], self.dataset[Split.VALIDATION] = split_dataset( self.dataset, split=Split.TRAIN, test_size=0.2, - random_seed=random_seed, + random_seed=self.random_seed, allow_oos_in_train=True, ) else: @@ -285,23 +286,23 @@ def _split_validation_from_train(self, random_seed: int) -> None: self.dataset, split=f"{Split.TRAIN}_{idx}", test_size=0.2, - random_seed=random_seed, + random_seed=self.random_seed, allow_oos_in_train=idx == 1, # for decision node it's ok to have oos in train ) - def _split_test(self, test_size: float, random_seed: int) -> None: + def _split_test(self, test_size: float) -> None: """Obtain test set from train.""" self.dataset[f"{Split.TRAIN}_0"], self.dataset[f"{Split.TEST}_0"] = split_dataset( self.dataset, split=f"{Split.TRAIN}_0", test_size=test_size, - random_seed=random_seed, + random_seed=self.random_seed, ) self.dataset[f"{Split.TRAIN}_1"], self.dataset[f"{Split.TEST}_1"] = split_dataset( self.dataset, split=f"{Split.TRAIN}_1", test_size=test_size, - random_seed=random_seed, + random_seed=self.random_seed, allow_oos_in_train=True, ) self.dataset[Split.TEST] = concatenate_datasets( @@ -309,3 +310,22 @@ def _split_test(self, test_size: float, random_seed: int) -> None: ) self.dataset.pop(f"{Split.TEST}_0") self.dataset.pop(f"{Split.TEST}_1") + + def prepare_for_refit(self) -> None: + if self.scheme == "ho": + return + + train_folds = [split_name for split_name in self.dataset if split_name.startswith("train")] + self.dataset[Split.TRAIN] = concatenate_datasets([self.dataset[name] for name in train_folds]) + for name in train_folds: + self.dataset.pop(name) + + self.dataset[f"{Split.TRAIN}_0"], self.dataset[f"{Split.TRAIN}_1"] = split_dataset( + self.dataset, + split=Split.TRAIN, + 
test_size=0.5, + random_seed=self.random_seed, + allow_oos_in_train=False, + ) + + self.dataset.pop(Split.TRAIN) diff --git a/tests/pipeline/test_optimization.py b/tests/pipeline/test_optimization.py index 6dd35b690..77344c9a3 100644 --- a/tests/pipeline/test_optimization.py +++ b/tests/pipeline/test_optimization.py @@ -31,7 +31,7 @@ def test_cv(dataset, task_type): if task_type == "multilabel": dataset = dataset.to_multilabel() - context = pipeline_optimizer.fit(dataset, scheme="cv") + context = pipeline_optimizer.fit(dataset, scheme="cv", refit_after=True) context.dump() assert os.listdir(pipeline_optimizer.logging_config.dump_dir) From 7f4d7ac09db596508b076ba685432cef90f20d34 Mon Sep 17 00:00:00 2001 From: voorhs Date: Thu, 6 Feb 2025 15:11:24 +0300 Subject: [PATCH 44/74] fix typing --- autointent/_pipeline/_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py index acabe02bd..ef8a658cc 100644 --- a/autointent/_pipeline/_pipeline.py +++ b/autointent/_pipeline/_pipeline.py @@ -229,7 +229,7 @@ def _refit(self, context: Context) -> None: context.data_handler.prepare_for_refit() - scoring_module.fit(context.data_handler.train_utterances(0), context.data_handler.train_labels(0)) + scoring_module.fit(context.data_handler.train_utterances(0), context.data_handler.train_labels(0)) # type: ignore[arg-type] scores = scoring_module.predict(context.data_handler.train_utterances(1)) decision_module.fit(scores, context.data_handler.train_labels(1)) From d22b2db8132b529ea56944fc4ef9c905cb089d12 Mon Sep 17 00:00:00 2001 From: voorhs Date: Thu, 6 Feb 2025 15:34:14 +0300 Subject: [PATCH 45/74] bug fix --- autointent/_pipeline/_pipeline.py | 4 ++-- tests/data/test_stratificaiton.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py index ef8a658cc..c401d8425 100644 --- a/autointent/_pipeline/_pipeline.py +++ b/autointent/_pipeline/_pipeline.py @@ -229,10 +229,10 @@ def _refit(self, context: Context) -> None: context.data_handler.prepare_for_refit() - scoring_module.fit(context.data_handler.train_utterances(0), context.data_handler.train_labels(0)) # type: ignore[arg-type] + scoring_module.fit(*scoring_module.get_train_data(context)) # type: ignore[arg-type] scores = scoring_module.predict(context.data_handler.train_utterances(1)) - decision_module.fit(scores, context.data_handler.train_labels(1)) + decision_module.fit(scores, context.data_handler.train_labels(1), context.data_handler.tags) def predict_with_metadata(self, utterances: list[str]) -> InferencePipelineOutput: """ diff --git a/tests/data/test_stratificaiton.py b/tests/data/test_stratificaiton.py index 08ae46099..37151ddf2 100644 --- a/tests/data/test_stratificaiton.py +++ b/tests/data/test_stratificaiton.py @@ -38,6 +38,6 @@ def test_multilabel_train_test_split(dataset_unsplitted): assert Split.TRAIN in dataset assert Split.TEST in dataset - assert dataset[Split.TRAIN].num_rows == 17 - assert dataset[Split.TEST].num_rows == 19 + assert dataset[Split.TRAIN].num_rows == 19 + assert dataset[Split.TEST].num_rows == 17 assert dataset.get_n_classes(Split.TRAIN) == dataset.get_n_classes(Split.TEST) From 82e8c3f1f6b73a608f64bd3eab9a0105ff95f505 Mon Sep 17 00:00:00 2001 From: voorhs Date: Thu, 6 Feb 2025 16:00:05 +0300 Subject: [PATCH 46/74] fix typing --- autointent/_pipeline/_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py index c401d8425..33c58f5b1 100644 --- a/autointent/_pipeline/_pipeline.py +++ b/autointent/_pipeline/_pipeline.py @@ -229,7 +229,7 @@ def _refit(self, context: Context) -> None: context.data_handler.prepare_for_refit() - scoring_module.fit(*scoring_module.get_train_data(context)) # type: ignore[arg-type] + scoring_module.fit(*scoring_module.get_train_data(context)) scores = scoring_module.predict(context.data_handler.train_utterances(1)) decision_module.fit(scores, context.data_handler.train_labels(1), context.data_handler.tags) From 4f2b4c8e90f8947090b7b57548570c0d80f36c54 Mon Sep 17 00:00:00 2001 From: voorhs Date: Fri, 7 Feb 2025 13:23:56 +0300 Subject: [PATCH 47/74] respond to samoed --- autointent/context/data_handler/_data_handler.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/autointent/context/data_handler/_data_handler.py b/autointent/context/data_handler/_data_handler.py index 6a1d4fa59..7aeb4fd36 100644 --- a/autointent/context/data_handler/_data_handler.py +++ b/autointent/context/data_handler/_data_handler.py @@ -315,10 +315,8 @@ def prepare_for_refit(self) -> None: if self.scheme == "ho": return - train_folds = [split_name for split_name in self.dataset if split_name.startswith("train")] - self.dataset[Split.TRAIN] = concatenate_datasets([self.dataset[name] for name in train_folds]) - for name in train_folds: - self.dataset.pop(name) + train_folds = [split_name for split_name in self.dataset if split_name.startswith(Split.TRAIN)] + self.dataset[Split.TRAIN] = concatenate_datasets([self.dataset.pop(name) for name in train_folds]) self.dataset[f"{Split.TRAIN}_0"], self.dataset[f"{Split.TRAIN}_1"] = split_dataset( self.dataset, From 46ea8598a3dcb9d2ecc2693f0e5be0638b5eda02 Mon Sep 17 00:00:00 2001 From: voorhs Date: Mon, 10 Feb 2025 18:23:06 +0300 Subject: [PATCH 48/74] create `ValidationType` in `autointent.custom_types` --- autointent/_pipeline/_pipeline.py | 6 +++--- autointent/configs/_optimization.py | 5 +++-- autointent/context/_context.py | 5 +++-- autointent/context/data_handler/_data_handler.py | 6 +++--- autointent/custom_types.py | 3 +++ 5 files changed, 15 insertions(+), 10 deletions(-) diff --git a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py index 33c58f5b1..d6e06acbb 100644 --- a/autointent/_pipeline/_pipeline.py +++ b/autointent/_pipeline/_pipeline.py @@ -3,14 +3,14 @@ import json import logging from pathlib import Path -from typing import TYPE_CHECKING, Any, Literal +from typing import TYPE_CHECKING, Any import numpy as np import yaml from autointent import Context, Dataset from autointent.configs import CrossEncoderConfig, EmbedderConfig, InferenceNodeConfig, LoggingConfig, VectorIndexConfig -from autointent.custom_types import ListOfGenericLabels, NodeType +from autointent.custom_types import ListOfGenericLabels, NodeType, ValidationType from autointent.metrics import PREDICTION_METRICS_MULTILABEL from autointent.nodes import InferenceNode, NodeOptimizer from autointent.nodes.schemes import OptimizationConfig @@ -122,7 +122,7 @@ def _is_inference(self) -> bool: """ return isinstance(self.nodes[NodeType.scoring], InferenceNode) - def fit(self, dataset: Dataset, scheme: Literal["ho", "cv"] = "ho", refit_after: bool = False) -> Context: + def fit(self, dataset: Dataset, scheme: ValidationType = "ho", refit_after: bool = False) -> Context: """ Optimize the pipeline from dataset. 
diff --git a/autointent/configs/_optimization.py b/autointent/configs/_optimization.py index 27ecc52a2..23f94159f 100644 --- a/autointent/configs/_optimization.py +++ b/autointent/configs/_optimization.py @@ -1,10 +1,11 @@ """Configuration for the optimization process.""" from pathlib import Path -from typing import Literal from pydantic import BaseModel, Field +from autointent.custom_types import ValidationType + from ._name import get_run_name @@ -13,7 +14,7 @@ class DataConfig(BaseModel): train_path: str | Path """Path to the training data. Can be local path or HF repo.""" - scheme: Literal["ho", "cv"] + scheme: ValidationType """Hold-out or cross-validation.""" diff --git a/autointent/context/_context.py b/autointent/context/_context.py index dd73db842..15ef8e044 100644 --- a/autointent/context/_context.py +++ b/autointent/context/_context.py @@ -3,7 +3,7 @@ import json import logging from pathlib import Path -from typing import Any, Literal +from typing import Any import yaml @@ -16,6 +16,7 @@ LoggingConfig, VectorIndexConfig, ) +from autointent.custom_types import ValidationType from ._utils import NumpyEncoder, load_dataset from .data_handler import DataHandler @@ -84,7 +85,7 @@ def configure_data(self, config: DataConfig) -> None: dataset=load_dataset(config.train_path), random_seed=self.seed, scheme=config.scheme ) - def set_dataset(self, dataset: Dataset, scheme: Literal["ho", "cv"] = "ho") -> None: + def set_dataset(self, dataset: Dataset, scheme: ValidationType = "ho") -> None: """ Set the datasets for training, validation and testing. diff --git a/autointent/context/data_handler/_data_handler.py b/autointent/context/data_handler/_data_handler.py index 7aeb4fd36..ff450724c 100644 --- a/autointent/context/data_handler/_data_handler.py +++ b/autointent/context/data_handler/_data_handler.py @@ -2,13 +2,13 @@ import logging from collections.abc import Generator -from typing import Literal, TypedDict, cast +from typing import TypedDict, cast from datasets import concatenate_datasets from transformers import set_seed from autointent import Dataset -from autointent.custom_types import ListOfGenericLabels, ListOfLabels, Split +from autointent.custom_types import ListOfGenericLabels, ListOfLabels, Split, ValidationType from ._stratification import split_dataset @@ -32,7 +32,7 @@ class DataHandler: # TODO rename to Validator def __init__( self, dataset: Dataset, - scheme: Literal["cv", "ho"] = "ho", + scheme: ValidationType = "ho", split_train: bool = True, random_seed: int = 0, n_folds: int = 3, diff --git a/autointent/custom_types.py b/autointent/custom_types.py index c6fe95350..563d42320 100644 --- a/autointent/custom_types.py +++ b/autointent/custom_types.py @@ -69,3 +69,6 @@ class Split: VALIDATION = "validation" TEST = "test" INTENTS = "intents" + + +ValidationType = Literal["ho", "cv"] From 79e25bfa3cea2acca909b3301d2fbb9a71a54a92 Mon Sep 17 00:00:00 2001 From: voorhs Date: Mon, 10 Feb 2025 18:25:29 +0300 Subject: [PATCH 49/74] fix docstring --- autointent/_pipeline/_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py index d6e06acbb..0ae703295 100644 --- a/autointent/_pipeline/_pipeline.py +++ b/autointent/_pipeline/_pipeline.py @@ -217,7 +217,7 @@ def _refit(self, context: Context) -> None: """ Fit pipeline of already selected modules with all train data. 
- :param utterances: list of utterances + :param context: context object to take data from :return: list of predicted labels """ if not self._is_inference(): From 231124499dc160c441ebd1e5b8e76c54e749b592 Mon Sep 17 00:00:00 2001 From: voorhs Date: Mon, 10 Feb 2025 18:36:25 +0300 Subject: [PATCH 50/74] properly expose `n_folds` argument --- autointent/_pipeline/_pipeline.py | 6 ++++-- autointent/configs/_optimization.py | 2 ++ autointent/context/_context.py | 3 ++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py index 0ae703295..3e07dfbbd 100644 --- a/autointent/_pipeline/_pipeline.py +++ b/autointent/_pipeline/_pipeline.py @@ -122,7 +122,9 @@ def _is_inference(self) -> bool: """ return isinstance(self.nodes[NodeType.scoring], InferenceNode) - def fit(self, dataset: Dataset, scheme: ValidationType = "ho", refit_after: bool = False) -> Context: + def fit( + self, dataset: Dataset, scheme: ValidationType = "ho", n_folds: int = 3, refit_after: bool = False + ) -> Context: """ Optimize the pipeline from dataset. @@ -134,7 +136,7 @@ def fit(self, dataset: Dataset, scheme: ValidationType = "ho", refit_after: bool raise RuntimeError(msg) context = Context() - context.set_dataset(dataset, scheme) + context.set_dataset(dataset, scheme, n_folds) context.configure_logging(self.logging_config) context.configure_vector_index(self.vector_index_config, self.embedder_config) context.configure_cross_encoder(self.cross_encoder_config) diff --git a/autointent/configs/_optimization.py b/autointent/configs/_optimization.py index 23f94159f..4195f9520 100644 --- a/autointent/configs/_optimization.py +++ b/autointent/configs/_optimization.py @@ -16,6 +16,8 @@ class DataConfig(BaseModel): """Path to the training data. Can be local path or HF repo.""" scheme: ValidationType """Hold-out or cross-validation.""" + n_folds: int = 3 + """Number of folds in cross-validation.""" class TaskConfig(BaseModel): diff --git a/autointent/context/_context.py b/autointent/context/_context.py index 15ef8e044..aa99a75a0 100644 --- a/autointent/context/_context.py +++ b/autointent/context/_context.py @@ -85,7 +85,7 @@ def configure_data(self, config: DataConfig) -> None: dataset=load_dataset(config.train_path), random_seed=self.seed, scheme=config.scheme ) - def set_dataset(self, dataset: Dataset, scheme: ValidationType = "ho") -> None: + def set_dataset(self, dataset: Dataset, scheme: ValidationType = "ho", n_folds: int = 3) -> None: """ Set the datasets for training, validation and testing. 
@@ -95,6 +95,7 @@ def set_dataset(self, dataset: Dataset, scheme: ValidationType = "ho") -> None: dataset=dataset, random_seed=self.seed, scheme=scheme, + n_folds=n_folds, ) def get_inference_config(self) -> dict[str, Any]: From 2929423d32935b3376020ff4feac480f95820802 Mon Sep 17 00:00:00 2001 From: voorhs Date: Mon, 10 Feb 2025 20:39:09 +0300 Subject: [PATCH 51/74] implement `_fit_bayes` --- autointent/configs/_optimization.py | 3 +- autointent/custom_types.py | 1 + .../nodes/_optimization/_node_optimizer.py | 104 ++++++++++++++++-- 3 files changed, 100 insertions(+), 8 deletions(-) diff --git a/autointent/configs/_optimization.py b/autointent/configs/_optimization.py index 4195f9520..c7003641b 100644 --- a/autointent/configs/_optimization.py +++ b/autointent/configs/_optimization.py @@ -4,7 +4,7 @@ from pydantic import BaseModel, Field -from autointent.custom_types import ValidationType +from autointent.custom_types import TuningType, ValidationType from ._name import get_run_name @@ -25,6 +25,7 @@ class TaskConfig(BaseModel): search_space_path: Path | None = None """Path to the search space configuration file. If None, the default search space will be used""" + sampler: TuningType = "brute" class LoggingConfig(BaseModel): diff --git a/autointent/custom_types.py b/autointent/custom_types.py index 563d42320..215b3967a 100644 --- a/autointent/custom_types.py +++ b/autointent/custom_types.py @@ -72,3 +72,4 @@ class Split: ValidationType = Literal["ho", "cv"] +TuningType = Literal["brute", "bayes"] diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_optimization/_node_optimizer.py index 2271f9feb..0838cfd08 100644 --- a/autointent/nodes/_optimization/_node_optimizer.py +++ b/autointent/nodes/_optimization/_node_optimizer.py @@ -4,16 +4,24 @@ import itertools as it import logging from copy import deepcopy +from functools import partial from pathlib import Path -from typing import Any +from typing import Any, Literal, TypedDict +import optuna import torch +from optuna.trial import Trial from autointent.context import Context -from autointent.custom_types import NodeType +from autointent.custom_types import NodeType, TuningType from autointent.nodes._nodes_info import NODES_INFO +class ParamSpace(TypedDict): + type: Literal["cat", "int", "float"] + content: list[Any] + + class NodeOptimizer: """Node optimizer class.""" @@ -42,7 +50,7 @@ def __init__( self.modules_search_spaces = search_space # TODO search space validation self._logger = logging.getLogger(__name__) # TODO solve duplicate logging messages problem - def fit(self, context: Context) -> None: + def fit(self, context: Context, tuning: TuningType = "brute") -> None: """ Fit the node optimizer. 
@@ -50,6 +58,17 @@ def fit(self, context: Context) -> None: """ self._logger.info("starting %s node optimization...", self.node_info.node_type) + if tuning == "brute": + self._fit_brute(context) + elif tuning == "bayes": + self._fit_bayes(context) + else: + msg = f"Unexepected tuning type: {tuning}" + raise ValueError(msg) + + self._logger.info("%s node optimization is finished!", self.node_info.node_type) + + def _fit_brute(self, context: Context) -> None: for search_space in deepcopy(self.modules_search_spaces): module_name = search_space.pop("module_name") @@ -57,15 +76,16 @@ def fit(self, context: Context) -> None: module_kwargs = dict(zip(search_space.keys(), params_combination, strict=False)) self._logger.debug("initializing %s module...", module_name) - context.callback_handler.start_module( - module_name=module_name, num=j_combination, module_kwargs=module_kwargs - ) module = self.node_info.modules_available[module_name].from_context(context, **module_kwargs) embedder_name = module.get_embedder_name() if embedder_name is not None: module_kwargs["embedder_name"] = embedder_name + context.callback_handler.start_module( + module_name=module_name, num=j_combination, module_kwargs=module_kwargs + ) + self._logger.debug("scoring %s module...", module_name) metrics_score = module.score(context, metrics=self.metrics) metric_value = metrics_score[self.target_metric] @@ -97,7 +117,77 @@ def fit(self, context: Context) -> None: gc.collect() torch.cuda.empty_cache() - self._logger.info("%s node optimization is finished!", self.node_info.node_type) + def _fit_bayes(self, context: Context, seed: int = 42, n_trials: int = 10) -> None: + self._counter = 0 + for search_space in deepcopy(self.modules_search_spaces): + study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(seed=seed)) + optuna.logging.set_verbosity(optuna.logging.WARNING) + module_name = search_space.pop("module_name") + obj = partial(self.objective, module_name=module_name, search_space=search_space, context=context) + study.optimize(obj, n_trials=n_trials) + + def objective(self, trial: Trial, module_name: str, search_space: dict[str, ParamSpace], context: Context) -> float: + config = self.suggest(trial, search_space) + + self._logger.debug("initializing %s module...", module_name) + module = self.node_info.modules_available[module_name].from_context(context, **config) + + embedder_name = module.get_embedder_name() + if embedder_name is not None: + config["embedder_name"] = embedder_name + + context.callback_handler.start_module(module_name=module_name, num=self._counter, module_kwargs=config) + + self._logger.debug("scoring %s module...", module_name) + all_metrics = module.score(context, metrics=self.metrics) + target_metric = all_metrics[self.target_metric] + + context.callback_handler.log_metrics(all_metrics) + context.callback_handler.end_module() + + dump_dir = context.get_dump_dir() + + if dump_dir is not None: + module_dump_dir = self.get_module_dump_dir(dump_dir, module_name, self._counter) + module.dump(module_dump_dir) + else: + module_dump_dir = None + + context.optimization_info.log_module_optimization( + self.node_info.node_type, + module_name, + config, + target_metric, + self.target_metric, + module.get_assets(), # retriever name / scores / predictions + module_dump_dir, + module=module if not context.is_ram_to_clear() else None, + ) + + if context.is_ram_to_clear(): + module.clear_cache() + gc.collect() + torch.cuda.empty_cache() + + self._counter += 1 + + return target_metric + + def 
suggest(self, trial: Trial, search_space: dict[str, ParamSpace]) -> dict[str, Any]: + res = {} + for param_name, param_space in search_space.items(): + if param_space["type"] == "cat": + res[param_name] = trial.suggest_categorical(param_name, choices=param_space["content"]) + elif param_space["type"] == "int": + low, high, step, log = param_space["content"] + res[param_name] = trial.suggest_int(param_name, low=low, high=high, step=step, log=log) + elif param_space["type"] == "float": + low, high, step, log = param_space["content"] + res[param_name] = trial.suggest_float(param_name, low=low, high=high, step=step, log=log) + else: + msg = f"Unsupported type of param search space: {param_space['type']}" + raise RuntimeError(msg) + return res def get_module_dump_dir(self, dump_dir: Path, module_name: str, j_combination: int) -> str: """ From 43b1910fc3b5305d307e8eaf854568031ee1e620 Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 11 Feb 2025 21:15:23 +0300 Subject: [PATCH 52/74] add typing to param spaces --- .../nodes/_optimization/_node_optimizer.py | 37 ++++++++++++++----- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_optimization/_node_optimizer.py index 0838cfd08..c7b52d7de 100644 --- a/autointent/nodes/_optimization/_node_optimizer.py +++ b/autointent/nodes/_optimization/_node_optimizer.py @@ -17,9 +17,24 @@ from autointent.nodes._nodes_info import NODES_INFO +class ParamSpaceCat(TypedDict): + choices: list[Any] + +class ParamSpaceInt(TypedDict, total=False): + low: int + high: int + step: int + log: bool + +class ParamSpaceFloat(TypedDict, total=False): + low: float + high: float + step: float + log: bool + class ParamSpace(TypedDict): type: Literal["cat", "int", "float"] - content: list[Any] + content: ParamSpaceCat | ParamSpaceInt | ParamSpaceFloat class NodeOptimizer: @@ -126,7 +141,9 @@ def _fit_bayes(self, context: Context, seed: int = 42, n_trials: int = 10) -> No obj = partial(self.objective, module_name=module_name, search_space=search_space, context=context) study.optimize(obj, n_trials=n_trials) - def objective(self, trial: Trial, module_name: str, search_space: dict[str, ParamSpace], context: Context) -> float: + def objective( + self, trial: Trial, module_name: str, search_space: dict[str, ParamSpace | list[Any]], context: Context + ) -> float: config = self.suggest(trial, search_space) self._logger.debug("initializing %s module...", module_name) @@ -173,17 +190,17 @@ def objective(self, trial: Trial, module_name: str, search_space: dict[str, Para return target_metric - def suggest(self, trial: Trial, search_space: dict[str, ParamSpace]) -> dict[str, Any]: - res = {} + def suggest(self, trial: Trial, search_space: dict[str, ParamSpace | list[Any]]) -> dict[str, Any]: + res: dict[str, Any] = {} for param_name, param_space in search_space.items(): - if param_space["type"] == "cat": - res[param_name] = trial.suggest_categorical(param_name, choices=param_space["content"]) + if isinstance(param_space, list): + res[param_name] = trial.suggest_categorical(param_name, choices=param_space) + elif param_space["type"] == "cat": + res[param_name] = trial.suggest_categorical(param_name, **param_space["content"]) elif param_space["type"] == "int": - low, high, step, log = param_space["content"] - res[param_name] = trial.suggest_int(param_name, low=low, high=high, step=step, log=log) + res[param_name] = trial.suggest_int(param_name, **param_space["content"]) elif param_space["type"] == "float": - low, 
high, step, log = param_space["content"] - res[param_name] = trial.suggest_float(param_name, low=low, high=high, step=step, log=log) + res[param_name] = trial.suggest_float(param_name, **param_space["content"]) else: msg = f"Unsupported type of param search space: {param_space['type']}" raise RuntimeError(msg) From a0ef81cec642a9d0b8ef012748449270a1a68334 Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 11 Feb 2025 21:16:08 +0300 Subject: [PATCH 53/74] minor bug fix --- autointent/nodes/_optimization/_node_optimizer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_optimization/_node_optimizer.py index c7b52d7de..5d8980831 100644 --- a/autointent/nodes/_optimization/_node_optimizer.py +++ b/autointent/nodes/_optimization/_node_optimizer.py @@ -202,8 +202,8 @@ def suggest(self, trial: Trial, search_space: dict[str, ParamSpace | list[Any]]) elif param_space["type"] == "float": res[param_name] = trial.suggest_float(param_name, **param_space["content"]) else: - msg = f"Unsupported type of param search space: {param_space['type']}" - raise RuntimeError(msg) + msg = f"Unsupported type of param search space: {param_space}" + raise TypeError(msg) return res def get_module_dump_dir(self, dump_dir: Path, module_name: str, j_combination: int) -> str: From b2f4dc27472fc8a075a093aae93e7749fae4dfc2 Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 11 Feb 2025 21:24:19 +0300 Subject: [PATCH 54/74] minor bug fix --- autointent/nodes/_optimization/_node_optimizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_optimization/_node_optimizer.py index 5d8980831..1337bb585 100644 --- a/autointent/nodes/_optimization/_node_optimizer.py +++ b/autointent/nodes/_optimization/_node_optimizer.py @@ -133,8 +133,8 @@ def _fit_brute(self, context: Context) -> None: torch.cuda.empty_cache() def _fit_bayes(self, context: Context, seed: int = 42, n_trials: int = 10) -> None: - self._counter = 0 for search_space in deepcopy(self.modules_search_spaces): + self._counter = 0 study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(seed=seed)) optuna.logging.set_verbosity(optuna.logging.WARNING) module_name = search_space.pop("module_name") From 203b5ee7f1e82e0aded0d0b0fb8927fd5503595c Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 11 Feb 2025 21:25:51 +0300 Subject: [PATCH 55/74] fix codestyle --- autointent/nodes/_optimization/_node_optimizer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_optimization/_node_optimizer.py index 1337bb585..e1acab6a7 100644 --- a/autointent/nodes/_optimization/_node_optimizer.py +++ b/autointent/nodes/_optimization/_node_optimizer.py @@ -20,18 +20,21 @@ class ParamSpaceCat(TypedDict): choices: list[Any] + class ParamSpaceInt(TypedDict, total=False): low: int high: int step: int log: bool + class ParamSpaceFloat(TypedDict, total=False): low: float high: float step: float log: bool + class ParamSpace(TypedDict): type: Literal["cat", "int", "float"] content: ParamSpaceCat | ParamSpaceInt | ParamSpaceFloat From a78a60c1c0c35fa166ddd73b1d5334ef1f185692 Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 11 Feb 2025 21:56:18 +0300 Subject: [PATCH 56/74] add tuning selection to pipeline --- autointent/_pipeline/_pipeline.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git 
a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py index a6a7905f7..271efc432 100644 --- a/autointent/_pipeline/_pipeline.py +++ b/autointent/_pipeline/_pipeline.py @@ -10,7 +10,7 @@ from autointent import Context, Dataset from autointent.configs import CrossEncoderConfig, EmbedderConfig, InferenceNodeConfig, LoggingConfig, VectorIndexConfig -from autointent.custom_types import ListOfGenericLabels, NodeType, ValidationScheme +from autointent.custom_types import ListOfGenericLabels, NodeType, TuningType, ValidationScheme from autointent.metrics import PREDICTION_METRICS_MULTILABEL from autointent.nodes import InferenceNode, NodeOptimizer from autointent.nodes.schemes import OptimizationConfig @@ -93,7 +93,7 @@ def default_optimizer(cls, multilabel: bool, seed: int = 42) -> "Pipeline": """ return cls.from_search_space(search_space=load_default_search_space(multilabel), seed=seed) - def _fit(self, context: Context) -> None: + def _fit(self, context: Context, tuning: TuningType = "brute") -> None: """ Optimize the pipeline. @@ -108,7 +108,7 @@ def _fit(self, context: Context) -> None: for node_type in NodeType: node_optimizer = self.nodes.get(node_type, None) if node_optimizer is not None: - node_optimizer.fit(context) # type: ignore[union-attr] + node_optimizer.fit(context, tuning) # type: ignore[union-attr] if not context.vector_index_config.save_db: self._logger.info("removing vector database from file system...") # TODO clear cache from appdirs @@ -123,7 +123,12 @@ def _is_inference(self) -> bool: return isinstance(self.nodes[NodeType.scoring], InferenceNode) def fit( - self, dataset: Dataset, scheme: ValidationScheme = "ho", n_folds: int = 3, refit_after: bool = False + self, + dataset: Dataset, + scheme: ValidationScheme = "ho", + n_folds: int = 3, + refit_after: bool = False, + tuning: TuningType = "brute", ) -> Context: """ Optimize the pipeline from dataset. 
@@ -141,7 +146,7 @@ def fit( context.configure_vector_index(self.vector_index_config, self.embedder_config) context.configure_cross_encoder(self.cross_encoder_config) self.validate_modules(dataset) - self._fit(context) + self._fit(context, tuning) if context.is_ram_to_clear(): nodes_configs = context.optimization_info.get_inference_nodes_config() From 5ed5ac32d3dd773634a956ccc6722ad5f47c68e1 Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 11 Feb 2025 21:56:31 +0300 Subject: [PATCH 57/74] add test on bayes --- tests/assets/configs/bayes.yaml | 30 +++++++++++++++++++++++++++++ tests/conftest.py | 2 +- tests/pipeline/test_optimization.py | 14 ++++++++++++-- 3 files changed, 43 insertions(+), 3 deletions(-) create mode 100644 tests/assets/configs/bayes.yaml diff --git a/tests/assets/configs/bayes.yaml b/tests/assets/configs/bayes.yaml new file mode 100644 index 000000000..5a31f4ec4 --- /dev/null +++ b/tests/assets/configs/bayes.yaml @@ -0,0 +1,30 @@ +- node_type: embedding + target_metric: retrieval_hit_rate + search_space: + - module_name: retrieval + k: [10] + embedder_name: + - sentence-transformers/all-MiniLM-L6-v2 + - avsolatorio/GIST-small-Embedding-v0 +- node_type: scoring + target_metric: scoring_roc_auc + search_space: + - module_name: knn + k: + type: "int" + low: 5 + high: 10 + step: 1 + weights: [uniform, distance, closest] + - module_name: linear +- node_type: decision + target_metric: decision_accuracy + search_space: + - module_name: threshold + thresh: + type: float + low: 0.1 + high: 0.9 + - module_name: tunable + - module_name: argmax + - module_name: jinoos diff --git a/tests/conftest.py b/tests/conftest.py index 1945b3426..4d4190d4c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -27,7 +27,7 @@ def dataset_unsplitted(): return Dataset.from_json(path) -TaskType = Literal["multiclass", "multilabel", "description"] +TaskType = Literal["multiclass", "multilabel", "description", "bayes"] def get_search_space_path(task_type: TaskType): diff --git a/tests/pipeline/test_optimization.py b/tests/pipeline/test_optimization.py index 3c4b83893..dbf0b1cb9 100644 --- a/tests/pipeline/test_optimization.py +++ b/tests/pipeline/test_optimization.py @@ -1,5 +1,4 @@ import os -from typing import Literal import pytest @@ -11,7 +10,18 @@ ) from tests.conftest import get_search_space, setup_environment -TaskType = Literal["multiclass", "multilabel", "description"] + +def test_bayes(dataset): + project_dir = setup_environment() + search_space = get_search_space("bayes") + + pipeline_optimizer = Pipeline.from_search_space(search_space) + + pipeline_optimizer.set_config(LoggingConfig(project_dir=project_dir, dump_modules=True, clear_ram=True)) + pipeline_optimizer.set_config(VectorIndexConfig()) + pipeline_optimizer.set_config(EmbedderConfig(batch_size=16, max_length=32, device="cuda")) + + pipeline_optimizer.fit(dataset, scheme="cv", refit_after=True) @pytest.mark.parametrize( From 364e60a830c75187e242a90dadf5443be631a378 Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 11 Feb 2025 22:07:25 +0300 Subject: [PATCH 58/74] disable search space validation for now --- autointent/_pipeline/_pipeline.py | 2 +- tests/assets/configs/bayes.yaml | 12 +++++++----- tests/pipeline/test_optimization.py | 4 ++-- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py index 271efc432..ed2bd1f8d 100644 --- a/autointent/_pipeline/_pipeline.py +++ b/autointent/_pipeline/_pipeline.py @@ -77,7 +77,7 @@ def from_search_space(cls, 
search_space: list[dict[str, Any]] | Path | str, seed """ if isinstance(search_space, Path | str): search_space = load_search_space(search_space) - validated_search_space = OptimizationConfig(search_space).model_dump() # type: ignore[arg-type] + validated_search_space = search_space # OptimizationConfig(search_space).model_dump() # type: ignore[arg-type] nodes = [NodeOptimizer(**node) for node in validated_search_space] return cls(nodes=nodes, seed=seed) diff --git a/tests/assets/configs/bayes.yaml b/tests/assets/configs/bayes.yaml index 5a31f4ec4..a008c7005 100644 --- a/tests/assets/configs/bayes.yaml +++ b/tests/assets/configs/bayes.yaml @@ -12,9 +12,10 @@ - module_name: knn k: type: "int" - low: 5 - high: 10 - step: 1 + content: + low: 5 + high: 10 + step: 1 weights: [uniform, distance, closest] - module_name: linear - node_type: decision @@ -23,8 +24,9 @@ - module_name: threshold thresh: type: float - low: 0.1 - high: 0.9 + content: + low: 0.1 + high: 0.9 - module_name: tunable - module_name: argmax - module_name: jinoos diff --git a/tests/pipeline/test_optimization.py b/tests/pipeline/test_optimization.py index dbf0b1cb9..75104d42e 100644 --- a/tests/pipeline/test_optimization.py +++ b/tests/pipeline/test_optimization.py @@ -19,9 +19,9 @@ def test_bayes(dataset): pipeline_optimizer.set_config(LoggingConfig(project_dir=project_dir, dump_modules=True, clear_ram=True)) pipeline_optimizer.set_config(VectorIndexConfig()) - pipeline_optimizer.set_config(EmbedderConfig(batch_size=16, max_length=32, device="cuda")) + pipeline_optimizer.set_config(EmbedderConfig(batch_size=16, max_length=32, device="cpu")) - pipeline_optimizer.fit(dataset, scheme="cv", refit_after=True) + pipeline_optimizer.fit(dataset, scheme="cv", refit_after=True, tuning="bayes") @pytest.mark.parametrize( From 38475e63fb113b2ac12796192e95885328c6219a Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 11 Feb 2025 22:07:53 +0300 Subject: [PATCH 59/74] fix codestyle --- autointent/_pipeline/_pipeline.py | 1 - 1 file changed, 1 deletion(-) diff --git a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py index ed2bd1f8d..07d25051e 100644 --- a/autointent/_pipeline/_pipeline.py +++ b/autointent/_pipeline/_pipeline.py @@ -13,7 +13,6 @@ from autointent.custom_types import ListOfGenericLabels, NodeType, TuningType, ValidationScheme from autointent.metrics import PREDICTION_METRICS_MULTILABEL from autointent.nodes import InferenceNode, NodeOptimizer -from autointent.nodes.schemes import OptimizationConfig from autointent.utils import load_default_search_space, load_search_space from ._schemas import InferencePipelineOutput, InferencePipelineUtteranceOutput From 9f71c0bb5a528797f4b4367a0f825012bb7c448b Mon Sep 17 00:00:00 2001 From: voorhs Date: Tue, 11 Feb 2025 22:22:35 +0300 Subject: [PATCH 60/74] remove `ParamSpaceCat` (it's redundant) --- autointent/nodes/_optimization/_node_optimizer.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_optimization/_node_optimizer.py index d784cf115..010444239 100644 --- a/autointent/nodes/_optimization/_node_optimizer.py +++ b/autointent/nodes/_optimization/_node_optimizer.py @@ -18,10 +18,6 @@ from autointent.nodes._nodes_info import NODES_INFO -class ParamSpaceCat(TypedDict): - choices: list[Any] - - class ParamSpaceInt(TypedDict, total=False): low: int high: int @@ -38,7 +34,7 @@ class ParamSpaceFloat(TypedDict, total=False): class ParamSpace(TypedDict): type: Literal["cat", "int", 
"float"] - content: ParamSpaceCat | ParamSpaceInt | ParamSpaceFloat + content: ParamSpaceInt | ParamSpaceFloat class NodeOptimizer: @@ -199,8 +195,6 @@ def suggest(self, trial: Trial, search_space: dict[str, ParamSpace | list[Any]]) for param_name, param_space in search_space.items(): if isinstance(param_space, list): res[param_name] = trial.suggest_categorical(param_name, choices=param_space) - elif param_space["type"] == "cat": - res[param_name] = trial.suggest_categorical(param_name, **param_space["content"]) elif param_space["type"] == "int": res[param_name] = trial.suggest_int(param_name, **param_space["content"]) elif param_space["type"] == "float": From 0c5eef6abbe6e0fb84bfd4bbcc08f2612be4caf2 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 12 Feb 2025 11:24:56 +0300 Subject: [PATCH 61/74] move to optuna entirely --- autointent/custom_types.py | 2 +- .../nodes/_optimization/_node_optimizer.py | 77 ++++--------------- 2 files changed, 16 insertions(+), 63 deletions(-) diff --git a/autointent/custom_types.py b/autointent/custom_types.py index d9697b2f5..da0ee16e8 100644 --- a/autointent/custom_types.py +++ b/autointent/custom_types.py @@ -71,5 +71,5 @@ class Split: INTENTS = "intents" -TuningType = Literal["brute", "bayes"] +TuningType = Literal["brute", "bayes", "random"] ValidationScheme = Literal["ho", "cv"] diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_optimization/_node_optimizer.py index 010444239..d18a48737 100644 --- a/autointent/nodes/_optimization/_node_optimizer.py +++ b/autointent/nodes/_optimization/_node_optimizer.py @@ -1,7 +1,6 @@ """Node optimizer.""" import gc -import itertools as it import logging from copy import deepcopy from functools import partial @@ -73,74 +72,28 @@ def fit(self, context: Context, tuning: TuningType = "brute") -> None: """ self._logger.info("starting %s node optimization...", self.node_info.node_type) - if tuning == "brute": - self._fit_brute(context) - elif tuning == "bayes": - self._fit_bayes(context) - else: - msg = f"Unexepected tuning type: {tuning}" - raise ValueError(msg) - - self._logger.info("%s node optimization is finished!", self.node_info.node_type) - - def _fit_brute(self, context: Context) -> None: - for search_space in deepcopy(self.modules_search_spaces): - module_name = search_space.pop("module_name") - - for j_combination, params_combination in enumerate(it.product(*search_space.values())): - module_kwargs = dict(zip(search_space.keys(), params_combination, strict=False)) - - self._logger.debug("initializing %s module...", module_name) - module = self.node_info.modules_available[module_name].from_context(context, **module_kwargs) - - embedder_name = module.get_embedder_name() - if embedder_name is not None: - module_kwargs["embedder_name"] = embedder_name - - context.callback_handler.start_module( - module_name=module_name, num=j_combination, module_kwargs=module_kwargs - ) - - self._logger.debug("scoring %s module...", module_name) - metrics_score = module.score(context, metrics=self.metrics) - metric_value = metrics_score[self.target_metric] - - context.callback_handler.log_metrics(metrics_score) - context.callback_handler.end_module() - - dump_dir = context.get_dump_dir() - - if dump_dir is not None: - module_dump_dir = self.get_module_dump_dir(dump_dir, module_name, j_combination) - module.dump(module_dump_dir) - else: - module_dump_dir = None - - context.optimization_info.log_module_optimization( - self.node_info.node_type, - module_name, - module_kwargs, - metric_value, - 
self.target_metric, - module.get_assets(), # retriever name / scores / predictions - module_dump_dir, - module=module if not context.is_ram_to_clear() else None, - ) - - if context.is_ram_to_clear(): - module.clear_cache() - gc.collect() - torch.cuda.empty_cache() - - def _fit_bayes(self, context: Context, seed: int = 42, n_trials: int = 10) -> None: for search_space in deepcopy(self.modules_search_spaces): self._counter = 0 - study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(seed=seed)) + if tuning == "bayes": + sampler = optuna.samplers.TPESampler(seed=context.seed) + n_trials = 10 + elif tuning == "brute": + sampler = optuna.samplers.BruteForceSampler(seed=context.seed) # type: ignore[assignment] + n_trials = None + elif tuning == "random": + sampler = optuna.samplers.RandomSampler(seed=context.seed) # type: ignore[assignment] + n_trials = 10 + else: + msg = f"Unexpected sampler: {tuning}" + raise ValueError(msg) + study = optuna.create_study(direction="maximize", sampler=sampler) optuna.logging.set_verbosity(optuna.logging.WARNING) module_name = search_space.pop("module_name") obj = partial(self.objective, module_name=module_name, search_space=search_space, context=context) study.optimize(obj, n_trials=n_trials) + self._logger.info("%s node optimization is finished!", self.node_info.node_type) + def objective( self, trial: Trial, module_name: str, search_space: dict[str, ParamSpace | list[Any]], context: Context ) -> float: From 2fb6ac01cd82a11624ca72accbc439a1658d42b7 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 12 Feb 2025 12:02:05 +0300 Subject: [PATCH 62/74] refactor yaml format a little bit --- .../nodes/_optimization/_node_optimizer.py | 34 +++++++++++-------- tests/assets/configs/bayes.yaml | 15 ++++---- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_optimization/_node_optimizer.py index d18a48737..4ae0e420b 100644 --- a/autointent/nodes/_optimization/_node_optimizer.py +++ b/autointent/nodes/_optimization/_node_optimizer.py @@ -5,7 +5,7 @@ from copy import deepcopy from functools import partial from pathlib import Path -from typing import Any, Literal, TypedDict +from typing import Any, TypedDict import optuna import torch @@ -31,11 +31,6 @@ class ParamSpaceFloat(TypedDict, total=False): log: bool -class ParamSpace(TypedDict): - type: Literal["cat", "int", "float"] - content: ParamSpaceInt | ParamSpaceFloat - - class NodeOptimizer: """Node optimizer class.""" @@ -74,28 +69,35 @@ def fit(self, context: Context, tuning: TuningType = "brute") -> None: for search_space in deepcopy(self.modules_search_spaces): self._counter = 0 + module_name = search_space.pop("module_name") + n_trials = None + if "n_trials" in search_space: + n_trials = search_space.pop("n_trials") if tuning == "bayes": sampler = optuna.samplers.TPESampler(seed=context.seed) - n_trials = 10 + n_trials = n_trials or 10 elif tuning == "brute": sampler = optuna.samplers.BruteForceSampler(seed=context.seed) # type: ignore[assignment] n_trials = None elif tuning == "random": sampler = optuna.samplers.RandomSampler(seed=context.seed) # type: ignore[assignment] - n_trials = 10 + n_trials = n_trials or 10 else: msg = f"Unexpected sampler: {tuning}" raise ValueError(msg) study = optuna.create_study(direction="maximize", sampler=sampler) optuna.logging.set_verbosity(optuna.logging.WARNING) - module_name = search_space.pop("module_name") obj = partial(self.objective, module_name=module_name, 
search_space=search_space, context=context) study.optimize(obj, n_trials=n_trials) self._logger.info("%s node optimization is finished!", self.node_info.node_type) def objective( - self, trial: Trial, module_name: str, search_space: dict[str, ParamSpace | list[Any]], context: Context + self, + trial: Trial, + module_name: str, + search_space: dict[str, ParamSpaceInt | ParamSpaceFloat | list[Any]], + context: Context, ) -> float: config = self.suggest(trial, search_space) @@ -143,15 +145,17 @@ def objective( return target_metric - def suggest(self, trial: Trial, search_space: dict[str, ParamSpace | list[Any]]) -> dict[str, Any]: + def suggest( + self, trial: Trial, search_space: dict[str, ParamSpaceInt | ParamSpaceFloat | list[Any]] + ) -> dict[str, Any]: res: dict[str, Any] = {} for param_name, param_space in search_space.items(): if isinstance(param_space, list): res[param_name] = trial.suggest_categorical(param_name, choices=param_space) - elif param_space["type"] == "int": - res[param_name] = trial.suggest_int(param_name, **param_space["content"]) - elif param_space["type"] == "float": - res[param_name] = trial.suggest_float(param_name, **param_space["content"]) + elif all(isinstance(v, int) for v in param_space.values()): + res[param_name] = trial.suggest_int(param_name, **param_space) + elif all(isinstance(v, float) for v in param_space.values()): + res[param_name] = trial.suggest_float(param_name, **param_space) else: msg = f"Unsupported type of param search space: {param_space}" raise TypeError(msg) diff --git a/tests/assets/configs/bayes.yaml b/tests/assets/configs/bayes.yaml index a008c7005..636e217fc 100644 --- a/tests/assets/configs/bayes.yaml +++ b/tests/assets/configs/bayes.yaml @@ -10,12 +10,11 @@ target_metric: scoring_roc_auc search_space: - module_name: knn + n_trials: 3 k: - type: "int" - content: - low: 5 - high: 10 - step: 1 + low: 5 + high: 10 + step: 1 weights: [uniform, distance, closest] - module_name: linear - node_type: decision @@ -23,10 +22,8 @@ search_space: - module_name: threshold thresh: - type: float - content: - low: 0.1 - high: 0.9 + low: 0.1 + high: 0.9 - module_name: tunable - module_name: argmax - module_name: jinoos From 5072810aea54dc4859d607166374eb977613446c Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 12 Feb 2025 12:12:06 +0300 Subject: [PATCH 63/74] add test for random sampler --- tests/pipeline/test_optimization.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/pipeline/test_optimization.py b/tests/pipeline/test_optimization.py index 75104d42e..9549c6fd8 100644 --- a/tests/pipeline/test_optimization.py +++ b/tests/pipeline/test_optimization.py @@ -11,7 +11,11 @@ from tests.conftest import get_search_space, setup_environment -def test_bayes(dataset): +@pytest.mark.parametrize( + "tuning", + ["bayes", "random"], +) +def test_bayes(dataset, tuning): project_dir = setup_environment() search_space = get_search_space("bayes") @@ -21,7 +25,7 @@ def test_bayes(dataset): pipeline_optimizer.set_config(VectorIndexConfig()) pipeline_optimizer.set_config(EmbedderConfig(batch_size=16, max_length=32, device="cpu")) - pipeline_optimizer.fit(dataset, scheme="cv", refit_after=True, tuning="bayes") + pipeline_optimizer.fit(dataset, scheme="ho", refit_after=False, tuning=tuning) @pytest.mark.parametrize( From b6669ecf4b9da9e2398402237ed139e53fac38b4 Mon Sep 17 00:00:00 2001 From: voorhs Date: Wed, 12 Feb 2025 12:15:55 +0300 Subject: [PATCH 64/74] rename some variables --- autointent/_pipeline/_pipeline.py | 10 +++++----- 
autointent/configs/_optimization.py | 4 ++-- autointent/custom_types.py | 2 +- .../nodes/_optimization/_node_optimizer.py | 20 +++++++++---------- .../configs/{bayes.yaml => optuna.yaml} | 0 tests/conftest.py | 2 +- tests/pipeline/test_optimization.py | 10 +++++----- 7 files changed, 24 insertions(+), 24 deletions(-) rename tests/assets/configs/{bayes.yaml => optuna.yaml} (100%) diff --git a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py index 07d25051e..fe9a12e50 100644 --- a/autointent/_pipeline/_pipeline.py +++ b/autointent/_pipeline/_pipeline.py @@ -10,7 +10,7 @@ from autointent import Context, Dataset from autointent.configs import CrossEncoderConfig, EmbedderConfig, InferenceNodeConfig, LoggingConfig, VectorIndexConfig -from autointent.custom_types import ListOfGenericLabels, NodeType, TuningType, ValidationScheme +from autointent.custom_types import ListOfGenericLabels, NodeType, SamplerType, ValidationScheme from autointent.metrics import PREDICTION_METRICS_MULTILABEL from autointent.nodes import InferenceNode, NodeOptimizer from autointent.utils import load_default_search_space, load_search_space @@ -92,7 +92,7 @@ def default_optimizer(cls, multilabel: bool, seed: int = 42) -> "Pipeline": """ return cls.from_search_space(search_space=load_default_search_space(multilabel), seed=seed) - def _fit(self, context: Context, tuning: TuningType = "brute") -> None: + def _fit(self, context: Context, sampler: SamplerType = "brute") -> None: """ Optimize the pipeline. @@ -107,7 +107,7 @@ def _fit(self, context: Context, tuning: TuningType = "brute") -> None: for node_type in NodeType: node_optimizer = self.nodes.get(node_type, None) if node_optimizer is not None: - node_optimizer.fit(context, tuning) # type: ignore[union-attr] + node_optimizer.fit(context, sampler) # type: ignore[union-attr] if not context.vector_index_config.save_db: self._logger.info("removing vector database from file system...") # TODO clear cache from appdirs @@ -127,7 +127,7 @@ def fit( scheme: ValidationScheme = "ho", n_folds: int = 3, refit_after: bool = False, - tuning: TuningType = "brute", + sampler: SamplerType = "brute", ) -> Context: """ Optimize the pipeline from dataset. @@ -145,7 +145,7 @@ def fit( context.configure_vector_index(self.vector_index_config, self.embedder_config) context.configure_cross_encoder(self.cross_encoder_config) self.validate_modules(dataset) - self._fit(context, tuning) + self._fit(context, sampler) if context.is_ram_to_clear(): nodes_configs = context.optimization_info.get_inference_nodes_config() diff --git a/autointent/configs/_optimization.py b/autointent/configs/_optimization.py index bb0ab278e..fb8e2068b 100644 --- a/autointent/configs/_optimization.py +++ b/autointent/configs/_optimization.py @@ -4,7 +4,7 @@ from pydantic import BaseModel, Field -from autointent.custom_types import TuningType, ValidationScheme +from autointent.custom_types import SamplerType, ValidationScheme from ._name import get_run_name @@ -25,7 +25,7 @@ class TaskConfig(BaseModel): search_space_path: Path | None = None """Path to the search space configuration file. 
If None, the default search space will be used""" - sampler: TuningType = "brute" + sampler: SamplerType = "brute" class LoggingConfig(BaseModel): diff --git a/autointent/custom_types.py b/autointent/custom_types.py index da0ee16e8..ae5f9e6e2 100644 --- a/autointent/custom_types.py +++ b/autointent/custom_types.py @@ -71,5 +71,5 @@ class Split: INTENTS = "intents" -TuningType = Literal["brute", "bayes", "random"] +SamplerType = Literal["brute", "tpe", "random"] ValidationScheme = Literal["ho", "cv"] diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_optimization/_node_optimizer.py index 4ae0e420b..9a4a2493a 100644 --- a/autointent/nodes/_optimization/_node_optimizer.py +++ b/autointent/nodes/_optimization/_node_optimizer.py @@ -13,7 +13,7 @@ from autointent import Dataset from autointent.context import Context -from autointent.custom_types import NodeType, TuningType +from autointent.custom_types import NodeType, SamplerType from autointent.nodes._nodes_info import NODES_INFO @@ -59,7 +59,7 @@ def __init__( self.modules_search_spaces = search_space self._logger = logging.getLogger(__name__) # TODO solve duplicate logging messages problem - def fit(self, context: Context, tuning: TuningType = "brute") -> None: + def fit(self, context: Context, sampler: SamplerType = "brute") -> None: """ Fit the node optimizer. @@ -73,19 +73,19 @@ def fit(self, context: Context, tuning: TuningType = "brute") -> None: n_trials = None if "n_trials" in search_space: n_trials = search_space.pop("n_trials") - if tuning == "bayes": - sampler = optuna.samplers.TPESampler(seed=context.seed) + if sampler == "tpe": + sampler_instance = optuna.samplers.TPESampler(seed=context.seed) n_trials = n_trials or 10 - elif tuning == "brute": - sampler = optuna.samplers.BruteForceSampler(seed=context.seed) # type: ignore[assignment] + elif sampler == "brute": + sampler_instance = optuna.samplers.BruteForceSampler(seed=context.seed) # type: ignore[assignment] n_trials = None - elif tuning == "random": - sampler = optuna.samplers.RandomSampler(seed=context.seed) # type: ignore[assignment] + elif sampler == "random": + sampler_instance = optuna.samplers.RandomSampler(seed=context.seed) # type: ignore[assignment] n_trials = n_trials or 10 else: - msg = f"Unexpected sampler: {tuning}" + msg = f"Unexpected sampler: {sampler}" raise ValueError(msg) - study = optuna.create_study(direction="maximize", sampler=sampler) + study = optuna.create_study(direction="maximize", sampler=sampler_instance) optuna.logging.set_verbosity(optuna.logging.WARNING) obj = partial(self.objective, module_name=module_name, search_space=search_space, context=context) study.optimize(obj, n_trials=n_trials) diff --git a/tests/assets/configs/bayes.yaml b/tests/assets/configs/optuna.yaml similarity index 100% rename from tests/assets/configs/bayes.yaml rename to tests/assets/configs/optuna.yaml diff --git a/tests/conftest.py b/tests/conftest.py index 4d4190d4c..002812907 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -27,7 +27,7 @@ def dataset_unsplitted(): return Dataset.from_json(path) -TaskType = Literal["multiclass", "multilabel", "description", "bayes"] +TaskType = Literal["multiclass", "multilabel", "description", "optuna"] def get_search_space_path(task_type: TaskType): diff --git a/tests/pipeline/test_optimization.py b/tests/pipeline/test_optimization.py index 9549c6fd8..109058d9f 100644 --- a/tests/pipeline/test_optimization.py +++ b/tests/pipeline/test_optimization.py @@ -12,12 +12,12 @@ 
@pytest.mark.parametrize( - "tuning", - ["bayes", "random"], + "sampler", + ["tpe", "random"], ) -def test_bayes(dataset, tuning): +def test_bayes(dataset, sampler): project_dir = setup_environment() - search_space = get_search_space("bayes") + search_space = get_search_space("optuna") pipeline_optimizer = Pipeline.from_search_space(search_space) @@ -25,7 +25,7 @@ def test_bayes(dataset, tuning): pipeline_optimizer.set_config(VectorIndexConfig()) pipeline_optimizer.set_config(EmbedderConfig(batch_size=16, max_length=32, device="cpu")) - pipeline_optimizer.fit(dataset, scheme="ho", refit_after=False, tuning=tuning) + pipeline_optimizer.fit(dataset, scheme="ho", refit_after=False, sampler=sampler) @pytest.mark.parametrize( From 2b4ac0d04291e37bae7057b0295aa4597e32bbb1 Mon Sep 17 00:00:00 2001 From: Roman Solomatin Date: Fri, 14 Feb 2025 10:56:42 +0300 Subject: [PATCH 65/74] add config validation for optuna (#132) * add config validation * add validation for union types * remove debug code * remove comment * run tests on pr for all branches * fix mlknn * fix type validation --- .github/workflows/test-inference.yaml | 2 - .github/workflows/test-nodes.yaml | 2 - .github/workflows/test-optimization.yaml | 2 - .github/workflows/unit-tests.yaml | 2 - autointent/configs/_optimization.py | 4 +- autointent/custom_types.py | 8 +- autointent/modules/decision/_adaptive.py | 6 +- autointent/modules/decision/_jinoos.py | 6 +- autointent/modules/decision/_threshold.py | 8 +- autointent/modules/decision/_tunable.py | 5 +- autointent/modules/embedding/_logreg.py | 5 +- autointent/modules/embedding/_retrieval.py | 6 +- .../scoring/_description/description.py | 5 +- autointent/modules/scoring/_dnnc/dnnc.py | 5 +- autointent/modules/scoring/_knn/knn.py | 5 +- autointent/modules/scoring/_mlknn/mlknn.py | 9 +- .../nodes/_optimization/_node_optimizer.py | 41 +- autointent/nodes/schemes.py | 63 ++- docs/optimizer_config.schema.json | 532 +++++++++++++++--- 19 files changed, 569 insertions(+), 147 deletions(-) diff --git a/.github/workflows/test-inference.yaml b/.github/workflows/test-inference.yaml index ca4c45ff0..a68ef07e8 100644 --- a/.github/workflows/test-inference.yaml +++ b/.github/workflows/test-inference.yaml @@ -5,8 +5,6 @@ on: branches: - dev pull_request: - branches: - - dev jobs: test: diff --git a/.github/workflows/test-nodes.yaml b/.github/workflows/test-nodes.yaml index 99507571b..b10161724 100644 --- a/.github/workflows/test-nodes.yaml +++ b/.github/workflows/test-nodes.yaml @@ -5,8 +5,6 @@ on: branches: - dev pull_request: - branches: - - dev jobs: test: diff --git a/.github/workflows/test-optimization.yaml b/.github/workflows/test-optimization.yaml index ea1cf861e..4625f39d7 100644 --- a/.github/workflows/test-optimization.yaml +++ b/.github/workflows/test-optimization.yaml @@ -5,8 +5,6 @@ on: branches: - dev pull_request: - branches: - - dev jobs: test: diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml index 3612d561f..5883080eb 100644 --- a/.github/workflows/unit-tests.yaml +++ b/.github/workflows/unit-tests.yaml @@ -5,8 +5,6 @@ on: branches: - dev pull_request: - branches: - - dev jobs: test: diff --git a/autointent/configs/_optimization.py b/autointent/configs/_optimization.py index fb8e2068b..1d85081a4 100644 --- a/autointent/configs/_optimization.py +++ b/autointent/configs/_optimization.py @@ -2,7 +2,7 @@ from pathlib import Path -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, PositiveInt from autointent.custom_types import 
SamplerType, ValidationScheme @@ -16,7 +16,7 @@ class DataConfig(BaseModel): """Path to the training data. Can be local path or HF repo.""" scheme: ValidationScheme """Hold-out or cross-validation.""" - n_folds: int = 3 + n_folds: PositiveInt = 3 """Number of folds in cross-validation.""" diff --git a/autointent/custom_types.py b/autointent/custom_types.py index ae5f9e6e2..b0318ee38 100644 --- a/autointent/custom_types.py +++ b/autointent/custom_types.py @@ -5,7 +5,9 @@ """ from enum import Enum -from typing import Literal, TypeAlias +from typing import Annotated, Literal, TypeAlias + +from annotated_types import Interval class LogLevel(Enum): @@ -73,3 +75,7 @@ class Split: SamplerType = Literal["brute", "tpe", "random"] ValidationScheme = Literal["ho", "cv"] + + +FloatFromZeroToOne = Annotated[float, Interval(ge=0, le=1)] +"""Float value between 0 and 1, inclusive.""" diff --git a/autointent/modules/decision/_adaptive.py b/autointent/modules/decision/_adaptive.py index e0d155a6e..806a35444 100644 --- a/autointent/modules/decision/_adaptive.py +++ b/autointent/modules/decision/_adaptive.py @@ -7,7 +7,7 @@ import numpy.typing as npt from autointent import Context -from autointent.custom_types import ListOfGenericLabels, ListOfLabelsWithOOS, MultiLabel +from autointent.custom_types import FloatFromZeroToOne, ListOfGenericLabels, ListOfLabelsWithOOS, MultiLabel from autointent.exceptions import MismatchNumClassesError from autointent.metrics import decision_f1 from autointent.modules.abc import DecisionModule @@ -59,7 +59,7 @@ class AdaptiveDecision(DecisionModule): supports_oos = False name = "adaptive" - def __init__(self, search_space: list[float] | None = None) -> None: + def __init__(self, search_space: list[FloatFromZeroToOne] | None = None) -> None: """ Initialize the AdaptiveDecision. @@ -69,7 +69,7 @@ def __init__(self, search_space: list[float] | None = None) -> None: self.search_space = search_space if search_space is not None else default_search_space @classmethod - def from_context(cls, context: Context, search_space: list[float] | None = None) -> "AdaptiveDecision": + def from_context(cls, context: Context, search_space: list[FloatFromZeroToOne] | None = None) -> "AdaptiveDecision": """ Create an AdaptiveDecision instance using a Context object. diff --git a/autointent/modules/decision/_jinoos.py b/autointent/modules/decision/_jinoos.py index f266d66ec..14676c2b6 100644 --- a/autointent/modules/decision/_jinoos.py +++ b/autointent/modules/decision/_jinoos.py @@ -6,7 +6,7 @@ import numpy.typing as npt from autointent import Context -from autointent.custom_types import ListOfGenericLabels +from autointent.custom_types import FloatFromZeroToOne, ListOfGenericLabels from autointent.exceptions import MismatchNumClassesError from autointent.modules.abc import DecisionModule from autointent.schemas import Tag @@ -55,7 +55,7 @@ class JinoosDecision(DecisionModule): def __init__( self, - search_space: list[float] | None = None, + search_space: list[FloatFromZeroToOne] | None = None, ) -> None: """ Initialize Jinoos predictor. @@ -65,7 +65,7 @@ def __init__( self.search_space = np.array(search_space) if search_space is not None else default_search_space @classmethod - def from_context(cls, context: Context, search_space: list[float] | None = None) -> "JinoosDecision": + def from_context(cls, context: Context, search_space: list[FloatFromZeroToOne] | None = None) -> "JinoosDecision": """ Initialize from context. 
diff --git a/autointent/modules/decision/_threshold.py b/autointent/modules/decision/_threshold.py index 76bf0f281..42dcbca20 100644 --- a/autointent/modules/decision/_threshold.py +++ b/autointent/modules/decision/_threshold.py @@ -7,7 +7,7 @@ import numpy.typing as npt from autointent import Context -from autointent.custom_types import ListOfGenericLabels, MultiLabel +from autointent.custom_types import FloatFromZeroToOne, ListOfGenericLabels, MultiLabel from autointent.exceptions import MismatchNumClassesError from autointent.modules.abc import DecisionModule from autointent.schemas import Tag @@ -75,7 +75,7 @@ class ThresholdDecision(DecisionModule): def __init__( self, - thresh: float | list[float], + thresh: FloatFromZeroToOne | list[FloatFromZeroToOne], ) -> None: """ Initialize threshold predictor. @@ -85,7 +85,9 @@ def __init__( self.thresh = thresh if isinstance(thresh, float) else np.array(thresh) @classmethod - def from_context(cls, context: Context, thresh: float | list[float] = 0.5) -> "ThresholdDecision": + def from_context( + cls, context: Context, thresh: FloatFromZeroToOne | list[FloatFromZeroToOne] = 0.5 + ) -> "ThresholdDecision": """ Initialize from context. diff --git a/autointent/modules/decision/_tunable.py b/autointent/modules/decision/_tunable.py index 34d0c2ce9..82f348b99 100644 --- a/autointent/modules/decision/_tunable.py +++ b/autointent/modules/decision/_tunable.py @@ -6,6 +6,7 @@ import numpy.typing as npt import optuna from optuna.trial import Trial +from pydantic import PositiveInt from autointent.context import Context from autointent.custom_types import ListOfGenericLabels @@ -77,7 +78,7 @@ class TunableDecision(DecisionModule): def __init__( self, - n_trials: int = 320, + n_trials: PositiveInt = 320, seed: int = 0, tags: list[Tag] | None = None, ) -> None: @@ -93,7 +94,7 @@ def __init__( self.tags = tags @classmethod - def from_context(cls, context: Context, n_trials: int = 320) -> "TunableDecision": + def from_context(cls, context: Context, n_trials: PositiveInt = 320) -> "TunableDecision": """ Initialize from context. diff --git a/autointent/modules/embedding/_logreg.py b/autointent/modules/embedding/_logreg.py index 64283ee9c..496b245ab 100644 --- a/autointent/modules/embedding/_logreg.py +++ b/autointent/modules/embedding/_logreg.py @@ -2,6 +2,7 @@ import numpy as np from numpy.typing import NDArray +from pydantic import PositiveInt from sklearn.linear_model import LogisticRegression, LogisticRegressionCV from sklearn.multioutput import MultiOutputClassifier from sklearn.preprocessing import LabelEncoder @@ -48,7 +49,7 @@ class LogregAimedEmbedding(EmbeddingModule): def __init__( self, embedder_name: str, - cv: int = 3, + cv: PositiveInt = 3, embedder_device: str = "cpu", embedder_batch_size: int = 32, embedder_max_length: int | None = None, @@ -76,7 +77,7 @@ def from_context( cls, context: Context, embedder_name: str, - cv: int = 3, + cv: PositiveInt = 3, ) -> "LogregAimedEmbedding": """ Create a LogregAimedEmbedding instance using a Context object. 
diff --git a/autointent/modules/embedding/_retrieval.py b/autointent/modules/embedding/_retrieval.py index 5063a1b00..2ca62eff1 100644 --- a/autointent/modules/embedding/_retrieval.py +++ b/autointent/modules/embedding/_retrieval.py @@ -1,5 +1,7 @@ """RetrievalAimedEmbedding class for a proxy optimization of embedding.""" +from pydantic import PositiveInt + from autointent import Context, VectorIndex from autointent.context.optimization_info import RetrieverArtifact from autointent.custom_types import ListOfLabels @@ -41,7 +43,7 @@ class RetrievalAimedEmbedding(EmbeddingModule): def __init__( self, - k: int, + k: PositiveInt, embedder_name: str, embedder_device: str = "cpu", embedder_batch_size: int = 32, @@ -69,7 +71,7 @@ def __init__( def from_context( cls, context: Context, - k: int, + k: PositiveInt, embedder_name: str, ) -> "RetrievalAimedEmbedding": """ diff --git a/autointent/modules/scoring/_description/description.py b/autointent/modules/scoring/_description/description.py index 79ade2aa7..ff55ae334 100644 --- a/autointent/modules/scoring/_description/description.py +++ b/autointent/modules/scoring/_description/description.py @@ -5,6 +5,7 @@ import numpy as np import scipy from numpy.typing import NDArray +from pydantic import PositiveFloat from sklearn.metrics.pairwise import cosine_similarity from autointent import Context, Embedder @@ -37,7 +38,7 @@ class DescriptionScorer(ScoringModule): def __init__( self, embedder_name: str, - temperature: float = 1.0, + temperature: PositiveFloat = 1.0, embedder_device: str = "cpu", embedder_batch_size: int = 32, embedder_max_length: int | None = None, @@ -64,7 +65,7 @@ def __init__( def from_context( cls, context: Context, - temperature: float, + temperature: PositiveFloat, embedder_name: str | None = None, ) -> "DescriptionScorer": """ diff --git a/autointent/modules/scoring/_dnnc/dnnc.py b/autointent/modules/scoring/_dnnc/dnnc.py index 0610785ae..84b51a496 100644 --- a/autointent/modules/scoring/_dnnc/dnnc.py +++ b/autointent/modules/scoring/_dnnc/dnnc.py @@ -6,6 +6,7 @@ import numpy as np import numpy.typing as npt +from pydantic import PositiveInt from autointent import Context, Ranker, VectorIndex from autointent.custom_types import ListOfLabels @@ -77,7 +78,7 @@ def __init__( # noqa: PLR0913 self, cross_encoder_name: str, embedder_name: str, - k: int, + k: PositiveInt, embedder_device: str = "cpu", embedder_batch_size: int = 32, embedder_max_length: int | None = None, @@ -118,7 +119,7 @@ def from_context( cls, context: Context, cross_encoder_name: str, - k: int, + k: PositiveInt, embedder_name: str | None = None, train_head: bool = False, ) -> "DNNCScorer": diff --git a/autointent/modules/scoring/_knn/knn.py b/autointent/modules/scoring/_knn/knn.py index ab665dfe7..eaa5ce864 100644 --- a/autointent/modules/scoring/_knn/knn.py +++ b/autointent/modules/scoring/_knn/knn.py @@ -4,6 +4,7 @@ import numpy as np import numpy.typing as npt +from pydantic import PositiveInt from autointent import Context, VectorIndex from autointent.custom_types import WEIGHT_TYPES, ListOfLabels @@ -57,7 +58,7 @@ class KNNScorer(ScoringModule): def __init__( self, embedder_name: str, - k: int, + k: PositiveInt, weights: WEIGHT_TYPES = "distance", embedder_device: str = "cpu", embedder_batch_size: int = 32, @@ -90,7 +91,7 @@ def __init__( def from_context( cls, context: Context, - k: int, + k: PositiveInt, weights: WEIGHT_TYPES, embedder_name: str | None = None, ) -> "KNNScorer": diff --git a/autointent/modules/scoring/_mlknn/mlknn.py 
b/autointent/modules/scoring/_mlknn/mlknn.py index b43763ab0..a25220f70 100644 --- a/autointent/modules/scoring/_mlknn/mlknn.py +++ b/autointent/modules/scoring/_mlknn/mlknn.py @@ -4,6 +4,7 @@ import numpy as np from numpy.typing import NDArray +from pydantic import NonNegativeInt, PositiveFloat, PositiveInt from autointent import Context, VectorIndex from autointent.custom_types import ListOfLabels @@ -57,7 +58,7 @@ class MLKnnScorer(ScoringModule): def __init__( self, - k: int, + k: PositiveInt, embedder_name: str, s: float = 1.0, ignore_first_neighbours: int = 0, @@ -91,9 +92,9 @@ def __init__( def from_context( cls, context: Context, - k: int, - s: float = 1.0, - ignore_first_neighbours: int = 0, + k: PositiveInt, + s: PositiveFloat = 1.0, + ignore_first_neighbours: NonNegativeInt = 0, embedder_name: str | None = None, ) -> "MLKnnScorer": """ diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_optimization/_node_optimizer.py index 9a4a2493a..bca970ebd 100644 --- a/autointent/nodes/_optimization/_node_optimizer.py +++ b/autointent/nodes/_optimization/_node_optimizer.py @@ -5,11 +5,12 @@ from copy import deepcopy from functools import partial from pathlib import Path -from typing import Any, TypedDict +from typing import Any import optuna import torch from optuna.trial import Trial +from pydantic import BaseModel, Field from autointent import Dataset from autointent.context import Context @@ -17,18 +18,18 @@ from autointent.nodes._nodes_info import NODES_INFO -class ParamSpaceInt(TypedDict, total=False): - low: int - high: int - step: int - log: bool +class ParamSpaceInt(BaseModel): + low: int = Field(..., description="Low boundary of the search space.") + high: int = Field(..., description="High boundary of the search space.") + step: int = Field(1, description="Step of the search space.") + log: bool = Field(False, description="Whether to use a logarithmic scale.") -class ParamSpaceFloat(TypedDict, total=False): - low: float - high: float - step: float - log: bool +class ParamSpaceFloat(BaseModel): + low: float = Field(..., description="Low boundary of the search space.") + high: float = Field(..., description="High boundary of the search space.") + step: float = Field(0.1, description="Step of the search space.") + log: bool = Field(False, description="Whether to use a logarithmic scale.") class NodeOptimizer: @@ -145,16 +146,24 @@ def objective( return target_metric - def suggest( - self, trial: Trial, search_space: dict[str, ParamSpaceInt | ParamSpaceFloat | list[Any]] - ) -> dict[str, Any]: + def suggest(self, trial: Trial, search_space: dict[str, Any | list[Any]]) -> dict[str, Any]: res: dict[str, Any] = {} + + def is_valid_param_space( + param_space: dict[str, Any], space_type: type[ParamSpaceInt | ParamSpaceFloat] + ) -> bool: + try: + space_type(**param_space) + return True # noqa: TRY300 + except ValueError: + return False + for param_name, param_space in search_space.items(): if isinstance(param_space, list): res[param_name] = trial.suggest_categorical(param_name, choices=param_space) - elif all(isinstance(v, int) for v in param_space.values()): + elif is_valid_param_space(param_space, ParamSpaceInt): res[param_name] = trial.suggest_int(param_name, **param_space) - elif all(isinstance(v, float) for v in param_space.values()): + elif is_valid_param_space(param_space, ParamSpaceFloat): res[param_name] = trial.suggest_float(param_name, **param_space) else: msg = f"Unsupported type of param search space: {param_space}" diff --git 
a/autointent/nodes/schemes.py b/autointent/nodes/schemes.py index 58cba623b..acfc71fab 100644 --- a/autointent/nodes/schemes.py +++ b/autointent/nodes/schemes.py @@ -2,13 +2,58 @@ import inspect from collections.abc import Iterator -from typing import Any, Literal, TypeAlias, Union, get_type_hints +from typing import Annotated, Any, Literal, TypeAlias, Union, get_args, get_origin, get_type_hints -from pydantic import BaseModel, Field, RootModel +from pydantic import BaseModel, Field, PositiveInt, RootModel from autointent.custom_types import NodeType from autointent.modules.abc import Module from autointent.nodes import DecisionNodeInfo, EmbeddingNodeInfo, ScoringNodeInfo +from autointent.nodes._optimization._node_optimizer import ParamSpaceFloat, ParamSpaceInt + + +def unwrap_annotated(tp: type) -> type: + """ + Unwrap the Annotated type to get the actual type. + + :param tp: Type to unwrap + :return: Unwrapped type + """ + return get_args(tp)[0] if get_origin(tp) is Annotated else tp + + +def type_matches(target: type, tp: type) -> bool: + """ + Recursively check if the target type is present in the given type. + + This function handles union types by unwrapping Annotated types where necessary. + + :param target: Target type + :param tp: Given type + :return: If the target type is present in the given type + """ + origin = get_origin(tp) + + if origin is Union: # float | list[float] + return any(type_matches(target, arg) for arg in get_args(tp)) + return unwrap_annotated(tp) is target + + +def get_optuna_class(param_type: type) -> type[ParamSpaceInt | ParamSpaceFloat] | None: + """ + Get the Optuna class for the given parameter type. + + If the (possibly annotated or union) type includes int or float, this function + returns the corresponding search space class. + + :param param_type: Parameter type (could be a union, annotated type, or container) + :return: ParamSpaceInt if the type matches int, ParamSpaceFloat if it matches float, else None. + """ + if type_matches(int, param_type): + return ParamSpaceInt + if type_matches(float, param_type): + return ParamSpaceFloat + return None def generate_models_and_union_type_for_classes( @@ -20,9 +65,12 @@ def generate_models_and_union_type_for_classes( for cls in classes: init_signature = inspect.signature(cls.from_context) globalns = getattr(cls.from_context, "__globals__", {}) - type_hints = get_type_hints(cls.from_context, globalns, None) # Resolve forward refs + type_hints = get_type_hints(cls.from_context, globalns, None, include_extras=True) # Resolve forward refs - fields = {"module_name": (Literal[cls.name], Field(...))} + fields = { + "module_name": (Literal[cls.name], Field(...)), + "n_trials": (PositiveInt | None, Field(None, description="Number of trials")), + } for param_name, param in init_signature.parameters.items(): if param_name in ("self", "cls", "context"): @@ -30,8 +78,11 @@ def generate_models_and_union_type_for_classes( param_type: TypeAlias = type_hints.get(param_name, Any) # type: ignore[valid-type] # noqa: PYI042 field = Field(default=[param.default]) if param.default is not inspect.Parameter.empty else Field(...) 
- - fields[param_name] = (list[param_type], field) # type: ignore[assignment] + search_type = get_optuna_class(param_type) + if search_type is None: + fields[param_name] = (list[param_type], field) + else: + fields[param_name] = (list[param_type] | search_type, field) model_name = f"{cls.__name__}InitModel" models[cls.__name__] = type( diff --git a/docs/optimizer_config.schema.json b/docs/optimizer_config.schema.json index 73e79757e..6980b460f 100644 --- a/docs/optimizer_config.schema.json +++ b/docs/optimizer_config.schema.json @@ -7,6 +7,20 @@ "title": "Module Name", "type": "string" }, + "n_trials": { + "anyOf": [ + { + "exclusiveMinimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Number of trials", + "title": "N Trials" + }, "search_space": { "default": [ null @@ -15,6 +29,8 @@ "anyOf": [ { "items": { + "maximum": 1.0, + "minimum": 0.0, "type": "number" }, "type": "array" @@ -40,6 +56,20 @@ "const": "argmax", "title": "Module Name", "type": "string" + }, + "n_trials": { + "anyOf": [ + { + "exclusiveMinimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Number of trials", + "title": "N Trials" } }, "required": [ @@ -55,6 +85,20 @@ "title": "Module Name", "type": "string" }, + "n_trials": { + "anyOf": [ + { + "exclusiveMinimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Number of trials", + "title": "N Trials" + }, "cross_encoder_name": { "items": { "type": "string" @@ -63,11 +107,19 @@ "type": "array" }, "k": { - "items": { - "type": "integer" - }, - "title": "K", - "type": "array" + "anyOf": [ + { + "items": { + "exclusiveMinimum": 0, + "type": "integer" + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceInt" + } + ], + "title": "K" }, "embedder_name": { "default": [ @@ -183,12 +235,34 @@ "title": "Module Name", "type": "string" }, + "n_trials": { + "anyOf": [ + { + "exclusiveMinimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Number of trials", + "title": "N Trials" + }, "temperature": { - "items": { - "type": "number" - }, - "title": "Temperature", - "type": "array" + "anyOf": [ + { + "items": { + "exclusiveMinimum": 0.0, + "type": "number" + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceFloat" + } + ], + "title": "Temperature" }, "embedder_name": { "default": [ @@ -324,6 +398,20 @@ "title": "Module Name", "type": "string" }, + "n_trials": { + "anyOf": [ + { + "exclusiveMinimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Number of trials", + "title": "N Trials" + }, "search_space": { "default": [ null @@ -332,6 +420,8 @@ "anyOf": [ { "items": { + "maximum": 1.0, + "minimum": 0.0, "type": "number" }, "type": "array" @@ -358,12 +448,34 @@ "title": "Module Name", "type": "string" }, + "n_trials": { + "anyOf": [ + { + "exclusiveMinimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Number of trials", + "title": "N Trials" + }, "k": { - "items": { - "type": "integer" - }, - "title": "K", - "type": "array" + "anyOf": [ + { + "items": { + "exclusiveMinimum": 0, + "type": "integer" + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceInt" + } + ], + "title": "K" }, "weights": { "items": { @@ -410,6 +522,20 @@ "title": "Module Name", "type": "string" }, + "n_trials": { + "anyOf": [ + { + "exclusiveMinimum": 0, + "type": "integer" + }, + { + "type": "null" 
+ } + ], + "default": null, + "description": "Number of trials", + "title": "N Trials" + }, "embedder_name": { "default": [ null @@ -441,6 +567,20 @@ "title": "Module Name", "type": "string" }, + "n_trials": { + "anyOf": [ + { + "exclusiveMinimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Number of trials", + "title": "N Trials" + }, "embedder_name": { "items": { "type": "string" @@ -449,14 +589,22 @@ "type": "array" }, "cv": { + "anyOf": [ + { + "items": { + "exclusiveMinimum": 0, + "type": "integer" + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceInt" + } + ], "default": [ 3 ], - "items": { - "type": "integer" - }, - "title": "Cv", - "type": "array" + "title": "Cv" } }, "required": [ @@ -473,32 +621,70 @@ "title": "Module Name", "type": "string" }, + "n_trials": { + "anyOf": [ + { + "exclusiveMinimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Number of trials", + "title": "N Trials" + }, "k": { - "items": { - "type": "integer" - }, - "title": "K", - "type": "array" + "anyOf": [ + { + "items": { + "exclusiveMinimum": 0, + "type": "integer" + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceInt" + } + ], + "title": "K" }, "s": { + "anyOf": [ + { + "items": { + "exclusiveMinimum": 0.0, + "type": "number" + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceFloat" + } + ], "default": [ 1.0 ], - "items": { - "type": "number" - }, - "title": "S", - "type": "array" + "title": "S" }, "ignore_first_neighbours": { + "anyOf": [ + { + "items": { + "minimum": 0, + "type": "integer" + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceInt" + } + ], "default": [ 0 ], - "items": { - "type": "integer" - }, - "title": "Ignore First Neighbours", - "type": "array" + "title": "Ignore First Neighbours" }, "embedder_name": { "default": [ @@ -536,6 +722,70 @@ "title": "NodeType", "type": "string" }, + "ParamSpaceFloat": { + "properties": { + "low": { + "description": "Low boundary of the search space.", + "title": "Low", + "type": "number" + }, + "high": { + "description": "High boundary of the search space.", + "title": "High", + "type": "number" + }, + "step": { + "default": 0.1, + "description": "Step of the search space.", + "title": "Step", + "type": "number" + }, + "log": { + "default": false, + "description": "Whether to use a logarithmic scale.", + "title": "Log", + "type": "boolean" + } + }, + "required": [ + "low", + "high" + ], + "title": "ParamSpaceFloat", + "type": "object" + }, + "ParamSpaceInt": { + "properties": { + "low": { + "description": "Low boundary of the search space.", + "title": "Low", + "type": "integer" + }, + "high": { + "description": "High boundary of the search space.", + "title": "High", + "type": "integer" + }, + "step": { + "default": 1, + "description": "Step of the search space.", + "title": "Step", + "type": "integer" + }, + "log": { + "default": false, + "description": "Whether to use a logarithmic scale.", + "title": "Log", + "type": "boolean" + } + }, + "required": [ + "low", + "high" + ], + "title": "ParamSpaceInt", + "type": "object" + }, "RerankScorerInitModel": { "properties": { "module_name": { @@ -543,12 +793,33 @@ "title": "Module Name", "type": "string" }, + "n_trials": { + "anyOf": [ + { + "exclusiveMinimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Number of trials", + "title": "N Trials" + }, "k": { - "items": { - "type": "integer" - }, - "title": "K", - "type": 
"array" + "anyOf": [ + { + "items": { + "type": "integer" + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceInt" + } + ], + "title": "K" }, "weights": { "items": { @@ -597,38 +868,52 @@ "type": "array" }, "m": { + "anyOf": [ + { + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceInt" + } + ], "default": [ null ], - "items": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ] - }, - "title": "M", - "type": "array" + "title": "M" }, "rank_threshold_cutoff": { + "anyOf": [ + { + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceInt" + } + ], "default": [ null ], - "items": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ] - }, - "title": "Rank Threshold Cutoff", - "type": "array" + "title": "Rank Threshold Cutoff" } }, "required": [ @@ -647,12 +932,34 @@ "title": "Module Name", "type": "string" }, + "n_trials": { + "anyOf": [ + { + "exclusiveMinimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Number of trials", + "title": "N Trials" + }, "k": { - "items": { - "type": "integer" - }, - "title": "K", - "type": "array" + "anyOf": [ + { + "items": { + "exclusiveMinimum": 0, + "type": "integer" + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceInt" + } + ], + "title": "K" }, "embedder_name": { "items": { @@ -764,6 +1071,20 @@ "title": "Module Name", "type": "string" }, + "n_trials": { + "anyOf": [ + { + "exclusiveMinimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Number of trials", + "title": "N Trials" + }, "clf_name": { "default": [ "LogisticRegression" @@ -822,25 +1143,50 @@ "title": "Module Name", "type": "string" }, + "n_trials": { + "anyOf": [ + { + "exclusiveMinimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Number of trials", + "title": "N Trials" + }, "thresh": { + "anyOf": [ + { + "items": { + "anyOf": [ + { + "maximum": 1.0, + "minimum": 0.0, + "type": "number" + }, + { + "items": { + "maximum": 1.0, + "minimum": 0.0, + "type": "number" + }, + "type": "array" + } + ] + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceFloat" + } + ], "default": [ 0.5 ], - "items": { - "anyOf": [ - { - "type": "number" - }, - { - "items": { - "type": "number" - }, - "type": "array" - } - ] - }, - "title": "Thresh", - "type": "array" + "title": "Thresh" } }, "required": [ @@ -857,14 +1203,22 @@ "type": "string" }, "n_trials": { + "anyOf": [ + { + "items": { + "exclusiveMinimum": 0, + "type": "integer" + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceInt" + } + ], "default": [ 320 ], - "items": { - "type": "integer" - }, - "title": "N Trials", - "type": "array" + "title": "N Trials" } }, "required": [ From 1459b6077f55ea46d60f74860b87d6c4936b3224 Mon Sep 17 00:00:00 2001 From: voorhs Date: Fri, 14 Feb 2025 10:59:16 +0300 Subject: [PATCH 66/74] return CI config back to normal --- .github/workflows/test-inference.yaml | 2 ++ .github/workflows/test-nodes.yaml | 2 ++ .github/workflows/test-optimization.yaml | 2 ++ .github/workflows/unit-tests.yaml | 2 ++ 4 files changed, 8 insertions(+) diff --git a/.github/workflows/test-inference.yaml b/.github/workflows/test-inference.yaml index a68ef07e8..ca4c45ff0 100644 --- a/.github/workflows/test-inference.yaml +++ 
b/.github/workflows/test-inference.yaml @@ -5,6 +5,8 @@ on: branches: - dev pull_request: + branches: + - dev jobs: test: diff --git a/.github/workflows/test-nodes.yaml b/.github/workflows/test-nodes.yaml index b10161724..99507571b 100644 --- a/.github/workflows/test-nodes.yaml +++ b/.github/workflows/test-nodes.yaml @@ -5,6 +5,8 @@ on: branches: - dev pull_request: + branches: + - dev jobs: test: diff --git a/.github/workflows/test-optimization.yaml b/.github/workflows/test-optimization.yaml index 4625f39d7..ea1cf861e 100644 --- a/.github/workflows/test-optimization.yaml +++ b/.github/workflows/test-optimization.yaml @@ -5,6 +5,8 @@ on: branches: - dev pull_request: + branches: + - dev jobs: test: diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml index 5883080eb..3612d561f 100644 --- a/.github/workflows/unit-tests.yaml +++ b/.github/workflows/unit-tests.yaml @@ -5,6 +5,8 @@ on: branches: - dev pull_request: + branches: + - dev jobs: test: From f7798a1486107add1af0f8ca6c88f9361f4fcac8 Mon Sep 17 00:00:00 2001 From: voorhs Date: Fri, 14 Feb 2025 11:00:40 +0300 Subject: [PATCH 67/74] fix default value for step in `ParamSpaceFloat` --- autointent/nodes/_optimization/_node_optimizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autointent/nodes/_optimization/_node_optimizer.py b/autointent/nodes/_optimization/_node_optimizer.py index bca970ebd..8ff1255c8 100644 --- a/autointent/nodes/_optimization/_node_optimizer.py +++ b/autointent/nodes/_optimization/_node_optimizer.py @@ -28,7 +28,7 @@ class ParamSpaceInt(BaseModel): class ParamSpaceFloat(BaseModel): low: float = Field(..., description="Low boundary of the search space.") high: float = Field(..., description="High boundary of the search space.") - step: float = Field(0.1, description="Step of the search space.") + step: float | None = Field(None, description="Step of the search space.") log: bool = Field(False, description="Whether to use a logarithmic scale.") From 2adb593bcb737d7c695ec8ac1e721bc3ccd3e5fe Mon Sep 17 00:00:00 2001 From: voorhs Date: Fri, 14 Feb 2025 11:18:45 +0300 Subject: [PATCH 68/74] update schema --- docs/optimizer_config.schema.json | 75 ++++++++++++++----------------- 1 file changed, 34 insertions(+), 41 deletions(-) diff --git a/docs/optimizer_config.schema.json b/docs/optimizer_config.schema.json index 2d2213e7e..a0015d750 100644 --- a/docs/optimizer_config.schema.json +++ b/docs/optimizer_config.schema.json @@ -940,10 +940,17 @@ "type": "number" }, "step": { - "default": 0.1, + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "default": null, "description": "Step of the search space.", - "title": "Step", - "type": "number" + "title": "Step" }, "log": { "default": false, @@ -1073,52 +1080,38 @@ "type": "array" }, "m": { - "anyOf": [ - { - "items": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ] - }, - "type": "array" - }, - { - "$ref": "#/$defs/ParamSpaceInt" - } - ], "default": [ null ], - "title": "M" + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] + }, + "title": "M", + "type": "array" }, "rank_threshold_cutoff": { - "anyOf": [ - { - "items": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ] - }, - "type": "array" - }, - { - "$ref": "#/$defs/ParamSpaceInt" - } - ], "default": [ null ], - "title": "Rank Threshold Cutoff" + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] + }, + "title": "Rank Threshold Cutoff", + "type": 
"array" } }, "required": [ From c32c356146119d24d80fd13640d9798ca7d99976 Mon Sep 17 00:00:00 2001 From: voorhs Date: Fri, 14 Feb 2025 11:22:10 +0300 Subject: [PATCH 69/74] update callback test --- tests/callback/test_callback.py | 67 +++++++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 3 deletions(-) diff --git a/tests/callback/test_callback.py b/tests/callback/test_callback.py index 53b70491c..c9931a6b7 100644 --- a/tests/callback/test_callback.py +++ b/tests/callback/test_callback.py @@ -118,17 +118,78 @@ def test_pipeline_callbacks(dataset): ("end_module", {}), ( "start_module", - {"module_kwargs": {"embedder_config": None, "k": 1, "weights": "uniform"}, "module_name": "knn", "num": 0}, + { + "module_kwargs": { + "embedder_config": { + "batch_size": 32, + "classifier_prompt": None, + "cluster_prompt": None, + "default_prompt": None, + "device": None, + "max_length": None, + "model_name": "sergeyzh/rubert-tiny-turbo", + "passage_prompt": None, + "query_prompt": None, + "sts_prompt": None, + "use_cache": False, + }, + "k": 1, + "weights": "uniform", + }, + "module_name": "knn", + "num": 0, + }, ), ("log_metric", {"metrics": {"scoring_accuracy": 1.0, "scoring_roc_auc": 1.0}}), ("end_module", {}), ( "start_module", - {"module_kwargs": {"embedder_config": None, "k": 1, "weights": "distance"}, "module_name": "knn", "num": 1}, + { + "module_kwargs": { + "embedder_config": { + "batch_size": 32, + "classifier_prompt": None, + "cluster_prompt": None, + "default_prompt": None, + "device": None, + "max_length": None, + "model_name": "sergeyzh/rubert-tiny-turbo", + "passage_prompt": None, + "query_prompt": None, + "sts_prompt": None, + "use_cache": False, + }, + "k": 1, + "weights": "distance", + }, + "module_name": "knn", + "num": 1, + }, ), ("log_metric", {"metrics": {"scoring_accuracy": 1.0, "scoring_roc_auc": 1.0}}), ("end_module", {}), - ("start_module", {"module_kwargs": {"embedder_config": None}, "module_name": "linear", "num": 0}), + ( + "start_module", + { + "module_kwargs": { + "embedder_config": { + "batch_size": 32, + "classifier_prompt": None, + "cluster_prompt": None, + "default_prompt": None, + "device": None, + "max_length": None, + "model_name": "sergeyzh/rubert-tiny-turbo", + "passage_prompt": None, + "query_prompt": None, + "sts_prompt": None, + "use_cache": False, + }, + }, + "module_name": "linear", + "num": 0, + }, + ), ("log_metric", {"metrics": {"scoring_accuracy": 0.75, "scoring_roc_auc": 1.0}}), ("end_module", {}), ("start_module", {"module_kwargs": {"thresh": 0.5}, "module_name": "threshold", "num": 0}), From 70b4079223c9f2d94dccab1b75dedfb4c19ef121 Mon Sep 17 00:00:00 2001 From: voorhs Date: Fri, 14 Feb 2025 11:22:16 +0300 Subject: [PATCH 70/74] change CI config --- .github/workflows/test-inference.yaml | 2 -- .github/workflows/test-nodes.yaml | 2 -- .github/workflows/test-optimization.yaml | 2 -- .github/workflows/unit-tests.yaml | 2 -- 4 files changed, 8 deletions(-) diff --git a/.github/workflows/test-inference.yaml b/.github/workflows/test-inference.yaml index ca4c45ff0..a68ef07e8 100644 --- a/.github/workflows/test-inference.yaml +++ b/.github/workflows/test-inference.yaml @@ -5,8 +5,6 @@ on: branches: - dev pull_request: - branches: - - dev jobs: test: diff --git a/.github/workflows/test-nodes.yaml b/.github/workflows/test-nodes.yaml index 99507571b..b10161724 100644 --- a/.github/workflows/test-nodes.yaml +++ b/.github/workflows/test-nodes.yaml @@ -5,8 +5,6 @@ on: branches: - dev pull_request: - branches: - - dev jobs: test: diff --git 
a/.github/workflows/test-optimization.yaml b/.github/workflows/test-optimization.yaml index ea1cf861e..4625f39d7 100644 --- a/.github/workflows/test-optimization.yaml +++ b/.github/workflows/test-optimization.yaml @@ -5,8 +5,6 @@ on: branches: - dev pull_request: - branches: - - dev jobs: test: diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml index 3612d561f..5883080eb 100644 --- a/.github/workflows/unit-tests.yaml +++ b/.github/workflows/unit-tests.yaml @@ -5,8 +5,6 @@ on: branches: - dev pull_request: - branches: - - dev jobs: test: From d34a4c02902f458d5ea34dec55e8997c2b218d25 Mon Sep 17 00:00:00 2001 From: voorhs Date: Fri, 14 Feb 2025 11:25:31 +0300 Subject: [PATCH 71/74] update search space configs for testing --- tests/assets/configs/multiclass.yaml | 2 +- tests/assets/configs/optuna.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/assets/configs/multiclass.yaml b/tests/assets/configs/multiclass.yaml index 69db79f8b..ea76dda24 100644 --- a/tests/assets/configs/multiclass.yaml +++ b/tests/assets/configs/multiclass.yaml @@ -20,7 +20,7 @@ - avsolatorio/GIST-small-Embedding-v0 k: [1, 3] - module_name: sklearn - embedder_name: + embedder_config: - sergeyzh/rubert-tiny-turbo clf_name: - LogisticRegression diff --git a/tests/assets/configs/optuna.yaml b/tests/assets/configs/optuna.yaml index 636e217fc..b91b463ee 100644 --- a/tests/assets/configs/optuna.yaml +++ b/tests/assets/configs/optuna.yaml @@ -3,7 +3,7 @@ search_space: - module_name: retrieval k: [10] - embedder_name: + embedder_config: - sentence-transformers/all-MiniLM-L6-v2 - avsolatorio/GIST-small-Embedding-v0 - node_type: scoring From d8d7852362f848103218fd80864bf2803fce18fd Mon Sep 17 00:00:00 2001 From: voorhs Date: Fri, 14 Feb 2025 11:28:45 +0300 Subject: [PATCH 72/74] enable validation back --- autointent/_pipeline/_pipeline.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/autointent/_pipeline/_pipeline.py b/autointent/_pipeline/_pipeline.py index 2b4e463fc..3085b90ba 100644 --- a/autointent/_pipeline/_pipeline.py +++ b/autointent/_pipeline/_pipeline.py @@ -13,6 +13,7 @@ from autointent.custom_types import ListOfGenericLabels, NodeType, SamplerType, ValidationScheme from autointent.metrics import PREDICTION_METRICS_MULTILABEL from autointent.nodes import InferenceNode, NodeOptimizer +from autointent.nodes.schemes import OptimizationConfig from autointent.utils import load_default_search_space, load_search_space from ._schemas import InferencePipelineOutput, InferencePipelineUtteranceOutput @@ -70,7 +71,7 @@ def from_search_space(cls, search_space: list[dict[str, Any]] | Path | str, seed """ if isinstance(search_space, Path | str): search_space = load_search_space(search_space) - validated_search_space = search_space # OptimizationConfig(search_space).model_dump() # type: ignore[arg-type] + validated_search_space = OptimizationConfig(search_space).model_dump() # type: ignore[arg-type] nodes = [NodeOptimizer(**node) for node in validated_search_space] return cls(nodes=nodes, seed=seed) From f727bf4af517ad55f8aa78e7051ccf26febc9317 Mon Sep 17 00:00:00 2001 From: voorhs Date: Fri, 14 Feb 2025 13:44:55 +0300 Subject: [PATCH 73/74] remove TunableDecision from search spaces --- tests/assets/configs/multiclass.yaml | 1 - tests/assets/configs/multilabel.yaml | 1 - tests/assets/configs/optuna.yaml | 1 - 3 files changed, 3 deletions(-) diff --git a/tests/assets/configs/multiclass.yaml b/tests/assets/configs/multiclass.yaml index ea76dda24..3fbf8948c 
100644 --- a/tests/assets/configs/multiclass.yaml +++ b/tests/assets/configs/multiclass.yaml @@ -36,6 +36,5 @@ search_space: - module_name: threshold thresh: [0.5, [0.5, 0.5, 0.5, 0.5]] - - module_name: tunable - module_name: argmax - module_name: jinoos diff --git a/tests/assets/configs/multilabel.yaml b/tests/assets/configs/multilabel.yaml index 879c31c6d..91742358a 100644 --- a/tests/assets/configs/multilabel.yaml +++ b/tests/assets/configs/multilabel.yaml @@ -32,5 +32,4 @@ search_space: - module_name: threshold thresh: [0.5, [0.5, 0.5, 0.5, 0.5]] - - module_name: tunable - module_name: adaptive diff --git a/tests/assets/configs/optuna.yaml b/tests/assets/configs/optuna.yaml index b91b463ee..b775ab3f6 100644 --- a/tests/assets/configs/optuna.yaml +++ b/tests/assets/configs/optuna.yaml @@ -24,6 +24,5 @@ thresh: low: 0.1 high: 0.9 - - module_name: tunable - module_name: argmax - module_name: jinoos From fa006f8ae47d2c5b90bcf42d5532922129833fd9 Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Fri, 14 Feb 2025 14:41:28 +0300 Subject: [PATCH 74/74] upd schema --- docs/optimizer_config.schema.json | 62 +++++++++++++++++++------------ 1 file changed, 38 insertions(+), 24 deletions(-) diff --git a/docs/optimizer_config.schema.json b/docs/optimizer_config.schema.json index a0015d750..f51fa93a3 100644 --- a/docs/optimizer_config.schema.json +++ b/docs/optimizer_config.schema.json @@ -1080,38 +1080,52 @@ "type": "array" }, "m": { + "anyOf": [ + { + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceInt" + } + ], "default": [ null ], - "items": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ] - }, - "title": "M", - "type": "array" + "title": "M" }, "rank_threshold_cutoff": { + "anyOf": [ + { + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "$ref": "#/$defs/ParamSpaceInt" + } + ], "default": [ null ], - "items": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ] - }, - "title": "Rank Threshold Cutoff", - "type": "array" + "title": "Rank Threshold Cutoff" } }, "required": [