[v2] Remove metadata dict (#1719)
* remove metadata dict

* lint

* fix n_experiments
Samoed authored Jan 8, 2025
1 parent 2519c7a commit 9bc4a1a
Showing 125 changed files with 321 additions and 525 deletions.
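At a glance, the commit replaces dictionary-style access on `metadata_dict` with attribute access on each task's `metadata` object, while `metadata.dataset` stays a plain dict of `load_dataset` keyword arguments. A minimal sketch of the before/after pattern (the `TaskMetadata` stand-in and its values below are illustrative assumptions, not mteb's actual model):

```python
from dataclasses import dataclass, field

# Hypothetical stand-in for mteb's TaskMetadata; the field names (name, type,
# dataset, eval_splits, ...) mirror those accessed in the hunks below.
@dataclass
class TaskMetadata:
    name: str
    type: str
    dataset: dict = field(default_factory=dict)

meta = TaskMetadata(
    name="ExampleTask",
    type="Classification",
    dataset={"path": "org/example-dataset", "revision": "abc123"},  # placeholder values
)

# Old pattern removed by this commit: task.metadata_dict["name"], task.metadata_dict["dataset"]["path"]
# New pattern:
print(meta.name)             # ExampleTask
print(meta.dataset["path"])  # org/example-dataset
```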
4 changes: 2 additions & 2 deletions mteb/abstasks/AbsTask.py
@@ -137,7 +137,7 @@ def evaluate(

for hf_subset in hf_subsets:
logger.info(
f"\nTask: {self.metadata_dict['name']}, split: {split}, subset: {hf_subset}. Running..."
f"\nTask: {self.metadata.name}, split: {split}, subset: {hf_subset}. Running..."
)
if hf_subset not in self.dataset and hf_subset == "default":
data_split = self.dataset[split]
@@ -213,7 +213,7 @@ def load_data(self, **kwargs):
"""
if self.data_loaded:
return
- self.dataset = datasets.load_dataset(**self.metadata_dict["dataset"]) # type: ignore
+ self.dataset = datasets.load_dataset(**self.metadata.dataset) # type: ignore
self.dataset_transform()
self.data_loaded = True

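The load_data pattern above now unpacks `self.metadata.dataset` straight into `datasets.load_dataset`. A hedged sketch of the equivalent call, assuming that field is simply a dict of `load_dataset` keyword arguments (the path and revision are placeholders, and running this would trigger a Hub download):

```python
import datasets

# Assumed shape of TaskMetadata.dataset: keyword arguments for datasets.load_dataset.
dataset_kwargs = {"path": "org/example-dataset", "revision": "abc123"}  # placeholder values

# Equivalent to `self.dataset = datasets.load_dataset(**self.metadata.dataset)`:
ds = datasets.load_dataset(**dataset_kwargs)
```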
7 changes: 3 additions & 4 deletions mteb/abstasks/AbsTaskClassification.py
@@ -54,7 +54,7 @@ class AbsTaskClassification(AbsTask):
"""Abstract class for kNN classification tasks
The similarity is computed between pairs and the results are ranked.
- self.load_data() must generate a huggingface dataset with a split matching self.metadata_dict["eval_splits"], and assign it to self.dataset. It
+ self.load_data() must generate a huggingface dataset with a split matching self.metadata.eval_splits, and assign it to self.dataset. It
must contain the following columns:
text: str
label: int
@@ -66,6 +66,7 @@ class AbsTaskClassification(AbsTask):

abstask_prompt = "Classify user passages."
samples_per_label: int = 8
+ n_experiments: int = 10

def __init__(
self,
@@ -79,9 +80,7 @@ def __init__(

# Bootstrap parameters
self.n_experiments: int = ( # type: ignore
- n_experiments
- if n_experiments is not None
- else self.metadata_dict.get("n_experiments", 10)
+ n_experiments if n_experiments is not None else self.n_experiments
)

# kNN parameters
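With this hunk, the bootstrap count comes from the class attribute `n_experiments: int = 10` instead of `metadata_dict.get("n_experiments", 10)`, and an explicit constructor argument still wins. A simplified sketch of that precedence (not the real AbsTaskClassification signature):

```python
class ClassificationTaskSketch:
    # Class-level default replaces metadata_dict.get("n_experiments", 10).
    n_experiments: int = 10

    def __init__(self, n_experiments=None):
        # An explicit argument wins; otherwise the class (or subclass) default applies.
        self.n_experiments = (
            n_experiments if n_experiments is not None else self.n_experiments
        )

class ThirtyTwoRunTask(ClassificationTaskSketch):
    n_experiments = 32  # subclasses can now override the default declaratively

print(ClassificationTaskSketch().n_experiments)   # 10
print(ClassificationTaskSketch(5).n_experiments)  # 5
print(ThirtyTwoRunTask().n_experiments)           # 32
```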
2 changes: 1 addition & 1 deletion mteb/abstasks/AbsTaskClustering.py
@@ -57,7 +57,7 @@ class AbsTaskClustering(AbsTask):
"""Abstract class for Clustering tasks
The similarity is computed between pairs and the results are ranked.
- self.load_data() must generate a huggingface dataset with a split matching self.metadata_dict["eval_splits"], and assign it to self.dataset. It must contain the following columns:
+ self.load_data() must generate a huggingface dataset with a split matching self.metadata.eval_splits, and assign it to self.dataset. It must contain the following columns:
sentences: list of str
labels: list of str
"""
4 changes: 2 additions & 2 deletions mteb/abstasks/AbsTaskClusteringFast.py
@@ -129,7 +129,7 @@ class AbsTaskClusteringFast(AbsTask):
If the clustering is hierarchical, and more than one label is specified in order for each observation,
V-measures are calculated in the outlined way on each of the levels separately.
- self.load_data() must generate a huggingface dataset with a split matching self.metadata_dict["eval_splits"], and assign it to self.dataset.
+ self.load_data() must generate a huggingface dataset with a split matching self.metadata.eval_splits, and assign it to self.dataset.
It must contain the following columns:
sentences: list[str]
labels: list[str] | list[list[str]]
@@ -147,7 +147,7 @@ def __init__(self, **kwargs):
super().__init__(**kwargs)

def _add_main_score(self, scores):
- if self.metadata_dict["main_score"] in scores:
+ if self.metadata.main_score in scores:
scores["main_score"] = scores[self.metadata.main_score]
else:
logger.warning(
2 changes: 1 addition & 1 deletion mteb/abstasks/AbsTaskMultilabelClassification.py
@@ -81,7 +81,7 @@ class AbsTaskMultilabelClassification(AbsTask):
"""Abstract class for multioutput classification tasks
The similarity is computed between pairs and the results are ranked.
- self.load_data() must generate a huggingface dataset with a split matching self.metadata_dict["eval_splits"], and assign it to self.dataset. It must contain the following columns:
+ self.load_data() must generate a huggingface dataset with a split matching self.metadata.eval_splits, and assign it to self.dataset. It must contain the following columns:
text: str
label: list[list[int]]
2 changes: 1 addition & 1 deletion mteb/abstasks/AbsTaskPairClassification.py
@@ -59,7 +59,7 @@ class AbsTaskPairClassification(AbsTask):
The similarity is computed between pairs and the results are ranked. Average precision
is computed to measure how well the methods can be used for pairwise pair classification.
- self.load_data() must generate a huggingface dataset with a split matching self.metadata_dict["eval_splits"], and assign it to self.dataset. It must contain the following columns:
+ self.load_data() must generate a huggingface dataset with a split matching self.metadata.eval_splits, and assign it to self.dataset. It must contain the following columns:
sentence1: list[str]
sentence2: list[str]
labels: list[int]
6 changes: 3 additions & 3 deletions mteb/abstasks/AbsTaskReranking.py
@@ -107,14 +107,14 @@ def transform_old_dataset_format(self, given_dataset=None):
for hf_subset in hf_subsets:
if given_dataset:
cur_dataset = given_dataset
elif "name" in self.metadata_dict["dataset"]:
cur_dataset = datasets.load_dataset(**self.metadata_dict["dataset"]) # type: ignore
elif "name" in self.metadata.dataset:
cur_dataset = datasets.load_dataset(**self.metadata.dataset) # type: ignore
assert (
hf_subset == "default"
), f"Only default subset is supported for {self.metadata.name} since `name` is given in the metadata."
else:
cur_dataset = datasets.load_dataset(
**self.metadata_dict["dataset"], name=hf_subset
**self.metadata.dataset, name=hf_subset
) # type: ignore

for split in cur_dataset:
12 changes: 3 additions & 9 deletions mteb/abstasks/AbsTaskSTS.py
@@ -54,25 +54,19 @@ class STSDescriptiveStatistics(DescriptiveStatistics):
class AbsTaskSTS(AbsTask):
"""Abstract class for STS experiments.
- self.load_data() must generate a huggingface dataset with a split matching self.metadata_dict["eval_splits"], and assign it to self.dataset. It must contain the following columns::
+ self.load_data() must generate a huggingface dataset with a split matching self.metadata.eval_splits, and assign it to self.dataset. It must contain the following columns::
sentence1: str
sentence2: str
score: float
"""

abstask_prompt = "Retrieve semantically similar text."
+ min_score: int
+ max_score: int

def __init__(self, **kwargs):
super().__init__(**kwargs)

- @property
- def min_score(self) -> int:
- return self.metadata_dict["min_score"]
-
- @property
- def max_score(self) -> int:
- return self.metadata_dict["max_score"]

def _evaluate_subset(
self, model, data_split, *, encode_kwargs: dict[str, Any] = {}, **kwargs
) -> ScoresDict:
13 changes: 4 additions & 9 deletions mteb/abstasks/AbsTaskSummarization.py
@@ -68,13 +68,16 @@ class SummarizationDescriptiveStatistics(DescriptiveStatistics):
class AbsTaskSummarization(AbsTask):
"""Abstract class for summarization experiments.
- self.load_data() must generate a huggingface dataset with a split matching self.metadata_dict["eval_splits"], and assign it to self.dataset. It must contain the following columns:
+ self.load_data() must generate a huggingface dataset with a split matching self.metadata.eval_splits, and assign it to self.dataset. It must contain the following columns:
text: str
human_summaries: list[str]
machine_summaries: list[str]
relevance: list[float] (the score of the machine generated summaries)
"""

+ min_score: int
+ max_score: int

evalutor = SummarizationEvaluator
abstask_prompt = (
"Given a news summary, retrieve other semantically similar summaries."
@@ -83,14 +86,6 @@ class AbsTaskSummarization(AbsTask):
def __init__(self, **kwargs):
super().__init__(**kwargs)

- @property
- def min_score(self):
- return self.metadata_dict["min_score"]
-
- @property
- def max_score(self):
- return self.metadata_dict["max_score"]

def _evaluate_subset(
self, model: Encoder, data_split, *, encode_kwargs: dict[str, Any], **kwargs
) -> ScoresDict:
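In both AbsTaskSTS and AbsTaskSummarization, `min_score` and `max_score` change from properties that read `metadata_dict` into plain annotated class attributes, so concrete tasks are expected to declare the values directly. A minimal sketch under that assumption (class names are illustrative):

```python
class STSTaskSketch:
    # Annotations only; concrete tasks supply the values that previously came
    # from metadata_dict["min_score"] / metadata_dict["max_score"].
    min_score: int
    max_score: int

class MySTSTask(STSTaskSketch):
    min_score = 0
    max_score = 5

task = MySTSTask()
# e.g. normalizing a gold similarity score of 4 into [0, 1]
normalized = (4 - task.min_score) / (task.max_score - task.min_score)
print(normalized)  # 0.8
```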
33 changes: 14 additions & 19 deletions mteb/evaluation/MTEB.py
@@ -121,16 +121,16 @@ def deprecation_warning(

@property
def available_tasks(self):
return [x.metadata_dict["name"] for x in self.tasks_cls]
return [x.metadata.name for x in self.tasks_cls]

@property
def available_task_types(self):
# sort the task types
- return sorted({x.metadata_dict["type"] for x in self.tasks_cls})
+ return sorted({x.metadata.type for x in self.tasks_cls})

@property
def available_task_categories(self):
return {x.metadata_dict["category"] for x in self.tasks_cls}
return {x.metadata.category for x in self.tasks_cls}

def _extend_lang_code(self):
# add all possible language codes
@@ -241,12 +241,10 @@ def select_tasks(self, **kwargs):
# If `task_list` is specified, select list of tasks
if self._tasks is not None:
self.tasks = list(
- filter(
- lambda x: (x.metadata_dict["name"] in self._tasks), self.tasks_cls
- )
+ filter(lambda x: (x.metadata.name in self._tasks), self.tasks_cls)
)
if len(self.tasks) != len(self._tasks):
- tasks_known = {x.metadata_dict["name"] for x in self.tasks_cls}
+ tasks_known = {x.metadata.name for x in self.tasks_cls}
tasks_unknown = {
x for x in self._tasks if isinstance(x, str)
} - tasks_known
@@ -265,23 +263,22 @@ def select_tasks(self, **kwargs):
# Otherwise use filters to select tasks
filtered_tasks = filter(
lambda x: (self._task_types is None)
or (x.metadata_dict["type"] in self._task_types),
or (x.metadata.type in self._task_types),
self.tasks_cls,
)
filtered_tasks = filter(
lambda x: (self._task_categories is None)
or (x.metadata_dict["category"] in self._task_categories),
or (x.metadata.category in self._task_categories),
filtered_tasks,
)
filtered_tasks = filter(
- lambda x: (self._version is None)
- or (x.metadata_dict["version"] >= self._version),
+ lambda x: (self._version is None) or (x.metadata.version >= self._version),
filtered_tasks,
)
# keep only tasks with at least one language in the filter
filtered_tasks = filter(
- lambda x: (not (self._task_langs))
- or (len(set(x.metadata_dict["eval_langs"]) & set(self._task_langs)) > 0),
+ lambda x: (not self._task_langs)
+ or (len(set(x.metadata.eval_langs) & set(self._task_langs)) > 0),
filtered_tasks,
)

@@ -292,7 +289,7 @@ def load_tasks_data(self):
"""Load datasets for the selected tasks."""
logger.info(f"\n\n## Loading datasets for {len(self.tasks)} tasks")
for task in self.tasks:
logger.info(f"\n# Loading dataset for {task.metadata_dict['name']}")
logger.info(f"\n# Loading dataset for {task.metadata.name}")
task.load_data()

@staticmethod
@@ -595,7 +592,7 @@ def run(
)

logger.info(
f"Evaluation for {task.metadata_dict['name']} on {split} took {tock - tick:.2f} seconds"
f"Evaluation for {task.metadata.name} on {split} took {tock - tick:.2f} seconds"
)
evaluation_time += tock - tick

@@ -627,16 +624,14 @@ def run(
evaluation_results.append(merged_results)

except Exception as e:
- logger.error(
- f"Error while evaluating {task.metadata_dict['name']}: {e}"
- )
+ logger.error(f"Error while evaluating {task.metadata.name}: {e}")
if raise_error:
raise e
logger.error(
f"Please check all the error logs at: {self.err_logs_path}"
)
with open(self.err_logs_path, "a") as f_out:
f_out.write(f"{datetime.now()} >>> {task.metadata_dict['name']}\n")
f_out.write(f"{datetime.now()} >>> {task.metadata.name}\n")
f_out.write(traceback.format_exc())
f_out.write("\n\n")

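The selection filters in MTEB.select_tasks now compare against metadata attributes (name, type, category, version, eval_langs). A small self-contained illustration of the same filtering idea, using hypothetical stand-ins rather than real mteb task classes:

```python
from dataclasses import dataclass

@dataclass
class Meta:  # hypothetical stand-in for TaskMetadata
    name: str
    type: str
    eval_langs: list

@dataclass
class Task:  # hypothetical stand-in for an mteb task
    metadata: Meta

tasks_cls = [
    Task(Meta("TaskA", "Classification", ["eng"])),
    Task(Meta("TaskB", "STS", ["fra"])),
]

task_types = ["Classification"]
task_langs = ["eng"]

# Same shape as the filters above: match on type, then require a language overlap.
selected = [
    t
    for t in tasks_cls
    if t.metadata.type in task_types
    and set(t.metadata.eval_langs) & set(task_langs)
]
print([t.metadata.name for t in selected])  # ['TaskA']
```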
4 changes: 2 additions & 2 deletions mteb/tasks/BitextMining/kat/TbilisiCityHallBitextMining.py
@@ -51,10 +51,10 @@ def load_data(self, **kwargs) -> None:
for lang in self.hf_subsets:
l1, l2 = lang.split("-")
dataset = load_dataset(
self.metadata_dict["dataset"]["path"],
self.metadata.dataset["path"],
split=_EVAL_SPLIT,
cache_dir=kwargs.get("cache_dir", None),
- revision=self.metadata_dict["dataset"]["revision"],
+ revision=self.metadata.dataset["revision"],
)
dataset = dataset.rename_columns(
{_LANGUAGES[l1]: "sentence1", _LANGUAGES[l2]: "sentence2"}
@@ -908,7 +908,7 @@ def load_data(self, **kwargs: Any) -> None:
else:
dataset = datasets.load_dataset(
name=self._transform_lang_name_hf(lang),
**self.metadata_dict["dataset"],
**self.metadata.dataset,
)
self.dataset[lang] = datasets.DatasetDict({"train": dataset})
seen_pairs.append(hf_lang_name)
2 changes: 1 addition & 1 deletion mteb/tasks/BitextMining/multilingual/DiaBLaBitextMining.py
@@ -52,7 +52,7 @@ def load_data(self, **kwargs):
self.dataset = {}

for lang in self.hf_subsets:
- self.dataset[lang] = datasets.load_dataset(**self.metadata_dict["dataset"])
+ self.dataset[lang] = datasets.load_dataset(**self.metadata.dataset)

self.dataset_transform()
self.data_loaded = True
2 changes: 1 addition & 1 deletion mteb/tasks/BitextMining/multilingual/FloresBitextMining.py
@@ -274,5 +274,5 @@ def load_data(self, **kwargs: Any) -> None:
"""Load dataset from HuggingFace hub"""
if self.data_loaded:
return
- self.dataset = datasets.load_dataset(**self.metadata_dict["dataset"])
+ self.dataset = datasets.load_dataset(**self.metadata.dataset)
self.data_loaded = True
@@ -106,5 +106,5 @@ def load_data(self, **kwargs: Any) -> None:
"""Load dataset from HuggingFace hub"""
if self.data_loaded:
return
- self.dataset = datasets.load_dataset(**self.metadata_dict["dataset"])
+ self.dataset = datasets.load_dataset(**self.metadata.dataset)
self.data_loaded = True
@@ -100,5 +100,5 @@ def load_data(self, **kwargs: Any) -> None:
"""Load dataset from HuggingFace hub"""
if self.data_loaded:
return
- self.dataset = datasets.load_dataset(**self.metadata_dict["dataset"])
+ self.dataset = datasets.load_dataset(**self.metadata.dataset)
self.data_loaded = True
@@ -149,7 +149,7 @@ def load_data(self, **kwargs: Any) -> None:
language = f"{coded_source_language}_en"

self.dataset[lang] = datasets.load_dataset(
**self.metadata_dict["dataset"],
**self.metadata.dataset,
field="examples",
data_files={
"validation": f"flores_{language}_dev.json",
@@ -42,7 +42,7 @@ def load_data(self, **kwargs):

self.dataset = {}
for lang in self.hf_subsets:
- self.dataset[lang] = datasets.load_dataset(**self.metadata_dict["dataset"])
+ self.dataset[lang] = datasets.load_dataset(**self.metadata.dataset)

self.dataset_transform()
self.data_loaded = True
2 changes: 1 addition & 1 deletion mteb/tasks/BitextMining/srn/SRNCorpusBitextMining.py
@@ -71,7 +71,7 @@ def _clean_columns(batch, keys):
dataset = datasets.load_dataset(
name="srn-nl_other",
split="test",
**self.metadata_dict["dataset"],
**self.metadata.dataset,
).map(lambda batch: _clean_columns(batch, ["nl", "srn"]), batched=True)
dataset = dataset.rename_columns(
{_LANGUAGES[l1]: "sentence1", _LANGUAGES[l2]: "sentence2"}
4 changes: 2 additions & 2 deletions mteb/tasks/Classification/eng/LegalBenchClassification.py
@@ -4550,9 +4550,9 @@ def load_data(self, **kwargs: Any) -> None:
class_count = 0
for dataset_col_map in _MAUD_DATASET_MAP:
_dataset = datasets.load_dataset(
self.metadata_dict["dataset"]["path"],
self.metadata.dataset["path"],
dataset_col_map["name"],
- revision=self.metadata_dict["dataset"]["revision"],
+ revision=self.metadata.dataset["revision"],
trust_remote_code=True,
)

@@ -57,7 +57,7 @@ class IndonesianMongabayConservationClassification(AbsTaskClassification):
)

def dataset_transform(self):
splits = self.metadata_dict["eval_splits"]
splits = self.metadata.eval_splits
class_labels = ["positif", "netral", "negatif"]

ds = {}
@@ -65,7 +65,7 @@ def load_data(self, **kwargs):
return
self.dataset = {}
for lang in self.hf_subsets:
- metadata = self.metadata_dict.get("dataset", None)
+ metadata = self.metadata.dataset
dataset = datasets.load_dataset(name=lang, **metadata)
self.dataset[lang] = _transform(dataset, lang)
self.dataset_transform()
@@ -110,9 +110,7 @@ def load_data(self, **kwargs: Any) -> None:

labels = sorted(_LANGUAGES.keys())

- data = datasets.load_dataset(**self.metadata_dict["dataset"])["train"]["data"][
- 0
- ]
+ data = datasets.load_dataset(**self.metadata.dataset)["train"]["data"][0]

dataset = {"train": [], "test": []}
for lang, lang_code in LANG_MAP.items():