From 563dfa9c65b7385cdd76758e4bf83ff0526632a0 Mon Sep 17 00:00:00 2001 From: KahnSvaer Date: Tue, 14 Jan 2025 04:02:19 +0530 Subject: [PATCH 1/7] Add MLQA * add mlqa_common_yaml * add 49 tests of mlqa family * update tasks/README.md --------- --- lm_eval/tasks/README.md | 1 + lm_eval/tasks/mlqa/README.md | 101 ++++++++++++++ lm_eval/tasks/mlqa/generate_tasks.py | 48 +++++++ lm_eval/tasks/mlqa/mlqa_ar_ar.yaml | 5 + lm_eval/tasks/mlqa/mlqa_ar_de.yaml | 5 + lm_eval/tasks/mlqa/mlqa_ar_en.yaml | 5 + lm_eval/tasks/mlqa/mlqa_ar_es.yaml | 5 + lm_eval/tasks/mlqa/mlqa_ar_hi.yaml | 5 + lm_eval/tasks/mlqa/mlqa_ar_vi.yaml | 5 + lm_eval/tasks/mlqa/mlqa_ar_zh.yaml | 5 + lm_eval/tasks/mlqa/mlqa_common_yaml.yaml | 22 +++ lm_eval/tasks/mlqa/mlqa_de_ar.yaml | 5 + lm_eval/tasks/mlqa/mlqa_de_de.yaml | 5 + lm_eval/tasks/mlqa/mlqa_de_en.yaml | 5 + lm_eval/tasks/mlqa/mlqa_de_es.yaml | 5 + lm_eval/tasks/mlqa/mlqa_de_hi.yaml | 5 + lm_eval/tasks/mlqa/mlqa_de_vi.yaml | 5 + lm_eval/tasks/mlqa/mlqa_de_zh.yaml | 5 + lm_eval/tasks/mlqa/mlqa_en_ar.yaml | 5 + lm_eval/tasks/mlqa/mlqa_en_de.yaml | 5 + lm_eval/tasks/mlqa/mlqa_en_en.yaml | 5 + lm_eval/tasks/mlqa/mlqa_en_es.yaml | 5 + lm_eval/tasks/mlqa/mlqa_en_hi.yaml | 5 + lm_eval/tasks/mlqa/mlqa_en_vi.yaml | 5 + lm_eval/tasks/mlqa/mlqa_en_zh.yaml | 5 + lm_eval/tasks/mlqa/mlqa_es_ar.yaml | 5 + lm_eval/tasks/mlqa/mlqa_es_de.yaml | 5 + lm_eval/tasks/mlqa/mlqa_es_en.yaml | 5 + lm_eval/tasks/mlqa/mlqa_es_es.yaml | 5 + lm_eval/tasks/mlqa/mlqa_es_hi.yaml | 5 + lm_eval/tasks/mlqa/mlqa_es_vi.yaml | 5 + lm_eval/tasks/mlqa/mlqa_es_zh.yaml | 5 + lm_eval/tasks/mlqa/mlqa_hi_ar.yaml | 5 + lm_eval/tasks/mlqa/mlqa_hi_de.yaml | 5 + lm_eval/tasks/mlqa/mlqa_hi_en.yaml | 5 + lm_eval/tasks/mlqa/mlqa_hi_es.yaml | 5 + lm_eval/tasks/mlqa/mlqa_hi_hi.yaml | 5 + lm_eval/tasks/mlqa/mlqa_hi_vi.yaml | 5 + lm_eval/tasks/mlqa/mlqa_hi_zh.yaml | 5 + lm_eval/tasks/mlqa/mlqa_vi_ar.yaml | 5 + lm_eval/tasks/mlqa/mlqa_vi_de.yaml | 5 + lm_eval/tasks/mlqa/mlqa_vi_en.yaml | 5 + 
lm_eval/tasks/mlqa/mlqa_vi_es.yaml | 5 + lm_eval/tasks/mlqa/mlqa_vi_hi.yaml | 5 + lm_eval/tasks/mlqa/mlqa_vi_vi.yaml | 5 + lm_eval/tasks/mlqa/mlqa_vi_zh.yaml | 5 + lm_eval/tasks/mlqa/mlqa_zh_ar.yaml | 5 + lm_eval/tasks/mlqa/mlqa_zh_de.yaml | 5 + lm_eval/tasks/mlqa/mlqa_zh_en.yaml | 5 + lm_eval/tasks/mlqa/mlqa_zh_es.yaml | 5 + lm_eval/tasks/mlqa/mlqa_zh_hi.yaml | 5 + lm_eval/tasks/mlqa/mlqa_zh_vi.yaml | 5 + lm_eval/tasks/mlqa/mlqa_zh_zh.yaml | 5 + lm_eval/tasks/mlqa/utils.py | 165 +++++++++++++++++++++++ 54 files changed, 582 insertions(+) create mode 100644 lm_eval/tasks/mlqa/README.md create mode 100644 lm_eval/tasks/mlqa/generate_tasks.py create mode 100644 lm_eval/tasks/mlqa/mlqa_ar_ar.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_ar_de.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_ar_en.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_ar_es.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_ar_hi.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_ar_vi.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_ar_zh.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_common_yaml.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_de_ar.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_de_de.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_de_en.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_de_es.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_de_hi.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_de_vi.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_de_zh.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_en_ar.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_en_de.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_en_en.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_en_es.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_en_hi.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_en_vi.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_en_zh.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_es_ar.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_es_de.yaml create mode 100644 
lm_eval/tasks/mlqa/mlqa_es_en.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_es_es.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_es_hi.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_es_vi.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_es_zh.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_hi_ar.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_hi_de.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_hi_en.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_hi_es.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_hi_hi.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_hi_vi.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_hi_zh.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_vi_ar.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_vi_de.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_vi_en.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_vi_es.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_vi_hi.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_vi_vi.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_vi_zh.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_zh_ar.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_zh_de.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_zh_en.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_zh_es.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_zh_hi.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_zh_vi.yaml create mode 100644 lm_eval/tasks/mlqa/mlqa_zh_zh.yaml create mode 100644 lm_eval/tasks/mlqa/utils.py diff --git a/lm_eval/tasks/README.md b/lm_eval/tasks/README.md index 20a1dfa5db..92762e7620 100644 --- a/lm_eval/tasks/README.md +++ b/lm_eval/tasks/README.md @@ -78,6 +78,7 @@ | medqa | Multiple choice question answering based on the United States Medical License Exams. | | | [mgsm](mgsm/README.md) | Benchmark of multilingual grade-school math problems. 
| Spanish, French, German, Russian, Chinese, Japanese, Thai, Swahili, Bengali, Telugu | | [minerva_math](minerva_math/README.md) | Mathematics-focused tasks requiring numerical reasoning and problem-solving skills. | English | +| [mlqa](mlqa/README.md) | MultiLingual Question Answering benchmark dataset for evaluating cross-lingual question answering performance. | English, Arabic, German, Spanish, Hindi, Vietnamese, Simplified Chinese | | [mmlu](mmlu/README.md) | Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported. | English | | [mmlu_pro](mmlu_pro/README.md) | A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options. | English | | [mmlusr](mmlusr/README.md) | Variation of MMLU designed to be more rigorous. | English | diff --git a/lm_eval/tasks/mlqa/README.md b/lm_eval/tasks/mlqa/README.md new file mode 100644 index 0000000000..3d82f95ff0 --- /dev/null +++ b/lm_eval/tasks/mlqa/README.md @@ -0,0 +1,101 @@ +# MLQA + +### Paper + +Title: `MLQA: Evaluating Cross-lingual Extractive Question Answering` + +Abstract: `https://arxiv.org/abs/1910.07475` + +MLQA (MultiLingual Question Answering) is a benchmark dataset for evaluating cross-lingual question answering performance. +MLQA consists of over 5K extractive QA instances (12K in English) in SQuAD format in seven languages - English, Arabic, +German, Spanish, Hindi, Vietnamese and Simplified Chinese. 
MLQA is highly parallel, with QA instances parallel between +4 different languages on average. + +Homepage: `https://github.com/facebookresearch/MLQA` + + +### Citation + +``` +@misc{lewis2020mlqaevaluatingcrosslingualextractive, + title={MLQA: Evaluating Cross-lingual Extractive Question Answering}, + author={Patrick Lewis and Barlas Oğuz and Ruty Rinott and Sebastian Riedel and Holger Schwenk}, + year={2020}, + eprint={1910.07475}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + url={https://arxiv.org/abs/1910.07475}, +} +``` + +### Groups, Tags, and Tasks + +#### Groups + +* Not part of a group yet + +#### Tasks + +Each task is defined in a file named `mlqa_<context-lang>_<question-lang>.yaml`; the task name is the file name without the `.yaml` extension: +* `mlqa_ar_ar.yaml` +* `mlqa_ar_de.yaml` +* `mlqa_ar_vi.yaml` +* `mlqa_ar_zh.yaml` +* `mlqa_ar_en.yaml` +* `mlqa_ar_es.yaml` +* `mlqa_ar_hi.yaml` +* `mlqa_de_ar.yaml` +* `mlqa_de_de.yaml` +* `mlqa_de_vi.yaml` +* `mlqa_de_zh.yaml` +* `mlqa_de_en.yaml` +* `mlqa_de_es.yaml` +* `mlqa_de_hi.yaml` +* `mlqa_vi_ar.yaml` +* `mlqa_vi_de.yaml` +* `mlqa_vi_vi.yaml` +* `mlqa_vi_zh.yaml` +* `mlqa_vi_en.yaml` +* `mlqa_vi_es.yaml` +* `mlqa_vi_hi.yaml` +* `mlqa_zh_ar.yaml` +* `mlqa_zh_de.yaml` +* `mlqa_zh_vi.yaml` +* `mlqa_zh_zh.yaml` +* `mlqa_zh_en.yaml` +* `mlqa_zh_es.yaml` +* `mlqa_zh_hi.yaml` +* `mlqa_en_ar.yaml` +* `mlqa_en_de.yaml` +* `mlqa_en_vi.yaml` +* `mlqa_en_zh.yaml` +* `mlqa_en_en.yaml` +* `mlqa_en_es.yaml` +* `mlqa_en_hi.yaml` +* `mlqa_es_ar.yaml` +* `mlqa_es_de.yaml` +* `mlqa_es_vi.yaml` +* `mlqa_es_zh.yaml` +* `mlqa_es_en.yaml` +* `mlqa_es_es.yaml` +* `mlqa_es_hi.yaml` +* `mlqa_hi_ar.yaml` +* `mlqa_hi_de.yaml` +* `mlqa_hi_vi.yaml` +* `mlqa_hi_zh.yaml` +* `mlqa_hi_en.yaml` +* `mlqa_hi_es.yaml` +* `mlqa_hi_hi.yaml` + +### Checklist + +For adding novel benchmarks/datasets to the library: +* [x] Is the task an existing benchmark in the literature? + * [x] Have you referenced the original paper that introduced the task? + * [x] If yes, does the original paper provide a reference implementation? 
If so, have you checked against the reference implementation and documented how to run such a test? + + +If other tasks on this dataset are already supported: +* [ ] Is the "Main" variant of this task clearly denoted? +* [ ] Have you provided a short sentence in a README on what each new variant adds / evaluates? +* [ ] Have you noted which, if any, published evaluation setups are matched by this variant? diff --git a/lm_eval/tasks/mlqa/generate_tasks.py b/lm_eval/tasks/mlqa/generate_tasks.py new file mode 100644 index 0000000000..ef21b8669c --- /dev/null +++ b/lm_eval/tasks/mlqa/generate_tasks.py @@ -0,0 +1,48 @@ +# ruff: noqa: E731, E741 +""" +Script to generate task YAMLs for the mlqa dataset. +Based on `tasks/bigbench/generate_tasks.py`. +""" + +from datasets import get_dataset_config_names + + +chosen_subtasks = [] + +language_dict = { + "en": "english", + "es": "spanish", + "hi": "hindi", + "vi": "vietnamese", + "de": "german", + "ar": "arabic", + "zh": "chinese", +} + + +def main() -> None: + configs = get_dataset_config_names("facebook/mlqa", trust_remote_code=True) + for config in configs: + if len(config.split(".")) == 2: + continue + else: + chosen_subtasks.append(config) + assert len(chosen_subtasks) == 49 + for task in chosen_subtasks: + file_name = f"{task.replace(".","_")}.yaml" + context_lang = file_name.split("_")[1] + # Not using yaml to avoid tagging issues with !function + with open(file_name, "w", encoding="utf-8") as f: + f.write("# Generated by generate_tasks.py\n") + + # Manually writing the YAML-like content inside files to avoid tagging issues + f.write("include: mlqa_common_yaml.yaml\n") + f.write(f"task: {task.replace('.', '_')}\n") + f.write(f"dataset_name: {task}\n") + f.write( + f"process_results: !function utils.process_results_{context_lang}\n" + ) + + +if __name__ == "__main__": + main() diff --git a/lm_eval/tasks/mlqa/mlqa_ar_ar.yaml b/lm_eval/tasks/mlqa/mlqa_ar_ar.yaml new file mode 100644 index 0000000000..2cf5e31faf --- 
/dev/null +++ b/lm_eval/tasks/mlqa/mlqa_ar_ar.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_ar_ar +dataset_name: mlqa.ar.ar +process_results: !function utils.process_results_ar diff --git a/lm_eval/tasks/mlqa/mlqa_ar_de.yaml b/lm_eval/tasks/mlqa/mlqa_ar_de.yaml new file mode 100644 index 0000000000..cff01a0fe6 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_ar_de.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_ar_de +dataset_name: mlqa.ar.de +process_results: !function utils.process_results_ar diff --git a/lm_eval/tasks/mlqa/mlqa_ar_en.yaml b/lm_eval/tasks/mlqa/mlqa_ar_en.yaml new file mode 100644 index 0000000000..61ce4810d2 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_ar_en.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_ar_en +dataset_name: mlqa.ar.en +process_results: !function utils.process_results_ar diff --git a/lm_eval/tasks/mlqa/mlqa_ar_es.yaml b/lm_eval/tasks/mlqa/mlqa_ar_es.yaml new file mode 100644 index 0000000000..ab68e5bbf2 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_ar_es.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_ar_es +dataset_name: mlqa.ar.es +process_results: !function utils.process_results_ar diff --git a/lm_eval/tasks/mlqa/mlqa_ar_hi.yaml b/lm_eval/tasks/mlqa/mlqa_ar_hi.yaml new file mode 100644 index 0000000000..b924dd547c --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_ar_hi.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_ar_hi +dataset_name: mlqa.ar.hi +process_results: !function utils.process_results_ar diff --git a/lm_eval/tasks/mlqa/mlqa_ar_vi.yaml b/lm_eval/tasks/mlqa/mlqa_ar_vi.yaml new file mode 100644 index 0000000000..300f999ce3 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_ar_vi.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_ar_vi 
+dataset_name: mlqa.ar.vi +process_results: !function utils.process_results_ar diff --git a/lm_eval/tasks/mlqa/mlqa_ar_zh.yaml b/lm_eval/tasks/mlqa/mlqa_ar_zh.yaml new file mode 100644 index 0000000000..eb8f9f9789 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_ar_zh.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_ar_zh +dataset_name: mlqa.ar.zh +process_results: !function utils.process_results_ar diff --git a/lm_eval/tasks/mlqa/mlqa_common_yaml.yaml b/lm_eval/tasks/mlqa/mlqa_common_yaml.yaml new file mode 100644 index 0000000000..c52ecb8914 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_common_yaml.yaml @@ -0,0 +1,22 @@ +dataset_path: facebook/mlqa +dataset_kwargs: + trust_remote_code: true +test_split: test +validation_split: validation +output_type: generate_until +doc_to_text: "Context: {{context}}\n\nQuestion: {{question}}\n\nAnswer:" +doc_to_target: "{{answers}}" +process_docs: !function utils.process_docs +metric_list: + - metric: exact_match + aggregation: mean + higher_is_better: true + - metric: f1 + aggregation: mean + higher_is_better: true +generation_kwargs: + until: + - "\n" + do_sample: false +metadata: + version: 0.0 diff --git a/lm_eval/tasks/mlqa/mlqa_de_ar.yaml b/lm_eval/tasks/mlqa/mlqa_de_ar.yaml new file mode 100644 index 0000000000..d8f59736d9 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_de_ar.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_de_ar +dataset_name: mlqa.de.ar +process_results: !function utils.process_results_de diff --git a/lm_eval/tasks/mlqa/mlqa_de_de.yaml b/lm_eval/tasks/mlqa/mlqa_de_de.yaml new file mode 100644 index 0000000000..fd369e8613 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_de_de.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_de_de +dataset_name: mlqa.de.de +process_results: !function utils.process_results_de diff --git a/lm_eval/tasks/mlqa/mlqa_de_en.yaml 
b/lm_eval/tasks/mlqa/mlqa_de_en.yaml new file mode 100644 index 0000000000..22b35e352d --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_de_en.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_de_en +dataset_name: mlqa.de.en +process_results: !function utils.process_results_de diff --git a/lm_eval/tasks/mlqa/mlqa_de_es.yaml b/lm_eval/tasks/mlqa/mlqa_de_es.yaml new file mode 100644 index 0000000000..b42550f81e --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_de_es.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_de_es +dataset_name: mlqa.de.es +process_results: !function utils.process_results_de diff --git a/lm_eval/tasks/mlqa/mlqa_de_hi.yaml b/lm_eval/tasks/mlqa/mlqa_de_hi.yaml new file mode 100644 index 0000000000..2de7d38681 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_de_hi.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_de_hi +dataset_name: mlqa.de.hi +process_results: !function utils.process_results_de diff --git a/lm_eval/tasks/mlqa/mlqa_de_vi.yaml b/lm_eval/tasks/mlqa/mlqa_de_vi.yaml new file mode 100644 index 0000000000..3c2db838d7 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_de_vi.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_de_vi +dataset_name: mlqa.de.vi +process_results: !function utils.process_results_de diff --git a/lm_eval/tasks/mlqa/mlqa_de_zh.yaml b/lm_eval/tasks/mlqa/mlqa_de_zh.yaml new file mode 100644 index 0000000000..9878fbf4c1 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_de_zh.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_de_zh +dataset_name: mlqa.de.zh +process_results: !function utils.process_results_de diff --git a/lm_eval/tasks/mlqa/mlqa_en_ar.yaml b/lm_eval/tasks/mlqa/mlqa_en_ar.yaml new file mode 100644 index 0000000000..6a2109fa87 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_en_ar.yaml @@ -0,0 +1,5 
@@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_en_ar +dataset_name: mlqa.en.ar +process_results: !function utils.process_results_en diff --git a/lm_eval/tasks/mlqa/mlqa_en_de.yaml b/lm_eval/tasks/mlqa/mlqa_en_de.yaml new file mode 100644 index 0000000000..2ab6cbb17b --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_en_de.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_en_de +dataset_name: mlqa.en.de +process_results: !function utils.process_results_en diff --git a/lm_eval/tasks/mlqa/mlqa_en_en.yaml b/lm_eval/tasks/mlqa/mlqa_en_en.yaml new file mode 100644 index 0000000000..b097b4690e --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_en_en.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_en_en +dataset_name: mlqa.en.en +process_results: !function utils.process_results_en diff --git a/lm_eval/tasks/mlqa/mlqa_en_es.yaml b/lm_eval/tasks/mlqa/mlqa_en_es.yaml new file mode 100644 index 0000000000..cffd5a1995 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_en_es.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_en_es +dataset_name: mlqa.en.es +process_results: !function utils.process_results_en diff --git a/lm_eval/tasks/mlqa/mlqa_en_hi.yaml b/lm_eval/tasks/mlqa/mlqa_en_hi.yaml new file mode 100644 index 0000000000..c2879bf109 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_en_hi.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_en_hi +dataset_name: mlqa.en.hi +process_results: !function utils.process_results_en diff --git a/lm_eval/tasks/mlqa/mlqa_en_vi.yaml b/lm_eval/tasks/mlqa/mlqa_en_vi.yaml new file mode 100644 index 0000000000..ae10dee1bf --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_en_vi.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_en_vi +dataset_name: mlqa.en.vi +process_results: !function 
utils.process_results_en diff --git a/lm_eval/tasks/mlqa/mlqa_en_zh.yaml b/lm_eval/tasks/mlqa/mlqa_en_zh.yaml new file mode 100644 index 0000000000..4f3cac5c76 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_en_zh.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_en_zh +dataset_name: mlqa.en.zh +process_results: !function utils.process_results_en diff --git a/lm_eval/tasks/mlqa/mlqa_es_ar.yaml b/lm_eval/tasks/mlqa/mlqa_es_ar.yaml new file mode 100644 index 0000000000..ef916babb1 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_es_ar.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_es_ar +dataset_name: mlqa.es.ar +process_results: !function utils.process_results_es diff --git a/lm_eval/tasks/mlqa/mlqa_es_de.yaml b/lm_eval/tasks/mlqa/mlqa_es_de.yaml new file mode 100644 index 0000000000..827d77b92a --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_es_de.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_es_de +dataset_name: mlqa.es.de +process_results: !function utils.process_results_es diff --git a/lm_eval/tasks/mlqa/mlqa_es_en.yaml b/lm_eval/tasks/mlqa/mlqa_es_en.yaml new file mode 100644 index 0000000000..4e706a7bea --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_es_en.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_es_en +dataset_name: mlqa.es.en +process_results: !function utils.process_results_es diff --git a/lm_eval/tasks/mlqa/mlqa_es_es.yaml b/lm_eval/tasks/mlqa/mlqa_es_es.yaml new file mode 100644 index 0000000000..c8d5f3742c --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_es_es.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_es_es +dataset_name: mlqa.es.es +process_results: !function utils.process_results_es diff --git a/lm_eval/tasks/mlqa/mlqa_es_hi.yaml b/lm_eval/tasks/mlqa/mlqa_es_hi.yaml new file mode 100644 index 
0000000000..501026fa98 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_es_hi.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_es_hi +dataset_name: mlqa.es.hi +process_results: !function utils.process_results_es diff --git a/lm_eval/tasks/mlqa/mlqa_es_vi.yaml b/lm_eval/tasks/mlqa/mlqa_es_vi.yaml new file mode 100644 index 0000000000..abf0d9ae1e --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_es_vi.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_es_vi +dataset_name: mlqa.es.vi +process_results: !function utils.process_results_es diff --git a/lm_eval/tasks/mlqa/mlqa_es_zh.yaml b/lm_eval/tasks/mlqa/mlqa_es_zh.yaml new file mode 100644 index 0000000000..3c41d276cb --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_es_zh.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_es_zh +dataset_name: mlqa.es.zh +process_results: !function utils.process_results_es diff --git a/lm_eval/tasks/mlqa/mlqa_hi_ar.yaml b/lm_eval/tasks/mlqa/mlqa_hi_ar.yaml new file mode 100644 index 0000000000..b054cd0c4f --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_hi_ar.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_hi_ar +dataset_name: mlqa.hi.ar +process_results: !function utils.process_results_hi diff --git a/lm_eval/tasks/mlqa/mlqa_hi_de.yaml b/lm_eval/tasks/mlqa/mlqa_hi_de.yaml new file mode 100644 index 0000000000..b323c7cf08 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_hi_de.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_hi_de +dataset_name: mlqa.hi.de +process_results: !function utils.process_results_hi diff --git a/lm_eval/tasks/mlqa/mlqa_hi_en.yaml b/lm_eval/tasks/mlqa/mlqa_hi_en.yaml new file mode 100644 index 0000000000..b6a4640a2d --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_hi_en.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: 
mlqa_common_yaml.yaml +task: mlqa_hi_en +dataset_name: mlqa.hi.en +process_results: !function utils.process_results_hi diff --git a/lm_eval/tasks/mlqa/mlqa_hi_es.yaml b/lm_eval/tasks/mlqa/mlqa_hi_es.yaml new file mode 100644 index 0000000000..15fb29f3e9 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_hi_es.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_hi_es +dataset_name: mlqa.hi.es +process_results: !function utils.process_results_hi diff --git a/lm_eval/tasks/mlqa/mlqa_hi_hi.yaml b/lm_eval/tasks/mlqa/mlqa_hi_hi.yaml new file mode 100644 index 0000000000..ed46263105 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_hi_hi.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_hi_hi +dataset_name: mlqa.hi.hi +process_results: !function utils.process_results_hi diff --git a/lm_eval/tasks/mlqa/mlqa_hi_vi.yaml b/lm_eval/tasks/mlqa/mlqa_hi_vi.yaml new file mode 100644 index 0000000000..a6c586a222 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_hi_vi.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_hi_vi +dataset_name: mlqa.hi.vi +process_results: !function utils.process_results_hi diff --git a/lm_eval/tasks/mlqa/mlqa_hi_zh.yaml b/lm_eval/tasks/mlqa/mlqa_hi_zh.yaml new file mode 100644 index 0000000000..518497b6e0 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_hi_zh.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_hi_zh +dataset_name: mlqa.hi.zh +process_results: !function utils.process_results_hi diff --git a/lm_eval/tasks/mlqa/mlqa_vi_ar.yaml b/lm_eval/tasks/mlqa/mlqa_vi_ar.yaml new file mode 100644 index 0000000000..62a05dcdc9 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_vi_ar.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_vi_ar +dataset_name: mlqa.vi.ar +process_results: !function utils.process_results_vi diff --git 
a/lm_eval/tasks/mlqa/mlqa_vi_de.yaml b/lm_eval/tasks/mlqa/mlqa_vi_de.yaml new file mode 100644 index 0000000000..e76223a4ff --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_vi_de.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_vi_de +dataset_name: mlqa.vi.de +process_results: !function utils.process_results_vi diff --git a/lm_eval/tasks/mlqa/mlqa_vi_en.yaml b/lm_eval/tasks/mlqa/mlqa_vi_en.yaml new file mode 100644 index 0000000000..cd6ea68146 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_vi_en.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_vi_en +dataset_name: mlqa.vi.en +process_results: !function utils.process_results_vi diff --git a/lm_eval/tasks/mlqa/mlqa_vi_es.yaml b/lm_eval/tasks/mlqa/mlqa_vi_es.yaml new file mode 100644 index 0000000000..ac2f9e2019 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_vi_es.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_vi_es +dataset_name: mlqa.vi.es +process_results: !function utils.process_results_vi diff --git a/lm_eval/tasks/mlqa/mlqa_vi_hi.yaml b/lm_eval/tasks/mlqa/mlqa_vi_hi.yaml new file mode 100644 index 0000000000..69e08aa047 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_vi_hi.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_vi_hi +dataset_name: mlqa.vi.hi +process_results: !function utils.process_results_vi diff --git a/lm_eval/tasks/mlqa/mlqa_vi_vi.yaml b/lm_eval/tasks/mlqa/mlqa_vi_vi.yaml new file mode 100644 index 0000000000..4700a643dc --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_vi_vi.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_vi_vi +dataset_name: mlqa.vi.vi +process_results: !function utils.process_results_vi diff --git a/lm_eval/tasks/mlqa/mlqa_vi_zh.yaml b/lm_eval/tasks/mlqa/mlqa_vi_zh.yaml new file mode 100644 index 0000000000..b1d37862f3 --- /dev/null +++ 
b/lm_eval/tasks/mlqa/mlqa_vi_zh.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_vi_zh +dataset_name: mlqa.vi.zh +process_results: !function utils.process_results_vi diff --git a/lm_eval/tasks/mlqa/mlqa_zh_ar.yaml b/lm_eval/tasks/mlqa/mlqa_zh_ar.yaml new file mode 100644 index 0000000000..b42ee1dfe9 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_zh_ar.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_zh_ar +dataset_name: mlqa.zh.ar +process_results: !function utils.process_results_zh diff --git a/lm_eval/tasks/mlqa/mlqa_zh_de.yaml b/lm_eval/tasks/mlqa/mlqa_zh_de.yaml new file mode 100644 index 0000000000..f069cbb965 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_zh_de.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_zh_de +dataset_name: mlqa.zh.de +process_results: !function utils.process_results_zh diff --git a/lm_eval/tasks/mlqa/mlqa_zh_en.yaml b/lm_eval/tasks/mlqa/mlqa_zh_en.yaml new file mode 100644 index 0000000000..91f464229a --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_zh_en.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_zh_en +dataset_name: mlqa.zh.en +process_results: !function utils.process_results_zh diff --git a/lm_eval/tasks/mlqa/mlqa_zh_es.yaml b/lm_eval/tasks/mlqa/mlqa_zh_es.yaml new file mode 100644 index 0000000000..a831f93fcb --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_zh_es.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_zh_es +dataset_name: mlqa.zh.es +process_results: !function utils.process_results_zh diff --git a/lm_eval/tasks/mlqa/mlqa_zh_hi.yaml b/lm_eval/tasks/mlqa/mlqa_zh_hi.yaml new file mode 100644 index 0000000000..df115819ac --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_zh_hi.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_zh_hi +dataset_name: 
mlqa.zh.hi +process_results: !function utils.process_results_zh diff --git a/lm_eval/tasks/mlqa/mlqa_zh_vi.yaml b/lm_eval/tasks/mlqa/mlqa_zh_vi.yaml new file mode 100644 index 0000000000..6ed4b41c1d --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_zh_vi.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_zh_vi +dataset_name: mlqa.zh.vi +process_results: !function utils.process_results_zh diff --git a/lm_eval/tasks/mlqa/mlqa_zh_zh.yaml b/lm_eval/tasks/mlqa/mlqa_zh_zh.yaml new file mode 100644 index 0000000000..4c95aecdba --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_zh_zh.yaml @@ -0,0 +1,5 @@ +# Generated by generate_tasks.py +include: mlqa_common_yaml.yaml +task: mlqa_zh_zh +dataset_name: mlqa.zh.zh +process_results: !function utils.process_results_zh diff --git a/lm_eval/tasks/mlqa/utils.py b/lm_eval/tasks/mlqa/utils.py new file mode 100644 index 0000000000..61e593716a --- /dev/null +++ b/lm_eval/tasks/mlqa/utils.py @@ -0,0 +1,165 @@ +""" +Code based on Official evaluation script for the MLQA dataset. 
+Repo: https://github.com/facebookresearch/MLQA/blob/main/mlqa_evaluation_v1.py +""" + +import re +import string +import sys +import unicodedata +from collections import Counter + +import datasets + + +PUNCT = { + chr(i) + for i in range(sys.maxunicode) + if unicodedata.category(chr(i)).startswith("P") +}.union(string.punctuation) +WHITESPACE_LANGS = ["en", "es", "hi", "vi", "de", "ar"] +MIXED_SEGMENTATION_LANGS = ["zh"] + + +def whitespace_tokenize(text): + return text.split() + + +def mixed_segmentation(text): + segs_out = [] + temp_str = "" + for char in text: + if re.search(r"[\u4e00-\u9fa5]", char) or char in PUNCT: + if temp_str != "": + ss = whitespace_tokenize(temp_str) + segs_out.extend(ss) + temp_str = "" + segs_out.append(char) + else: + temp_str += char + + if temp_str != "": + ss = whitespace_tokenize(temp_str) + segs_out.extend(ss) + + return segs_out + + +def normalize_answer(s, lang): + """Lower text and remove punctuation, articles and extra whitespace.""" + + def remove_articles(text, lang): + if lang == "en": + return re.sub(r"\b(a|an|the)\b", " ", text) + elif lang == "es": + return re.sub(r"\b(un|una|unos|unas|el|la|los|las)\b", " ", text) + elif lang == "hi": + return text # Hindi does not have formal articles + elif lang == "vi": + return re.sub(r"\b(của|là|cái|chiếc|những)\b", " ", text) + elif lang == "de": + return re.sub( + r"\b(ein|eine|einen|einem|eines|einer|der|die|das|den|dem|des)\b", + " ", + text, + ) + elif lang == "ar": + return re.sub(r"\sال^|ال", " ", text) + elif lang == "zh": + return text # Chinese does not have formal articles + else: + raise Exception("Unknown Language {}".format(lang)) + + def white_space_fix(text, lang): + if lang in WHITESPACE_LANGS: + tokens = whitespace_tokenize(text) + elif lang in MIXED_SEGMENTATION_LANGS: + tokens = mixed_segmentation(text) + else: + raise Exception("Unknown Language {}".format(lang)) + return " ".join([t for t in tokens if t.strip() != ""]) + + def remove_punc(text): + return 
"".join(ch for ch in text if ch not in PUNCT) + + def lower(text): + return text.lower() + + return white_space_fix(remove_articles(remove_punc(lower(s)), lang), lang) + + +def f1_score(prediction, ground_truth, lang): + prediction_tokens = normalize_answer(prediction, lang).split() + ground_truth_tokens = normalize_answer(ground_truth, lang).split() + common = Counter(prediction_tokens) & Counter(ground_truth_tokens) + num_same = sum(common.values()) + if num_same == 0: + return 0 + precision = 1.0 * num_same / len(prediction_tokens) + recall = 1.0 * num_same / len(ground_truth_tokens) + f1 = (2 * precision * recall) / (precision + recall) + return f1 + + +def exact_match_score(prediction, ground_truth, lang): + return normalize_answer(prediction, lang) == normalize_answer(ground_truth, lang) + + +def metric_max_over_ground_truths(metric_fn, prediction, ground_truths, lang): + scores_for_ground_truths = [] + for ground_truth in ground_truths: + score = metric_fn(prediction, ground_truth, lang) + scores_for_ground_truths.append(score) + return max(scores_for_ground_truths) + + +def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: + def _process_doc(doc): + out_doc = { + "context": doc["context"], + "question": doc["question"], + "answers": doc["answers"]["text"], + } + return out_doc + + return dataset.map(_process_doc) + + +# Base function +def process_results_lang(doc, results, lang): + ground_truths = doc["answers"] + prediction = results[0].strip() + exact_match = metric_max_over_ground_truths( + exact_match_score, prediction, ground_truths, lang + ) + f1 = metric_max_over_ground_truths(f1_score, prediction, ground_truths, lang) + return {"exact_match": exact_match, "f1": f1} + + +# Language Wrapper functions +def process_results_en(doc, results): + return process_results_lang(doc, results, "en") + + +def process_results_es(doc, results): + return process_results_lang(doc, results, "es") + + +def process_results_hi(doc, results): + return 
process_results_lang(doc, results, "hi") + + +def process_results_vi(doc, results): + return process_results_lang(doc, results, "vi") + + +def process_results_de(doc, results): + return process_results_lang(doc, results, "de") + + +def process_results_ar(doc, results): + return process_results_lang(doc, results, "ar") + + +def process_results_zh(doc, results): + return process_results_lang(doc, results, "zh") From 82c2f5be95fa9b4ce4b242f09d307c4c13ae5ed8 Mon Sep 17 00:00:00 2001 From: KahnSvaer Date: Tue, 14 Jan 2025 22:30:42 +0530 Subject: [PATCH 2/7] fix: mlqa ast error --- lm_eval/tasks/mlqa/generate_tasks.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lm_eval/tasks/mlqa/generate_tasks.py b/lm_eval/tasks/mlqa/generate_tasks.py index ef21b8669c..493d0f6486 100644 --- a/lm_eval/tasks/mlqa/generate_tasks.py +++ b/lm_eval/tasks/mlqa/generate_tasks.py @@ -29,7 +29,7 @@ def main() -> None: chosen_subtasks.append(config) assert len(chosen_subtasks) == 49 for task in chosen_subtasks: - file_name = f"{task.replace(".","_")}.yaml" + file_name = f"{task.replace('.', '_')}.yaml" context_lang = file_name.split("_")[1] # Not using yaml to avoid tagging issues with !function with open(file_name, "w", encoding="utf-8") as f: @@ -37,8 +37,8 @@ def main() -> None: # Manually writing the YAML-like content inside files to avoid tagging issues f.write("include: mlqa_common_yaml.yaml\n") - f.write(f"task: {task.replace('.', '_')}\n") - f.write(f"dataset_name: {task}\n") + f.write(f"{task.replace('.', '_')}.yaml") + f.write(f"\ndataset_name: {task}\n") f.write( f"process_results: !function utils.process_results_{context_lang}\n" ) From cd4b8220601c23badf18d0208b20e2c0b88f36e4 Mon Sep 17 00:00:00 2001 From: KahnSvaer Date: Wed, 15 Jan 2025 11:50:53 +0530 Subject: [PATCH 3/7] nit: removed .yaml ext from template_yaml --- lm_eval/tasks/mlqa/generate_tasks.py | 4 ++-- lm_eval/tasks/mlqa/mlqa_common_yaml | 22 ++++++++++++++++++++++ 2 files changed, 24 
insertions(+), 2 deletions(-) create mode 100644 lm_eval/tasks/mlqa/mlqa_common_yaml diff --git a/lm_eval/tasks/mlqa/generate_tasks.py b/lm_eval/tasks/mlqa/generate_tasks.py index 493d0f6486..bce049f674 100644 --- a/lm_eval/tasks/mlqa/generate_tasks.py +++ b/lm_eval/tasks/mlqa/generate_tasks.py @@ -37,8 +37,8 @@ def main() -> None: # Manually writing the YAML-like content inside files to avoid tagging issues f.write("include: mlqa_common_yaml.yaml\n") - f.write(f"{task.replace('.', '_')}.yaml") - f.write(f"\ndataset_name: {task}\n") + f.write(f"task: {task.replace('.', '_')}.yaml\n") + f.write(f"dataset_name: {task}\n") f.write( f"process_results: !function utils.process_results_{context_lang}\n" ) diff --git a/lm_eval/tasks/mlqa/mlqa_common_yaml b/lm_eval/tasks/mlqa/mlqa_common_yaml new file mode 100644 index 0000000000..c52ecb8914 --- /dev/null +++ b/lm_eval/tasks/mlqa/mlqa_common_yaml @@ -0,0 +1,22 @@ +dataset_path: facebook/mlqa +dataset_kwargs: + trust_remote_code: true +test_split: test +validation_split: validation +output_type: generate_until +doc_to_text: "Context: {{context}}\n\nQuestion: {{question}}\n\nAnswer:" +doc_to_target: "{{answers}}" +process_docs: !function utils.process_docs +metric_list: + - metric: exact_match + aggregation: mean + higher_is_better: true + - metric: f1 + aggregation: mean + higher_is_better: true +generation_kwargs: + until: + - "\n" + do_sample: false +metadata: + version: 0.0 From 8fbdad20c5ebd7d56f6dd3820d472909613f7509 Mon Sep 17 00:00:00 2001 From: KahnSvaer Date: Wed, 15 Jan 2025 21:44:33 +0530 Subject: [PATCH 4/7] nit changes: minor modifications generate_tasks.py --- lm_eval/tasks/mlqa/generate_tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lm_eval/tasks/mlqa/generate_tasks.py b/lm_eval/tasks/mlqa/generate_tasks.py index bce049f674..7a228fc55a 100644 --- a/lm_eval/tasks/mlqa/generate_tasks.py +++ b/lm_eval/tasks/mlqa/generate_tasks.py @@ -36,7 +36,7 @@ def main() -> None: f.write("# 
Generated by generate_tasks.py\n") # Manually writing the YAML-like content inside files to avoid tagging issues - f.write("include: mlqa_common_yaml.yaml\n") + f.write("include: mlqa_common_yaml\n") f.write(f"task: {task.replace('.', '_')}.yaml\n") f.write(f"dataset_name: {task}\n") f.write( From fe4a7b0ea79e45fc498a5795722cba8f1f58df0d Mon Sep 17 00:00:00 2001 From: KahnSvaer Date: Wed, 15 Jan 2025 22:30:14 +0530 Subject: [PATCH 5/7] deleted lm_eval/tasks/mlqa/mlqa_common_yaml.yaml --- lm_eval/tasks/mlqa/mlqa_common_yaml.yaml | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 lm_eval/tasks/mlqa/mlqa_common_yaml.yaml diff --git a/lm_eval/tasks/mlqa/mlqa_common_yaml.yaml b/lm_eval/tasks/mlqa/mlqa_common_yaml.yaml deleted file mode 100644 index c52ecb8914..0000000000 --- a/lm_eval/tasks/mlqa/mlqa_common_yaml.yaml +++ /dev/null @@ -1,22 +0,0 @@ -dataset_path: facebook/mlqa -dataset_kwargs: - trust_remote_code: true -test_split: test -validation_split: validation -output_type: generate_until -doc_to_text: "Context: {{context}}\n\nQuestion: {{question}}\n\nAnswer:" -doc_to_target: "{{answers}}" -process_docs: !function utils.process_docs -metric_list: - - metric: exact_match - aggregation: mean - higher_is_better: true - - metric: f1 - aggregation: mean - higher_is_better: true -generation_kwargs: - until: - - "\n" - do_sample: false -metadata: - version: 0.0 From ab9475cdfa20b57ffe4236339788c114e5dfae2c Mon Sep 17 00:00:00 2001 From: KahnSvaer Date: Wed, 15 Jan 2025 22:33:44 +0530 Subject: [PATCH 6/7] tests updated --- lm_eval/tasks/mlqa/mlqa_ar_ar.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_ar_de.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_ar_en.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_ar_es.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_ar_hi.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_ar_vi.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_ar_zh.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_de_ar.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_de_de.yaml | 4 ++-- 
lm_eval/tasks/mlqa/mlqa_de_en.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_de_es.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_de_hi.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_de_vi.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_de_zh.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_en_ar.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_en_de.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_en_en.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_en_es.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_en_hi.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_en_vi.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_en_zh.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_es_ar.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_es_de.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_es_en.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_es_es.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_es_hi.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_es_vi.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_es_zh.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_hi_ar.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_hi_de.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_hi_en.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_hi_es.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_hi_hi.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_hi_vi.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_hi_zh.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_vi_ar.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_vi_de.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_vi_en.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_vi_es.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_vi_hi.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_vi_vi.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_vi_zh.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_zh_ar.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_zh_de.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_zh_en.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_zh_es.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_zh_hi.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_zh_vi.yaml | 4 ++-- lm_eval/tasks/mlqa/mlqa_zh_zh.yaml | 4 ++-- 49 files changed, 98 insertions(+), 98 deletions(-) diff --git a/lm_eval/tasks/mlqa/mlqa_ar_ar.yaml b/lm_eval/tasks/mlqa/mlqa_ar_ar.yaml index 2cf5e31faf..a1750e3111 100644 --- a/lm_eval/tasks/mlqa/mlqa_ar_ar.yaml +++ 
b/lm_eval/tasks/mlqa/mlqa_ar_ar.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_ar_ar +include: mlqa_common_yaml +task: mlqa_ar_ar.yaml dataset_name: mlqa.ar.ar process_results: !function utils.process_results_ar diff --git a/lm_eval/tasks/mlqa/mlqa_ar_de.yaml b/lm_eval/tasks/mlqa/mlqa_ar_de.yaml index cff01a0fe6..38d43c1cf4 100644 --- a/lm_eval/tasks/mlqa/mlqa_ar_de.yaml +++ b/lm_eval/tasks/mlqa/mlqa_ar_de.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_ar_de +include: mlqa_common_yaml +task: mlqa_ar_de.yaml dataset_name: mlqa.ar.de process_results: !function utils.process_results_ar diff --git a/lm_eval/tasks/mlqa/mlqa_ar_en.yaml b/lm_eval/tasks/mlqa/mlqa_ar_en.yaml index 61ce4810d2..94fd33193d 100644 --- a/lm_eval/tasks/mlqa/mlqa_ar_en.yaml +++ b/lm_eval/tasks/mlqa/mlqa_ar_en.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_ar_en +include: mlqa_common_yaml +task: mlqa_ar_en.yaml dataset_name: mlqa.ar.en process_results: !function utils.process_results_ar diff --git a/lm_eval/tasks/mlqa/mlqa_ar_es.yaml b/lm_eval/tasks/mlqa/mlqa_ar_es.yaml index ab68e5bbf2..3b4a39e477 100644 --- a/lm_eval/tasks/mlqa/mlqa_ar_es.yaml +++ b/lm_eval/tasks/mlqa/mlqa_ar_es.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_ar_es +include: mlqa_common_yaml +task: mlqa_ar_es.yaml dataset_name: mlqa.ar.es process_results: !function utils.process_results_ar diff --git a/lm_eval/tasks/mlqa/mlqa_ar_hi.yaml b/lm_eval/tasks/mlqa/mlqa_ar_hi.yaml index b924dd547c..070cb39325 100644 --- a/lm_eval/tasks/mlqa/mlqa_ar_hi.yaml +++ b/lm_eval/tasks/mlqa/mlqa_ar_hi.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_ar_hi +include: mlqa_common_yaml +task: mlqa_ar_hi.yaml dataset_name: mlqa.ar.hi process_results: !function utils.process_results_ar diff --git 
a/lm_eval/tasks/mlqa/mlqa_ar_vi.yaml b/lm_eval/tasks/mlqa/mlqa_ar_vi.yaml index 300f999ce3..991a31110e 100644 --- a/lm_eval/tasks/mlqa/mlqa_ar_vi.yaml +++ b/lm_eval/tasks/mlqa/mlqa_ar_vi.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_ar_vi +include: mlqa_common_yaml +task: mlqa_ar_vi.yaml dataset_name: mlqa.ar.vi process_results: !function utils.process_results_ar diff --git a/lm_eval/tasks/mlqa/mlqa_ar_zh.yaml b/lm_eval/tasks/mlqa/mlqa_ar_zh.yaml index eb8f9f9789..a5083aeef9 100644 --- a/lm_eval/tasks/mlqa/mlqa_ar_zh.yaml +++ b/lm_eval/tasks/mlqa/mlqa_ar_zh.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_ar_zh +include: mlqa_common_yaml +task: mlqa_ar_zh.yaml dataset_name: mlqa.ar.zh process_results: !function utils.process_results_ar diff --git a/lm_eval/tasks/mlqa/mlqa_de_ar.yaml b/lm_eval/tasks/mlqa/mlqa_de_ar.yaml index d8f59736d9..fba0d72118 100644 --- a/lm_eval/tasks/mlqa/mlqa_de_ar.yaml +++ b/lm_eval/tasks/mlqa/mlqa_de_ar.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_de_ar +include: mlqa_common_yaml +task: mlqa_de_ar.yaml dataset_name: mlqa.de.ar process_results: !function utils.process_results_de diff --git a/lm_eval/tasks/mlqa/mlqa_de_de.yaml b/lm_eval/tasks/mlqa/mlqa_de_de.yaml index fd369e8613..a74c07e4e2 100644 --- a/lm_eval/tasks/mlqa/mlqa_de_de.yaml +++ b/lm_eval/tasks/mlqa/mlqa_de_de.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_de_de +include: mlqa_common_yaml +task: mlqa_de_de.yaml dataset_name: mlqa.de.de process_results: !function utils.process_results_de diff --git a/lm_eval/tasks/mlqa/mlqa_de_en.yaml b/lm_eval/tasks/mlqa/mlqa_de_en.yaml index 22b35e352d..0556bc365b 100644 --- a/lm_eval/tasks/mlqa/mlqa_de_en.yaml +++ b/lm_eval/tasks/mlqa/mlqa_de_en.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: 
mlqa_de_en +include: mlqa_common_yaml +task: mlqa_de_en.yaml dataset_name: mlqa.de.en process_results: !function utils.process_results_de diff --git a/lm_eval/tasks/mlqa/mlqa_de_es.yaml b/lm_eval/tasks/mlqa/mlqa_de_es.yaml index b42550f81e..cb10737aed 100644 --- a/lm_eval/tasks/mlqa/mlqa_de_es.yaml +++ b/lm_eval/tasks/mlqa/mlqa_de_es.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_de_es +include: mlqa_common_yaml +task: mlqa_de_es.yaml dataset_name: mlqa.de.es process_results: !function utils.process_results_de diff --git a/lm_eval/tasks/mlqa/mlqa_de_hi.yaml b/lm_eval/tasks/mlqa/mlqa_de_hi.yaml index 2de7d38681..f263432d2c 100644 --- a/lm_eval/tasks/mlqa/mlqa_de_hi.yaml +++ b/lm_eval/tasks/mlqa/mlqa_de_hi.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_de_hi +include: mlqa_common_yaml +task: mlqa_de_hi.yaml dataset_name: mlqa.de.hi process_results: !function utils.process_results_de diff --git a/lm_eval/tasks/mlqa/mlqa_de_vi.yaml b/lm_eval/tasks/mlqa/mlqa_de_vi.yaml index 3c2db838d7..b48ca051b6 100644 --- a/lm_eval/tasks/mlqa/mlqa_de_vi.yaml +++ b/lm_eval/tasks/mlqa/mlqa_de_vi.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_de_vi +include: mlqa_common_yaml +task: mlqa_de_vi.yaml dataset_name: mlqa.de.vi process_results: !function utils.process_results_de diff --git a/lm_eval/tasks/mlqa/mlqa_de_zh.yaml b/lm_eval/tasks/mlqa/mlqa_de_zh.yaml index 9878fbf4c1..35d913187e 100644 --- a/lm_eval/tasks/mlqa/mlqa_de_zh.yaml +++ b/lm_eval/tasks/mlqa/mlqa_de_zh.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_de_zh +include: mlqa_common_yaml +task: mlqa_de_zh.yaml dataset_name: mlqa.de.zh process_results: !function utils.process_results_de diff --git a/lm_eval/tasks/mlqa/mlqa_en_ar.yaml b/lm_eval/tasks/mlqa/mlqa_en_ar.yaml index 6a2109fa87..867f7c4357 100644 --- 
a/lm_eval/tasks/mlqa/mlqa_en_ar.yaml +++ b/lm_eval/tasks/mlqa/mlqa_en_ar.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_en_ar +include: mlqa_common_yaml +task: mlqa_en_ar.yaml dataset_name: mlqa.en.ar process_results: !function utils.process_results_en diff --git a/lm_eval/tasks/mlqa/mlqa_en_de.yaml b/lm_eval/tasks/mlqa/mlqa_en_de.yaml index 2ab6cbb17b..de8eb48044 100644 --- a/lm_eval/tasks/mlqa/mlqa_en_de.yaml +++ b/lm_eval/tasks/mlqa/mlqa_en_de.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_en_de +include: mlqa_common_yaml +task: mlqa_en_de.yaml dataset_name: mlqa.en.de process_results: !function utils.process_results_en diff --git a/lm_eval/tasks/mlqa/mlqa_en_en.yaml b/lm_eval/tasks/mlqa/mlqa_en_en.yaml index b097b4690e..ace8e7e2e7 100644 --- a/lm_eval/tasks/mlqa/mlqa_en_en.yaml +++ b/lm_eval/tasks/mlqa/mlqa_en_en.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_en_en +include: mlqa_common_yaml +task: mlqa_en_en.yaml dataset_name: mlqa.en.en process_results: !function utils.process_results_en diff --git a/lm_eval/tasks/mlqa/mlqa_en_es.yaml b/lm_eval/tasks/mlqa/mlqa_en_es.yaml index cffd5a1995..ca80e4b953 100644 --- a/lm_eval/tasks/mlqa/mlqa_en_es.yaml +++ b/lm_eval/tasks/mlqa/mlqa_en_es.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_en_es +include: mlqa_common_yaml +task: mlqa_en_es.yaml dataset_name: mlqa.en.es process_results: !function utils.process_results_en diff --git a/lm_eval/tasks/mlqa/mlqa_en_hi.yaml b/lm_eval/tasks/mlqa/mlqa_en_hi.yaml index c2879bf109..b90cb049ac 100644 --- a/lm_eval/tasks/mlqa/mlqa_en_hi.yaml +++ b/lm_eval/tasks/mlqa/mlqa_en_hi.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_en_hi +include: mlqa_common_yaml +task: mlqa_en_hi.yaml dataset_name: mlqa.en.hi process_results: !function 
utils.process_results_en diff --git a/lm_eval/tasks/mlqa/mlqa_en_vi.yaml b/lm_eval/tasks/mlqa/mlqa_en_vi.yaml index ae10dee1bf..4eb00da815 100644 --- a/lm_eval/tasks/mlqa/mlqa_en_vi.yaml +++ b/lm_eval/tasks/mlqa/mlqa_en_vi.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_en_vi +include: mlqa_common_yaml +task: mlqa_en_vi.yaml dataset_name: mlqa.en.vi process_results: !function utils.process_results_en diff --git a/lm_eval/tasks/mlqa/mlqa_en_zh.yaml b/lm_eval/tasks/mlqa/mlqa_en_zh.yaml index 4f3cac5c76..52f185b399 100644 --- a/lm_eval/tasks/mlqa/mlqa_en_zh.yaml +++ b/lm_eval/tasks/mlqa/mlqa_en_zh.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_en_zh +include: mlqa_common_yaml +task: mlqa_en_zh.yaml dataset_name: mlqa.en.zh process_results: !function utils.process_results_en diff --git a/lm_eval/tasks/mlqa/mlqa_es_ar.yaml b/lm_eval/tasks/mlqa/mlqa_es_ar.yaml index ef916babb1..d3e1bdbca5 100644 --- a/lm_eval/tasks/mlqa/mlqa_es_ar.yaml +++ b/lm_eval/tasks/mlqa/mlqa_es_ar.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_es_ar +include: mlqa_common_yaml +task: mlqa_es_ar.yaml dataset_name: mlqa.es.ar process_results: !function utils.process_results_es diff --git a/lm_eval/tasks/mlqa/mlqa_es_de.yaml b/lm_eval/tasks/mlqa/mlqa_es_de.yaml index 827d77b92a..04689086d7 100644 --- a/lm_eval/tasks/mlqa/mlqa_es_de.yaml +++ b/lm_eval/tasks/mlqa/mlqa_es_de.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_es_de +include: mlqa_common_yaml +task: mlqa_es_de.yaml dataset_name: mlqa.es.de process_results: !function utils.process_results_es diff --git a/lm_eval/tasks/mlqa/mlqa_es_en.yaml b/lm_eval/tasks/mlqa/mlqa_es_en.yaml index 4e706a7bea..909c2349c6 100644 --- a/lm_eval/tasks/mlqa/mlqa_es_en.yaml +++ b/lm_eval/tasks/mlqa/mlqa_es_en.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py 
-include: mlqa_common_yaml.yaml -task: mlqa_es_en +include: mlqa_common_yaml +task: mlqa_es_en.yaml dataset_name: mlqa.es.en process_results: !function utils.process_results_es diff --git a/lm_eval/tasks/mlqa/mlqa_es_es.yaml b/lm_eval/tasks/mlqa/mlqa_es_es.yaml index c8d5f3742c..8a9b88bd17 100644 --- a/lm_eval/tasks/mlqa/mlqa_es_es.yaml +++ b/lm_eval/tasks/mlqa/mlqa_es_es.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_es_es +include: mlqa_common_yaml +task: mlqa_es_es.yaml dataset_name: mlqa.es.es process_results: !function utils.process_results_es diff --git a/lm_eval/tasks/mlqa/mlqa_es_hi.yaml b/lm_eval/tasks/mlqa/mlqa_es_hi.yaml index 501026fa98..c114ea086c 100644 --- a/lm_eval/tasks/mlqa/mlqa_es_hi.yaml +++ b/lm_eval/tasks/mlqa/mlqa_es_hi.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_es_hi +include: mlqa_common_yaml +task: mlqa_es_hi.yaml dataset_name: mlqa.es.hi process_results: !function utils.process_results_es diff --git a/lm_eval/tasks/mlqa/mlqa_es_vi.yaml b/lm_eval/tasks/mlqa/mlqa_es_vi.yaml index abf0d9ae1e..a0dfde1e8f 100644 --- a/lm_eval/tasks/mlqa/mlqa_es_vi.yaml +++ b/lm_eval/tasks/mlqa/mlqa_es_vi.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_es_vi +include: mlqa_common_yaml +task: mlqa_es_vi.yaml dataset_name: mlqa.es.vi process_results: !function utils.process_results_es diff --git a/lm_eval/tasks/mlqa/mlqa_es_zh.yaml b/lm_eval/tasks/mlqa/mlqa_es_zh.yaml index 3c41d276cb..c68590a927 100644 --- a/lm_eval/tasks/mlqa/mlqa_es_zh.yaml +++ b/lm_eval/tasks/mlqa/mlqa_es_zh.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_es_zh +include: mlqa_common_yaml +task: mlqa_es_zh.yaml dataset_name: mlqa.es.zh process_results: !function utils.process_results_es diff --git a/lm_eval/tasks/mlqa/mlqa_hi_ar.yaml b/lm_eval/tasks/mlqa/mlqa_hi_ar.yaml index 
b054cd0c4f..607dbe62a9 100644 --- a/lm_eval/tasks/mlqa/mlqa_hi_ar.yaml +++ b/lm_eval/tasks/mlqa/mlqa_hi_ar.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_hi_ar +include: mlqa_common_yaml +task: mlqa_hi_ar.yaml dataset_name: mlqa.hi.ar process_results: !function utils.process_results_hi diff --git a/lm_eval/tasks/mlqa/mlqa_hi_de.yaml b/lm_eval/tasks/mlqa/mlqa_hi_de.yaml index b323c7cf08..6e8cb4d7a0 100644 --- a/lm_eval/tasks/mlqa/mlqa_hi_de.yaml +++ b/lm_eval/tasks/mlqa/mlqa_hi_de.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_hi_de +include: mlqa_common_yaml +task: mlqa_hi_de.yaml dataset_name: mlqa.hi.de process_results: !function utils.process_results_hi diff --git a/lm_eval/tasks/mlqa/mlqa_hi_en.yaml b/lm_eval/tasks/mlqa/mlqa_hi_en.yaml index b6a4640a2d..fa128a5c22 100644 --- a/lm_eval/tasks/mlqa/mlqa_hi_en.yaml +++ b/lm_eval/tasks/mlqa/mlqa_hi_en.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_hi_en +include: mlqa_common_yaml +task: mlqa_hi_en.yaml dataset_name: mlqa.hi.en process_results: !function utils.process_results_hi diff --git a/lm_eval/tasks/mlqa/mlqa_hi_es.yaml b/lm_eval/tasks/mlqa/mlqa_hi_es.yaml index 15fb29f3e9..9e1ce0785e 100644 --- a/lm_eval/tasks/mlqa/mlqa_hi_es.yaml +++ b/lm_eval/tasks/mlqa/mlqa_hi_es.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_hi_es +include: mlqa_common_yaml +task: mlqa_hi_es.yaml dataset_name: mlqa.hi.es process_results: !function utils.process_results_hi diff --git a/lm_eval/tasks/mlqa/mlqa_hi_hi.yaml b/lm_eval/tasks/mlqa/mlqa_hi_hi.yaml index ed46263105..fb1b27a306 100644 --- a/lm_eval/tasks/mlqa/mlqa_hi_hi.yaml +++ b/lm_eval/tasks/mlqa/mlqa_hi_hi.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_hi_hi +include: mlqa_common_yaml +task: mlqa_hi_hi.yaml dataset_name: 
mlqa.hi.hi process_results: !function utils.process_results_hi diff --git a/lm_eval/tasks/mlqa/mlqa_hi_vi.yaml b/lm_eval/tasks/mlqa/mlqa_hi_vi.yaml index a6c586a222..4485049970 100644 --- a/lm_eval/tasks/mlqa/mlqa_hi_vi.yaml +++ b/lm_eval/tasks/mlqa/mlqa_hi_vi.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_hi_vi +include: mlqa_common_yaml +task: mlqa_hi_vi.yaml dataset_name: mlqa.hi.vi process_results: !function utils.process_results_hi diff --git a/lm_eval/tasks/mlqa/mlqa_hi_zh.yaml b/lm_eval/tasks/mlqa/mlqa_hi_zh.yaml index 518497b6e0..71320a6af3 100644 --- a/lm_eval/tasks/mlqa/mlqa_hi_zh.yaml +++ b/lm_eval/tasks/mlqa/mlqa_hi_zh.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_hi_zh +include: mlqa_common_yaml +task: mlqa_hi_zh.yaml dataset_name: mlqa.hi.zh process_results: !function utils.process_results_hi diff --git a/lm_eval/tasks/mlqa/mlqa_vi_ar.yaml b/lm_eval/tasks/mlqa/mlqa_vi_ar.yaml index 62a05dcdc9..f23100fe3c 100644 --- a/lm_eval/tasks/mlqa/mlqa_vi_ar.yaml +++ b/lm_eval/tasks/mlqa/mlqa_vi_ar.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_vi_ar +include: mlqa_common_yaml +task: mlqa_vi_ar.yaml dataset_name: mlqa.vi.ar process_results: !function utils.process_results_vi diff --git a/lm_eval/tasks/mlqa/mlqa_vi_de.yaml b/lm_eval/tasks/mlqa/mlqa_vi_de.yaml index e76223a4ff..25e77dd6b8 100644 --- a/lm_eval/tasks/mlqa/mlqa_vi_de.yaml +++ b/lm_eval/tasks/mlqa/mlqa_vi_de.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_vi_de +include: mlqa_common_yaml +task: mlqa_vi_de.yaml dataset_name: mlqa.vi.de process_results: !function utils.process_results_vi diff --git a/lm_eval/tasks/mlqa/mlqa_vi_en.yaml b/lm_eval/tasks/mlqa/mlqa_vi_en.yaml index cd6ea68146..dc75b15537 100644 --- a/lm_eval/tasks/mlqa/mlqa_vi_en.yaml +++ b/lm_eval/tasks/mlqa/mlqa_vi_en.yaml @@ -1,5 +1,5 @@ # 
Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_vi_en +include: mlqa_common_yaml +task: mlqa_vi_en.yaml dataset_name: mlqa.vi.en process_results: !function utils.process_results_vi diff --git a/lm_eval/tasks/mlqa/mlqa_vi_es.yaml b/lm_eval/tasks/mlqa/mlqa_vi_es.yaml index ac2f9e2019..2c48fc2d79 100644 --- a/lm_eval/tasks/mlqa/mlqa_vi_es.yaml +++ b/lm_eval/tasks/mlqa/mlqa_vi_es.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_vi_es +include: mlqa_common_yaml +task: mlqa_vi_es.yaml dataset_name: mlqa.vi.es process_results: !function utils.process_results_vi diff --git a/lm_eval/tasks/mlqa/mlqa_vi_hi.yaml b/lm_eval/tasks/mlqa/mlqa_vi_hi.yaml index 69e08aa047..341191b7ea 100644 --- a/lm_eval/tasks/mlqa/mlqa_vi_hi.yaml +++ b/lm_eval/tasks/mlqa/mlqa_vi_hi.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_vi_hi +include: mlqa_common_yaml +task: mlqa_vi_hi.yaml dataset_name: mlqa.vi.hi process_results: !function utils.process_results_vi diff --git a/lm_eval/tasks/mlqa/mlqa_vi_vi.yaml b/lm_eval/tasks/mlqa/mlqa_vi_vi.yaml index 4700a643dc..3d5534a7d7 100644 --- a/lm_eval/tasks/mlqa/mlqa_vi_vi.yaml +++ b/lm_eval/tasks/mlqa/mlqa_vi_vi.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_vi_vi +include: mlqa_common_yaml +task: mlqa_vi_vi.yaml dataset_name: mlqa.vi.vi process_results: !function utils.process_results_vi diff --git a/lm_eval/tasks/mlqa/mlqa_vi_zh.yaml b/lm_eval/tasks/mlqa/mlqa_vi_zh.yaml index b1d37862f3..ab5c14efb3 100644 --- a/lm_eval/tasks/mlqa/mlqa_vi_zh.yaml +++ b/lm_eval/tasks/mlqa/mlqa_vi_zh.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_vi_zh +include: mlqa_common_yaml +task: mlqa_vi_zh.yaml dataset_name: mlqa.vi.zh process_results: !function utils.process_results_vi diff --git a/lm_eval/tasks/mlqa/mlqa_zh_ar.yaml 
b/lm_eval/tasks/mlqa/mlqa_zh_ar.yaml index b42ee1dfe9..ab56d90e37 100644 --- a/lm_eval/tasks/mlqa/mlqa_zh_ar.yaml +++ b/lm_eval/tasks/mlqa/mlqa_zh_ar.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_zh_ar +include: mlqa_common_yaml +task: mlqa_zh_ar.yaml dataset_name: mlqa.zh.ar process_results: !function utils.process_results_zh diff --git a/lm_eval/tasks/mlqa/mlqa_zh_de.yaml b/lm_eval/tasks/mlqa/mlqa_zh_de.yaml index f069cbb965..d146f3ddef 100644 --- a/lm_eval/tasks/mlqa/mlqa_zh_de.yaml +++ b/lm_eval/tasks/mlqa/mlqa_zh_de.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_zh_de +include: mlqa_common_yaml +task: mlqa_zh_de.yaml dataset_name: mlqa.zh.de process_results: !function utils.process_results_zh diff --git a/lm_eval/tasks/mlqa/mlqa_zh_en.yaml b/lm_eval/tasks/mlqa/mlqa_zh_en.yaml index 91f464229a..07a0471ef0 100644 --- a/lm_eval/tasks/mlqa/mlqa_zh_en.yaml +++ b/lm_eval/tasks/mlqa/mlqa_zh_en.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_zh_en +include: mlqa_common_yaml +task: mlqa_zh_en.yaml dataset_name: mlqa.zh.en process_results: !function utils.process_results_zh diff --git a/lm_eval/tasks/mlqa/mlqa_zh_es.yaml b/lm_eval/tasks/mlqa/mlqa_zh_es.yaml index a831f93fcb..4139571d71 100644 --- a/lm_eval/tasks/mlqa/mlqa_zh_es.yaml +++ b/lm_eval/tasks/mlqa/mlqa_zh_es.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_zh_es +include: mlqa_common_yaml +task: mlqa_zh_es.yaml dataset_name: mlqa.zh.es process_results: !function utils.process_results_zh diff --git a/lm_eval/tasks/mlqa/mlqa_zh_hi.yaml b/lm_eval/tasks/mlqa/mlqa_zh_hi.yaml index df115819ac..83a555ca34 100644 --- a/lm_eval/tasks/mlqa/mlqa_zh_hi.yaml +++ b/lm_eval/tasks/mlqa/mlqa_zh_hi.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_zh_hi +include: mlqa_common_yaml 
+task: mlqa_zh_hi.yaml dataset_name: mlqa.zh.hi process_results: !function utils.process_results_zh diff --git a/lm_eval/tasks/mlqa/mlqa_zh_vi.yaml b/lm_eval/tasks/mlqa/mlqa_zh_vi.yaml index 6ed4b41c1d..12e46bdc1c 100644 --- a/lm_eval/tasks/mlqa/mlqa_zh_vi.yaml +++ b/lm_eval/tasks/mlqa/mlqa_zh_vi.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_zh_vi +include: mlqa_common_yaml +task: mlqa_zh_vi.yaml dataset_name: mlqa.zh.vi process_results: !function utils.process_results_zh diff --git a/lm_eval/tasks/mlqa/mlqa_zh_zh.yaml b/lm_eval/tasks/mlqa/mlqa_zh_zh.yaml index 4c95aecdba..bff084b927 100644 --- a/lm_eval/tasks/mlqa/mlqa_zh_zh.yaml +++ b/lm_eval/tasks/mlqa/mlqa_zh_zh.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py -include: mlqa_common_yaml.yaml -task: mlqa_zh_zh +include: mlqa_common_yaml +task: mlqa_zh_zh.yaml dataset_name: mlqa.zh.zh process_results: !function utils.process_results_zh From a78b9d2f02af5ec70e0c452efcfbccf0484f08ae Mon Sep 17 00:00:00 2001 From: KahnSvaer Date: Wed, 15 Jan 2025 22:42:24 +0530 Subject: [PATCH 7/7] nit --- lm_eval/tasks/mlqa/generate_tasks.py | 2 +- lm_eval/tasks/mlqa/mlqa_ar_ar.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_ar_de.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_ar_en.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_ar_es.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_ar_hi.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_ar_vi.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_ar_zh.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_de_ar.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_de_de.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_de_en.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_de_es.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_de_hi.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_de_vi.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_de_zh.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_en_ar.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_en_de.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_en_en.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_en_es.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_en_hi.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_en_vi.yaml | 2 +- 
lm_eval/tasks/mlqa/mlqa_en_zh.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_es_ar.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_es_de.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_es_en.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_es_es.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_es_hi.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_es_vi.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_es_zh.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_hi_ar.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_hi_de.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_hi_en.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_hi_es.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_hi_hi.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_hi_vi.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_hi_zh.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_vi_ar.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_vi_de.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_vi_en.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_vi_es.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_vi_hi.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_vi_vi.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_vi_zh.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_zh_ar.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_zh_de.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_zh_en.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_zh_es.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_zh_hi.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_zh_vi.yaml | 2 +- lm_eval/tasks/mlqa/mlqa_zh_zh.yaml | 2 +- 50 files changed, 50 insertions(+), 50 deletions(-) diff --git a/lm_eval/tasks/mlqa/generate_tasks.py b/lm_eval/tasks/mlqa/generate_tasks.py index 7a228fc55a..19bd3533af 100644 --- a/lm_eval/tasks/mlqa/generate_tasks.py +++ b/lm_eval/tasks/mlqa/generate_tasks.py @@ -37,7 +37,7 @@ def main() -> None: # Manually writing the YAML-like content inside files to avoid tagging issues f.write("include: mlqa_common_yaml\n") - f.write(f"task: {task.replace('.', '_')}.yaml\n") + f.write(f"task: {task.replace('.', '_')}\n") f.write(f"dataset_name: {task}\n") f.write( f"process_results: !function utils.process_results_{context_lang}\n" diff --git a/lm_eval/tasks/mlqa/mlqa_ar_ar.yaml b/lm_eval/tasks/mlqa/mlqa_ar_ar.yaml index a1750e3111..8db625acce 100644 --- 
a/lm_eval/tasks/mlqa/mlqa_ar_ar.yaml +++ b/lm_eval/tasks/mlqa/mlqa_ar_ar.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_ar_ar.yaml +task: mlqa_ar_ar dataset_name: mlqa.ar.ar process_results: !function utils.process_results_ar diff --git a/lm_eval/tasks/mlqa/mlqa_ar_de.yaml b/lm_eval/tasks/mlqa/mlqa_ar_de.yaml index 38d43c1cf4..3d1468a7bd 100644 --- a/lm_eval/tasks/mlqa/mlqa_ar_de.yaml +++ b/lm_eval/tasks/mlqa/mlqa_ar_de.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_ar_de.yaml +task: mlqa_ar_de dataset_name: mlqa.ar.de process_results: !function utils.process_results_ar diff --git a/lm_eval/tasks/mlqa/mlqa_ar_en.yaml b/lm_eval/tasks/mlqa/mlqa_ar_en.yaml index 94fd33193d..18e763e8ac 100644 --- a/lm_eval/tasks/mlqa/mlqa_ar_en.yaml +++ b/lm_eval/tasks/mlqa/mlqa_ar_en.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_ar_en.yaml +task: mlqa_ar_en dataset_name: mlqa.ar.en process_results: !function utils.process_results_ar diff --git a/lm_eval/tasks/mlqa/mlqa_ar_es.yaml b/lm_eval/tasks/mlqa/mlqa_ar_es.yaml index 3b4a39e477..c93ef03ec0 100644 --- a/lm_eval/tasks/mlqa/mlqa_ar_es.yaml +++ b/lm_eval/tasks/mlqa/mlqa_ar_es.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_ar_es.yaml +task: mlqa_ar_es dataset_name: mlqa.ar.es process_results: !function utils.process_results_ar diff --git a/lm_eval/tasks/mlqa/mlqa_ar_hi.yaml b/lm_eval/tasks/mlqa/mlqa_ar_hi.yaml index 070cb39325..5abb023ccd 100644 --- a/lm_eval/tasks/mlqa/mlqa_ar_hi.yaml +++ b/lm_eval/tasks/mlqa/mlqa_ar_hi.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_ar_hi.yaml +task: mlqa_ar_hi dataset_name: mlqa.ar.hi process_results: !function utils.process_results_ar diff --git a/lm_eval/tasks/mlqa/mlqa_ar_vi.yaml b/lm_eval/tasks/mlqa/mlqa_ar_vi.yaml index 991a31110e..54869c657d 100644 --- 
a/lm_eval/tasks/mlqa/mlqa_ar_vi.yaml +++ b/lm_eval/tasks/mlqa/mlqa_ar_vi.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_ar_vi.yaml +task: mlqa_ar_vi dataset_name: mlqa.ar.vi process_results: !function utils.process_results_ar diff --git a/lm_eval/tasks/mlqa/mlqa_ar_zh.yaml b/lm_eval/tasks/mlqa/mlqa_ar_zh.yaml index a5083aeef9..5236d6cb87 100644 --- a/lm_eval/tasks/mlqa/mlqa_ar_zh.yaml +++ b/lm_eval/tasks/mlqa/mlqa_ar_zh.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_ar_zh.yaml +task: mlqa_ar_zh dataset_name: mlqa.ar.zh process_results: !function utils.process_results_ar diff --git a/lm_eval/tasks/mlqa/mlqa_de_ar.yaml b/lm_eval/tasks/mlqa/mlqa_de_ar.yaml index fba0d72118..1090a58925 100644 --- a/lm_eval/tasks/mlqa/mlqa_de_ar.yaml +++ b/lm_eval/tasks/mlqa/mlqa_de_ar.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_de_ar.yaml +task: mlqa_de_ar dataset_name: mlqa.de.ar process_results: !function utils.process_results_de diff --git a/lm_eval/tasks/mlqa/mlqa_de_de.yaml b/lm_eval/tasks/mlqa/mlqa_de_de.yaml index a74c07e4e2..be465ab57a 100644 --- a/lm_eval/tasks/mlqa/mlqa_de_de.yaml +++ b/lm_eval/tasks/mlqa/mlqa_de_de.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_de_de.yaml +task: mlqa_de_de dataset_name: mlqa.de.de process_results: !function utils.process_results_de diff --git a/lm_eval/tasks/mlqa/mlqa_de_en.yaml b/lm_eval/tasks/mlqa/mlqa_de_en.yaml index 0556bc365b..55f2652ce4 100644 --- a/lm_eval/tasks/mlqa/mlqa_de_en.yaml +++ b/lm_eval/tasks/mlqa/mlqa_de_en.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_de_en.yaml +task: mlqa_de_en dataset_name: mlqa.de.en process_results: !function utils.process_results_de diff --git a/lm_eval/tasks/mlqa/mlqa_de_es.yaml b/lm_eval/tasks/mlqa/mlqa_de_es.yaml index cb10737aed..d4f085e624 100644 --- 
a/lm_eval/tasks/mlqa/mlqa_de_es.yaml +++ b/lm_eval/tasks/mlqa/mlqa_de_es.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_de_es.yaml +task: mlqa_de_es dataset_name: mlqa.de.es process_results: !function utils.process_results_de diff --git a/lm_eval/tasks/mlqa/mlqa_de_hi.yaml b/lm_eval/tasks/mlqa/mlqa_de_hi.yaml index f263432d2c..ff3bbc4286 100644 --- a/lm_eval/tasks/mlqa/mlqa_de_hi.yaml +++ b/lm_eval/tasks/mlqa/mlqa_de_hi.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_de_hi.yaml +task: mlqa_de_hi dataset_name: mlqa.de.hi process_results: !function utils.process_results_de diff --git a/lm_eval/tasks/mlqa/mlqa_de_vi.yaml b/lm_eval/tasks/mlqa/mlqa_de_vi.yaml index b48ca051b6..fe61983b70 100644 --- a/lm_eval/tasks/mlqa/mlqa_de_vi.yaml +++ b/lm_eval/tasks/mlqa/mlqa_de_vi.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_de_vi.yaml +task: mlqa_de_vi dataset_name: mlqa.de.vi process_results: !function utils.process_results_de diff --git a/lm_eval/tasks/mlqa/mlqa_de_zh.yaml b/lm_eval/tasks/mlqa/mlqa_de_zh.yaml index 35d913187e..ee1855626f 100644 --- a/lm_eval/tasks/mlqa/mlqa_de_zh.yaml +++ b/lm_eval/tasks/mlqa/mlqa_de_zh.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_de_zh.yaml +task: mlqa_de_zh dataset_name: mlqa.de.zh process_results: !function utils.process_results_de diff --git a/lm_eval/tasks/mlqa/mlqa_en_ar.yaml b/lm_eval/tasks/mlqa/mlqa_en_ar.yaml index 867f7c4357..a8c72d2694 100644 --- a/lm_eval/tasks/mlqa/mlqa_en_ar.yaml +++ b/lm_eval/tasks/mlqa/mlqa_en_ar.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_en_ar.yaml +task: mlqa_en_ar dataset_name: mlqa.en.ar process_results: !function utils.process_results_en diff --git a/lm_eval/tasks/mlqa/mlqa_en_de.yaml b/lm_eval/tasks/mlqa/mlqa_en_de.yaml index de8eb48044..b27e02ae6c 100644 --- 
a/lm_eval/tasks/mlqa/mlqa_en_de.yaml +++ b/lm_eval/tasks/mlqa/mlqa_en_de.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_en_de.yaml +task: mlqa_en_de dataset_name: mlqa.en.de process_results: !function utils.process_results_en diff --git a/lm_eval/tasks/mlqa/mlqa_en_en.yaml b/lm_eval/tasks/mlqa/mlqa_en_en.yaml index ace8e7e2e7..d15e222f7b 100644 --- a/lm_eval/tasks/mlqa/mlqa_en_en.yaml +++ b/lm_eval/tasks/mlqa/mlqa_en_en.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_en_en.yaml +task: mlqa_en_en dataset_name: mlqa.en.en process_results: !function utils.process_results_en diff --git a/lm_eval/tasks/mlqa/mlqa_en_es.yaml b/lm_eval/tasks/mlqa/mlqa_en_es.yaml index ca80e4b953..eddb728f02 100644 --- a/lm_eval/tasks/mlqa/mlqa_en_es.yaml +++ b/lm_eval/tasks/mlqa/mlqa_en_es.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_en_es.yaml +task: mlqa_en_es dataset_name: mlqa.en.es process_results: !function utils.process_results_en diff --git a/lm_eval/tasks/mlqa/mlqa_en_hi.yaml b/lm_eval/tasks/mlqa/mlqa_en_hi.yaml index b90cb049ac..7c2e38249a 100644 --- a/lm_eval/tasks/mlqa/mlqa_en_hi.yaml +++ b/lm_eval/tasks/mlqa/mlqa_en_hi.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_en_hi.yaml +task: mlqa_en_hi dataset_name: mlqa.en.hi process_results: !function utils.process_results_en diff --git a/lm_eval/tasks/mlqa/mlqa_en_vi.yaml b/lm_eval/tasks/mlqa/mlqa_en_vi.yaml index 4eb00da815..1a2f635ea3 100644 --- a/lm_eval/tasks/mlqa/mlqa_en_vi.yaml +++ b/lm_eval/tasks/mlqa/mlqa_en_vi.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_en_vi.yaml +task: mlqa_en_vi dataset_name: mlqa.en.vi process_results: !function utils.process_results_en diff --git a/lm_eval/tasks/mlqa/mlqa_en_zh.yaml b/lm_eval/tasks/mlqa/mlqa_en_zh.yaml index 52f185b399..91336eba9a 100644 --- 
a/lm_eval/tasks/mlqa/mlqa_en_zh.yaml +++ b/lm_eval/tasks/mlqa/mlqa_en_zh.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_en_zh.yaml +task: mlqa_en_zh dataset_name: mlqa.en.zh process_results: !function utils.process_results_en diff --git a/lm_eval/tasks/mlqa/mlqa_es_ar.yaml b/lm_eval/tasks/mlqa/mlqa_es_ar.yaml index d3e1bdbca5..9a24508cbd 100644 --- a/lm_eval/tasks/mlqa/mlqa_es_ar.yaml +++ b/lm_eval/tasks/mlqa/mlqa_es_ar.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_es_ar.yaml +task: mlqa_es_ar dataset_name: mlqa.es.ar process_results: !function utils.process_results_es diff --git a/lm_eval/tasks/mlqa/mlqa_es_de.yaml b/lm_eval/tasks/mlqa/mlqa_es_de.yaml index 04689086d7..9a40b2b695 100644 --- a/lm_eval/tasks/mlqa/mlqa_es_de.yaml +++ b/lm_eval/tasks/mlqa/mlqa_es_de.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_es_de.yaml +task: mlqa_es_de dataset_name: mlqa.es.de process_results: !function utils.process_results_es diff --git a/lm_eval/tasks/mlqa/mlqa_es_en.yaml b/lm_eval/tasks/mlqa/mlqa_es_en.yaml index 909c2349c6..660968c7fd 100644 --- a/lm_eval/tasks/mlqa/mlqa_es_en.yaml +++ b/lm_eval/tasks/mlqa/mlqa_es_en.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_es_en.yaml +task: mlqa_es_en dataset_name: mlqa.es.en process_results: !function utils.process_results_es diff --git a/lm_eval/tasks/mlqa/mlqa_es_es.yaml b/lm_eval/tasks/mlqa/mlqa_es_es.yaml index 8a9b88bd17..1232947b92 100644 --- a/lm_eval/tasks/mlqa/mlqa_es_es.yaml +++ b/lm_eval/tasks/mlqa/mlqa_es_es.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_es_es.yaml +task: mlqa_es_es dataset_name: mlqa.es.es process_results: !function utils.process_results_es diff --git a/lm_eval/tasks/mlqa/mlqa_es_hi.yaml b/lm_eval/tasks/mlqa/mlqa_es_hi.yaml index c114ea086c..5502288925 100644 --- 
a/lm_eval/tasks/mlqa/mlqa_es_hi.yaml +++ b/lm_eval/tasks/mlqa/mlqa_es_hi.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_es_hi.yaml +task: mlqa_es_hi dataset_name: mlqa.es.hi process_results: !function utils.process_results_es diff --git a/lm_eval/tasks/mlqa/mlqa_es_vi.yaml b/lm_eval/tasks/mlqa/mlqa_es_vi.yaml index a0dfde1e8f..0ea9027dec 100644 --- a/lm_eval/tasks/mlqa/mlqa_es_vi.yaml +++ b/lm_eval/tasks/mlqa/mlqa_es_vi.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_es_vi.yaml +task: mlqa_es_vi dataset_name: mlqa.es.vi process_results: !function utils.process_results_es diff --git a/lm_eval/tasks/mlqa/mlqa_es_zh.yaml b/lm_eval/tasks/mlqa/mlqa_es_zh.yaml index c68590a927..caecd1b2d0 100644 --- a/lm_eval/tasks/mlqa/mlqa_es_zh.yaml +++ b/lm_eval/tasks/mlqa/mlqa_es_zh.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_es_zh.yaml +task: mlqa_es_zh dataset_name: mlqa.es.zh process_results: !function utils.process_results_es diff --git a/lm_eval/tasks/mlqa/mlqa_hi_ar.yaml b/lm_eval/tasks/mlqa/mlqa_hi_ar.yaml index 607dbe62a9..e4c4263a1d 100644 --- a/lm_eval/tasks/mlqa/mlqa_hi_ar.yaml +++ b/lm_eval/tasks/mlqa/mlqa_hi_ar.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_hi_ar.yaml +task: mlqa_hi_ar dataset_name: mlqa.hi.ar process_results: !function utils.process_results_hi diff --git a/lm_eval/tasks/mlqa/mlqa_hi_de.yaml b/lm_eval/tasks/mlqa/mlqa_hi_de.yaml index 6e8cb4d7a0..8069b5a07b 100644 --- a/lm_eval/tasks/mlqa/mlqa_hi_de.yaml +++ b/lm_eval/tasks/mlqa/mlqa_hi_de.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_hi_de.yaml +task: mlqa_hi_de dataset_name: mlqa.hi.de process_results: !function utils.process_results_hi diff --git a/lm_eval/tasks/mlqa/mlqa_hi_en.yaml b/lm_eval/tasks/mlqa/mlqa_hi_en.yaml index fa128a5c22..d7a18067bc 100644 --- 
a/lm_eval/tasks/mlqa/mlqa_hi_en.yaml +++ b/lm_eval/tasks/mlqa/mlqa_hi_en.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_hi_en.yaml +task: mlqa_hi_en dataset_name: mlqa.hi.en process_results: !function utils.process_results_hi diff --git a/lm_eval/tasks/mlqa/mlqa_hi_es.yaml b/lm_eval/tasks/mlqa/mlqa_hi_es.yaml index 9e1ce0785e..d152ad66dc 100644 --- a/lm_eval/tasks/mlqa/mlqa_hi_es.yaml +++ b/lm_eval/tasks/mlqa/mlqa_hi_es.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_hi_es.yaml +task: mlqa_hi_es dataset_name: mlqa.hi.es process_results: !function utils.process_results_hi diff --git a/lm_eval/tasks/mlqa/mlqa_hi_hi.yaml b/lm_eval/tasks/mlqa/mlqa_hi_hi.yaml index fb1b27a306..1ce79e6bbe 100644 --- a/lm_eval/tasks/mlqa/mlqa_hi_hi.yaml +++ b/lm_eval/tasks/mlqa/mlqa_hi_hi.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_hi_hi.yaml +task: mlqa_hi_hi dataset_name: mlqa.hi.hi process_results: !function utils.process_results_hi diff --git a/lm_eval/tasks/mlqa/mlqa_hi_vi.yaml b/lm_eval/tasks/mlqa/mlqa_hi_vi.yaml index 4485049970..534d90f70d 100644 --- a/lm_eval/tasks/mlqa/mlqa_hi_vi.yaml +++ b/lm_eval/tasks/mlqa/mlqa_hi_vi.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_hi_vi.yaml +task: mlqa_hi_vi dataset_name: mlqa.hi.vi process_results: !function utils.process_results_hi diff --git a/lm_eval/tasks/mlqa/mlqa_hi_zh.yaml b/lm_eval/tasks/mlqa/mlqa_hi_zh.yaml index 71320a6af3..8432db492d 100644 --- a/lm_eval/tasks/mlqa/mlqa_hi_zh.yaml +++ b/lm_eval/tasks/mlqa/mlqa_hi_zh.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_hi_zh.yaml +task: mlqa_hi_zh dataset_name: mlqa.hi.zh process_results: !function utils.process_results_hi diff --git a/lm_eval/tasks/mlqa/mlqa_vi_ar.yaml b/lm_eval/tasks/mlqa/mlqa_vi_ar.yaml index f23100fe3c..c22c11cd06 100644 --- 
a/lm_eval/tasks/mlqa/mlqa_vi_ar.yaml +++ b/lm_eval/tasks/mlqa/mlqa_vi_ar.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_vi_ar.yaml +task: mlqa_vi_ar dataset_name: mlqa.vi.ar process_results: !function utils.process_results_vi diff --git a/lm_eval/tasks/mlqa/mlqa_vi_de.yaml b/lm_eval/tasks/mlqa/mlqa_vi_de.yaml index 25e77dd6b8..948ac3ac36 100644 --- a/lm_eval/tasks/mlqa/mlqa_vi_de.yaml +++ b/lm_eval/tasks/mlqa/mlqa_vi_de.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_vi_de.yaml +task: mlqa_vi_de dataset_name: mlqa.vi.de process_results: !function utils.process_results_vi diff --git a/lm_eval/tasks/mlqa/mlqa_vi_en.yaml b/lm_eval/tasks/mlqa/mlqa_vi_en.yaml index dc75b15537..0106867703 100644 --- a/lm_eval/tasks/mlqa/mlqa_vi_en.yaml +++ b/lm_eval/tasks/mlqa/mlqa_vi_en.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_vi_en.yaml +task: mlqa_vi_en dataset_name: mlqa.vi.en process_results: !function utils.process_results_vi diff --git a/lm_eval/tasks/mlqa/mlqa_vi_es.yaml b/lm_eval/tasks/mlqa/mlqa_vi_es.yaml index 2c48fc2d79..9ac62c1056 100644 --- a/lm_eval/tasks/mlqa/mlqa_vi_es.yaml +++ b/lm_eval/tasks/mlqa/mlqa_vi_es.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_vi_es.yaml +task: mlqa_vi_es dataset_name: mlqa.vi.es process_results: !function utils.process_results_vi diff --git a/lm_eval/tasks/mlqa/mlqa_vi_hi.yaml b/lm_eval/tasks/mlqa/mlqa_vi_hi.yaml index 341191b7ea..26b232a879 100644 --- a/lm_eval/tasks/mlqa/mlqa_vi_hi.yaml +++ b/lm_eval/tasks/mlqa/mlqa_vi_hi.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_vi_hi.yaml +task: mlqa_vi_hi dataset_name: mlqa.vi.hi process_results: !function utils.process_results_vi diff --git a/lm_eval/tasks/mlqa/mlqa_vi_vi.yaml b/lm_eval/tasks/mlqa/mlqa_vi_vi.yaml index 3d5534a7d7..d8277d78eb 100644 --- 
a/lm_eval/tasks/mlqa/mlqa_vi_vi.yaml +++ b/lm_eval/tasks/mlqa/mlqa_vi_vi.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_vi_vi.yaml +task: mlqa_vi_vi dataset_name: mlqa.vi.vi process_results: !function utils.process_results_vi diff --git a/lm_eval/tasks/mlqa/mlqa_vi_zh.yaml b/lm_eval/tasks/mlqa/mlqa_vi_zh.yaml index ab5c14efb3..7ecc6b9192 100644 --- a/lm_eval/tasks/mlqa/mlqa_vi_zh.yaml +++ b/lm_eval/tasks/mlqa/mlqa_vi_zh.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_vi_zh.yaml +task: mlqa_vi_zh dataset_name: mlqa.vi.zh process_results: !function utils.process_results_vi diff --git a/lm_eval/tasks/mlqa/mlqa_zh_ar.yaml b/lm_eval/tasks/mlqa/mlqa_zh_ar.yaml index ab56d90e37..42c3713d5a 100644 --- a/lm_eval/tasks/mlqa/mlqa_zh_ar.yaml +++ b/lm_eval/tasks/mlqa/mlqa_zh_ar.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_zh_ar.yaml +task: mlqa_zh_ar dataset_name: mlqa.zh.ar process_results: !function utils.process_results_zh diff --git a/lm_eval/tasks/mlqa/mlqa_zh_de.yaml b/lm_eval/tasks/mlqa/mlqa_zh_de.yaml index d146f3ddef..cb5e4cb884 100644 --- a/lm_eval/tasks/mlqa/mlqa_zh_de.yaml +++ b/lm_eval/tasks/mlqa/mlqa_zh_de.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_zh_de.yaml +task: mlqa_zh_de dataset_name: mlqa.zh.de process_results: !function utils.process_results_zh diff --git a/lm_eval/tasks/mlqa/mlqa_zh_en.yaml b/lm_eval/tasks/mlqa/mlqa_zh_en.yaml index 07a0471ef0..653f26aefa 100644 --- a/lm_eval/tasks/mlqa/mlqa_zh_en.yaml +++ b/lm_eval/tasks/mlqa/mlqa_zh_en.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_zh_en.yaml +task: mlqa_zh_en dataset_name: mlqa.zh.en process_results: !function utils.process_results_zh diff --git a/lm_eval/tasks/mlqa/mlqa_zh_es.yaml b/lm_eval/tasks/mlqa/mlqa_zh_es.yaml index 4139571d71..c98203f76f 100644 --- 
a/lm_eval/tasks/mlqa/mlqa_zh_es.yaml +++ b/lm_eval/tasks/mlqa/mlqa_zh_es.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_zh_es.yaml +task: mlqa_zh_es dataset_name: mlqa.zh.es process_results: !function utils.process_results_zh diff --git a/lm_eval/tasks/mlqa/mlqa_zh_hi.yaml b/lm_eval/tasks/mlqa/mlqa_zh_hi.yaml index 83a555ca34..ed58f47f4d 100644 --- a/lm_eval/tasks/mlqa/mlqa_zh_hi.yaml +++ b/lm_eval/tasks/mlqa/mlqa_zh_hi.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_zh_hi.yaml +task: mlqa_zh_hi dataset_name: mlqa.zh.hi process_results: !function utils.process_results_zh diff --git a/lm_eval/tasks/mlqa/mlqa_zh_vi.yaml b/lm_eval/tasks/mlqa/mlqa_zh_vi.yaml index 12e46bdc1c..7043676235 100644 --- a/lm_eval/tasks/mlqa/mlqa_zh_vi.yaml +++ b/lm_eval/tasks/mlqa/mlqa_zh_vi.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_zh_vi.yaml +task: mlqa_zh_vi dataset_name: mlqa.zh.vi process_results: !function utils.process_results_zh diff --git a/lm_eval/tasks/mlqa/mlqa_zh_zh.yaml b/lm_eval/tasks/mlqa/mlqa_zh_zh.yaml index bff084b927..792b5ee0c9 100644 --- a/lm_eval/tasks/mlqa/mlqa_zh_zh.yaml +++ b/lm_eval/tasks/mlqa/mlqa_zh_zh.yaml @@ -1,5 +1,5 @@ # Generated by generate_tasks.py include: mlqa_common_yaml -task: mlqa_zh_zh.yaml +task: mlqa_zh_zh dataset_name: mlqa.zh.zh process_results: !function utils.process_results_zh