diff --git a/community_tasks/_template.py b/community_tasks/_template.py
index d0099ba26..2db28e340 100644
--- a/community_tasks/_template.py
+++ b/community_tasks/_template.py
@@ -116,10 +116,3 @@ def __init__(
     sample_level_fn=lambda x: x,  # how to compute score for one sample
     corpus_level_fn=np.mean,  # aggregation
 )
-
-# MODULE LOGIC
-# You should not need to touch this
-# Convert to dict for lighteval
-if __name__ == "__main__":
-    print(t.name for t in TASKS_TABLE)
-    print(len(TASKS_TABLE))
diff --git a/community_tasks/aimo_evals.py b/community_tasks/aimo_evals.py
index be59950bd..885ffd8da 100644
--- a/community_tasks/aimo_evals.py
+++ b/community_tasks/aimo_evals.py
@@ -56,11 +56,3 @@ def aimo_prompt(line, task_name: str = None):
 
 # STORE YOUR EVALS
 TASKS_TABLE = [task]
-
-
-# MODULE LOGIC
-# You should not need to touch this
-
-if __name__ == "__main__":
-    print(t.name for t in TASKS_TABLE)
-    print(len(TASKS_TABLE))
diff --git a/community_tasks/arabic_evals.py b/community_tasks/arabic_evals.py
index 07a096eca..4408f22fa 100644
--- a/community_tasks/arabic_evals.py
+++ b/community_tasks/arabic_evals.py
@@ -856,7 +856,3 @@ def __init__(
     + [toxigen_ar_task]
     + [sciq_ar_task]
 )
-
-if __name__ == "__main__":
-    print(t.name for t in TASKS_TABLE)
-    print(len(TASKS_TABLE))
diff --git a/community_tasks/german_rag_evals.py b/community_tasks/german_rag_evals.py
index 30d6dcb4a..78af6794e 100644
--- a/community_tasks/german_rag_evals.py
+++ b/community_tasks/german_rag_evals.py
@@ -221,11 +221,3 @@ def prompt_fn_context_question_match(line, task_name: str = None):
 
 # STORE YOUR EVALS
 TASKS_TABLE = [task1, task2, task3, task4]
-
-
-# MODULE LOGIC
-# You should not need to touch this
-
-if __name__ == "__main__":
-    print(t.name for t in TASKS_TABLE)
-    print(len(TASKS_TABLE))
diff --git a/community_tasks/oz_evals.py b/community_tasks/oz_evals.py
index 6252a20a0..4ec70e291 100644
--- a/community_tasks/oz_evals.py
+++ b/community_tasks/oz_evals.py
@@ -87,8 +87,3 @@ def prompt_fn_oz_eval_task(line, task_name: str = None):
 
 # STORE YOUR EVALS
 TASKS_TABLE = [oz_eval_task]
-
-
-if __name__ == "__main__":
-    print(t["name"] for t in TASKS_TABLE)
-    print(len(TASKS_TABLE))
diff --git a/community_tasks/serbian_eval.py b/community_tasks/serbian_eval.py
index 3b49c4cb0..d972ac69c 100644
--- a/community_tasks/serbian_eval.py
+++ b/community_tasks/serbian_eval.py
@@ -784,7 +784,3 @@ def create_task_config(
     mmlu_world_religions,
     mmlu_all,
 ]
-
-if __name__ == "__main__":
-    print(t.name for t in TASKS_TABLE)
-    print(len(TASKS_TABLE))
diff --git a/docs/source/adding-a-custom-task.mdx b/docs/source/adding-a-custom-task.mdx
index e1823b7b9..e5160024d 100644
--- a/docs/source/adding-a-custom-task.mdx
+++ b/docs/source/adding-a-custom-task.mdx
@@ -167,17 +167,6 @@ TASKS_TABLE = SUBSET_TASKS
 # TASKS_TABLE = [task]
 ```
 
-Finally, you need to add a module logic to convert your task to a dict for lighteval.
-
-```python
-# MODULE LOGIC
-# You should not need to touch this
-# Convert to dict for lighteval
-if __name__ == "__main__":
-    print(t.name for t in TASKS_TABLE)
-    print(len(TASKS_TABLE))
-```
-
 Once your file is created you can then run the evaluation with the following command:
 
 ```bash
diff --git a/examples/nanotron/custom_evaluation_tasks.py b/examples/nanotron/custom_evaluation_tasks.py
index 78c354916..e4b1b5ca6 100644
--- a/examples/nanotron/custom_evaluation_tasks.py
+++ b/examples/nanotron/custom_evaluation_tasks.py
@@ -671,7 +671,3 @@ def __init__(
     "all": ",".join(t[1] for t in _TASKS_STRINGS),
     "early-signal": EARLY_SIGNAL_TASKS,
 }
-
-if __name__ == "__main__":
-    print(t["name"] for t in TASKS_TABLE)
-    print(len(TASKS_TABLE))
diff --git a/src/lighteval/main_accelerate.py b/src/lighteval/main_accelerate.py
index 3454a223b..a0a01abe8 100644
--- a/src/lighteval/main_accelerate.py
+++ b/src/lighteval/main_accelerate.py
@@ -33,10 +33,10 @@
 TOKEN = os.getenv("HF_TOKEN")
 CACHE_DIR: str = os.getenv("HF_HOME", "/scratch")
 
-HELP_PANNEL_NAME_1 = "Common Paramaters"
-HELP_PANNEL_NAME_2 = "Logging Parameters"
-HELP_PANNEL_NAME_3 = "Debug Paramaters"
-HELP_PANNEL_NAME_4 = "Modeling Paramaters"
+HELP_PANEL_NAME_1 = "Common Parameters"
+HELP_PANEL_NAME_2 = "Logging Parameters"
+HELP_PANEL_NAME_3 = "Debug Parameters"
+HELP_PANEL_NAME_4 = "Modeling Parameters"
 
 
 def accelerate(  # noqa C901
@@ -50,51 +50,51 @@
     tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")],
     # === Common parameters ===
     use_chat_template: Annotated[
-        bool, Option(help="Use chat template for evaluation.", rich_help_panel=HELP_PANNEL_NAME_4)
+        bool, Option(help="Use chat template for evaluation.", rich_help_panel=HELP_PANEL_NAME_4)
     ] = False,
     system_prompt: Annotated[
-        Optional[str], Option(help="Use system prompt for evaluation.", rich_help_panel=HELP_PANNEL_NAME_4)
+        Optional[str], Option(help="Use system prompt for evaluation.", rich_help_panel=HELP_PANEL_NAME_4)
     ] = None,
     dataset_loading_processes: Annotated[
-        int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANNEL_NAME_1)
+        int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = 1,
     custom_tasks: Annotated[
-        Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANNEL_NAME_1)
+        Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = None,
     cache_dir: Annotated[
-        Optional[str], Option(help="Cache directory for datasets and models.", rich_help_panel=HELP_PANNEL_NAME_1)
+        Optional[str], Option(help="Cache directory for datasets and models.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = None,
     num_fewshot_seeds: Annotated[
-        int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANNEL_NAME_1)
+        int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = 1,
     # === saving ===
     output_dir: Annotated[
-        str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANNEL_NAME_2)
+        str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = "results",
     push_to_hub: Annotated[
-        bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANNEL_NAME_2)
+        bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
     push_to_tensorboard: Annotated[
-        bool, Option(help="Push results to tensorboard.", rich_help_panel=HELP_PANNEL_NAME_2)
+        bool, Option(help="Push results to tensorboard.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
     public_run: Annotated[
-        bool, Option(help="Push results and details to a public repo.", rich_help_panel=HELP_PANNEL_NAME_2)
+        bool, Option(help="Push results and details to a public repo.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
     results_org: Annotated[
-        Optional[str], Option(help="Organization to push results to.", rich_help_panel=HELP_PANNEL_NAME_2)
+        Optional[str], Option(help="Organization to push results to.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = None,
     save_details: Annotated[
-        bool, Option(help="Save detailed, sample per sample, results.", rich_help_panel=HELP_PANNEL_NAME_2)
+        bool, Option(help="Save detailed, sample per sample, results.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
     # === debug ===
     max_samples: Annotated[
-        Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANNEL_NAME_3)
+        Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANEL_NAME_3)
     ] = None,
     override_batch_size: Annotated[
-        int, Option(help="Override batch size for evaluation.", rich_help_panel=HELP_PANNEL_NAME_3)
+        int, Option(help="Override batch size for evaluation.", rich_help_panel=HELP_PANEL_NAME_3)
     ] = -1,
     job_id: Annotated[
-        int, Option(help="Optional job id for future refenrence.", rich_help_panel=HELP_PANNEL_NAME_3)
+        int, Option(help="Optional job id for future reference.", rich_help_panel=HELP_PANEL_NAME_3)
     ] = 0,
 ):
     """
diff --git a/src/lighteval/main_baseline.py b/src/lighteval/main_baseline.py
index dd4786679..2dd970ea8 100644
--- a/src/lighteval/main_baseline.py
+++ b/src/lighteval/main_baseline.py
@@ -30,28 +30,28 @@
 
 CACHE_DIR: str = os.getenv("HF_HOME", "/scratch")
 
-HELP_PANNEL_NAME_1 = "Common Paramaters"
-HELP_PANNEL_NAME_2 = "Logging Parameters"
-HELP_PANNEL_NAME_3 = "Debug Paramaters"
-HELP_PANNEL_NAME_4 = "Modeling Paramaters"
+HELP_PANEL_NAME_1 = "Common Parameters"
+HELP_PANEL_NAME_2 = "Logging Parameters"
+HELP_PANEL_NAME_3 = "Debug Parameters"
+HELP_PANEL_NAME_4 = "Modeling Parameters"
 
 
 def baseline(
     tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")],
     cache_dir: Annotated[
-        str, Option(help="Cache directory for datasets and models.", rich_help_panel=HELP_PANNEL_NAME_1)
+        str, Option(help="Cache directory for datasets and models.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = CACHE_DIR,
     custom_tasks: Annotated[
-        Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANNEL_NAME_1)
+        Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = None,
     dataset_loading_processes: Annotated[
-        int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANNEL_NAME_1)
+        int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = 1,
     output_dir: Annotated[
-        str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANNEL_NAME_2)
+        str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = "results",
     max_samples: Annotated[
-        Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANNEL_NAME_3)
+        Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANEL_NAME_3)
     ] = None,
 ):
     """
diff --git a/src/lighteval/main_endpoint.py b/src/lighteval/main_endpoint.py
index be75b711a..208cc8386 100644
--- a/src/lighteval/main_endpoint.py
+++ b/src/lighteval/main_endpoint.py
@@ -33,10 +33,10 @@
 TOKEN = os.getenv("HF_TOKEN")
 CACHE_DIR: str = os.getenv("HF_HOME", "/scratch")
 
-HELP_PANNEL_NAME_1 = "Common Paramaters"
-HELP_PANNEL_NAME_2 = "Logging Parameters"
-HELP_PANNEL_NAME_3 = "Debug Paramaters"
-HELP_PANNEL_NAME_4 = "Modeling Paramaters"
+HELP_PANEL_NAME_1 = "Common Parameters"
+HELP_PANEL_NAME_2 = "Logging Parameters"
+HELP_PANEL_NAME_3 = "Debug Parameters"
+HELP_PANEL_NAME_4 = "Modeling Parameters"
 
 
 @app.command(rich_help_panel="Evaluation Backends")
@@ -48,45 +48,45 @@ def openai(
     tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")],
     # === Common parameters ===
     system_prompt: Annotated[
-        Optional[str], Option(help="Use system prompt for evaluation.", rich_help_panel=HELP_PANNEL_NAME_4)
+        Optional[str], Option(help="Use system prompt for evaluation.", rich_help_panel=HELP_PANEL_NAME_4)
     ] = None,
     dataset_loading_processes: Annotated[
-        int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANNEL_NAME_1)
+        int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = 1,
     custom_tasks: Annotated[
-        Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANNEL_NAME_1)
+        Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = None,
     cache_dir: Annotated[
-        str, Option(help="Cache directory for datasets and models.", rich_help_panel=HELP_PANNEL_NAME_1)
+        str, Option(help="Cache directory for datasets and models.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = CACHE_DIR,
     num_fewshot_seeds: Annotated[
-        int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANNEL_NAME_1)
+        int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = 1,
     # === saving ===
     output_dir: Annotated[
-        str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANNEL_NAME_2)
+        str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = "results",
     push_to_hub: Annotated[
-        bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANNEL_NAME_2)
+        bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
     push_to_tensorboard: Annotated[
-        bool, Option(help="Push results to tensorboard.", rich_help_panel=HELP_PANNEL_NAME_2)
+        bool, Option(help="Push results to tensorboard.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
     public_run: Annotated[
-        bool, Option(help="Push results and details to a public repo.", rich_help_panel=HELP_PANNEL_NAME_2)
+        bool, Option(help="Push results and details to a public repo.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
     results_org: Annotated[
-        Optional[str], Option(help="Organization to push results to.", rich_help_panel=HELP_PANNEL_NAME_2)
+        Optional[str], Option(help="Organization to push results to.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = None,
     save_details: Annotated[
-        bool, Option(help="Save detailed, sample per sample, results.", rich_help_panel=HELP_PANNEL_NAME_2)
+        bool, Option(help="Save detailed, sample per sample, results.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
     # === debug ===
     max_samples: Annotated[
-        Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANNEL_NAME_3)
+        Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANEL_NAME_3)
     ] = None,
     job_id: Annotated[
-        int, Option(help="Optional job id for future refenrence.", rich_help_panel=HELP_PANNEL_NAME_3)
+        int, Option(help="Optional job id for future reference.", rich_help_panel=HELP_PANEL_NAME_3)
     ] = 0,
 ):
     """
@@ -148,51 +148,51 @@ def inference_endpoint(
     tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")],
     # === Common parameters ===
     use_chat_template: Annotated[
-        bool, Option(help="Use chat template for evaluation.", rich_help_panel=HELP_PANNEL_NAME_4)
+        bool, Option(help="Use chat template for evaluation.", rich_help_panel=HELP_PANEL_NAME_4)
     ] = False,
     system_prompt: Annotated[
-        Optional[str], Option(help="Use system prompt for evaluation.", rich_help_panel=HELP_PANNEL_NAME_4)
+        Optional[str], Option(help="Use system prompt for evaluation.", rich_help_panel=HELP_PANEL_NAME_4)
     ] = None,
     dataset_loading_processes: Annotated[
-        int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANNEL_NAME_1)
+        int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = 1,
     custom_tasks: Annotated[
-        Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANNEL_NAME_1)
+        Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = None,
     cache_dir: Annotated[
-        str, Option(help="Cache directory for datasets and models.", rich_help_panel=HELP_PANNEL_NAME_1)
+        str, Option(help="Cache directory for datasets and models.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = CACHE_DIR,
     num_fewshot_seeds: Annotated[
-        int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANNEL_NAME_1)
+        int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = 1,
     # === saving ===
     output_dir: Annotated[
-        str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANNEL_NAME_2)
+        str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = "results",
     push_to_hub: Annotated[
-        bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANNEL_NAME_2)
+        bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
     push_to_tensorboard: Annotated[
-        bool, Option(help="Push results to tensorboard.", rich_help_panel=HELP_PANNEL_NAME_2)
+        bool, Option(help="Push results to tensorboard.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
     public_run: Annotated[
-        bool, Option(help="Push results and details to a public repo.", rich_help_panel=HELP_PANNEL_NAME_2)
+        bool, Option(help="Push results and details to a public repo.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
     results_org: Annotated[
-        Optional[str], Option(help="Organization to push results to.", rich_help_panel=HELP_PANNEL_NAME_2)
+        Optional[str], Option(help="Organization to push results to.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = None,
     save_details: Annotated[
-        bool, Option(help="Save detailed, sample per sample, results.", rich_help_panel=HELP_PANNEL_NAME_2)
+        bool, Option(help="Save detailed, sample per sample, results.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
     # === debug ===
     max_samples: Annotated[
-        Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANNEL_NAME_3)
+        Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANEL_NAME_3)
     ] = None,
     override_batch_size: Annotated[
-        int, Option(help="Override batch size for evaluation.", rich_help_panel=HELP_PANNEL_NAME_3)
+        int, Option(help="Override batch size for evaluation.", rich_help_panel=HELP_PANEL_NAME_3)
     ] = None,
     job_id: Annotated[
-        int, Option(help="Optional job id for future refenrence.", rich_help_panel=HELP_PANNEL_NAME_3)
+        int, Option(help="Optional job id for future reference.", rich_help_panel=HELP_PANEL_NAME_3)
     ] = 0,
 ):
     """
@@ -264,51 +264,51 @@ def tgi(
     tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")],
     # === Common parameters ===
     use_chat_template: Annotated[
-        bool, Option(help="Use chat template for evaluation.", rich_help_panel=HELP_PANNEL_NAME_4)
+        bool, Option(help="Use chat template for evaluation.", rich_help_panel=HELP_PANEL_NAME_4)
     ] = False,
     system_prompt: Annotated[
-        Optional[str], Option(help="Use system prompt for evaluation.", rich_help_panel=HELP_PANNEL_NAME_4)
+        Optional[str], Option(help="Use system prompt for evaluation.", rich_help_panel=HELP_PANEL_NAME_4)
     ] = None,
     dataset_loading_processes: Annotated[
-        int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANNEL_NAME_1)
+        int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = 1,
     custom_tasks: Annotated[
-        Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANNEL_NAME_1)
+        Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = None,
     cache_dir: Annotated[
-        str, Option(help="Cache directory for datasets and models.", rich_help_panel=HELP_PANNEL_NAME_1)
+        str, Option(help="Cache directory for datasets and models.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = CACHE_DIR,
     num_fewshot_seeds: Annotated[
-        int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANNEL_NAME_1)
+        int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = 1,
     # === saving ===
     output_dir: Annotated[
-        str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANNEL_NAME_2)
+        str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = "results",
     push_to_hub: Annotated[
-        bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANNEL_NAME_2)
+        bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
     push_to_tensorboard: Annotated[
-        bool, Option(help="Push results to tensorboard.", rich_help_panel=HELP_PANNEL_NAME_2)
+        bool, Option(help="Push results to tensorboard.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
     public_run: Annotated[
-        bool, Option(help="Push results and details to a public repo.", rich_help_panel=HELP_PANNEL_NAME_2)
+        bool, Option(help="Push results and details to a public repo.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
     results_org: Annotated[
-        Optional[str], Option(help="Organization to push results to.", rich_help_panel=HELP_PANNEL_NAME_2)
+        Optional[str], Option(help="Organization to push results to.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = None,
     save_details: Annotated[
-        bool, Option(help="Save detailed, sample per sample, results.", rich_help_panel=HELP_PANNEL_NAME_2)
+        bool, Option(help="Save detailed, sample per sample, results.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
     # === debug ===
     max_samples: Annotated[
-        Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANNEL_NAME_3)
+        Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANEL_NAME_3)
     ] = None,
     override_batch_size: Annotated[
-        int, Option(help="Override batch size for evaluation.", rich_help_panel=HELP_PANNEL_NAME_3)
+        int, Option(help="Override batch size for evaluation.", rich_help_panel=HELP_PANEL_NAME_3)
     ] = -1,
     job_id: Annotated[
-        int, Option(help="Optional job id for future refenrence.", rich_help_panel=HELP_PANNEL_NAME_3)
+        int, Option(help="Optional job id for future reference.", rich_help_panel=HELP_PANEL_NAME_3)
     ] = 0,
 ):
     """
diff --git a/src/lighteval/main_nanotron.py b/src/lighteval/main_nanotron.py
index 66826122e..94004c065 100644
--- a/src/lighteval/main_nanotron.py
+++ b/src/lighteval/main_nanotron.py
@@ -29,10 +29,10 @@
 
 CACHE_DIR: str = os.getenv("HF_HOME", "/scratch")
 
-HELP_PANNEL_NAME_1 = "Common Paramaters"
-HELP_PANNEL_NAME_2 = "Logging Parameters"
-HELP_PANNEL_NAME_3 = "Debug Paramaters"
-HELP_PANNEL_NAME_4 = "Modeling Paramaters"
+HELP_PANEL_NAME_1 = "Common Parameters"
+HELP_PANEL_NAME_2 = "Logging Parameters"
+HELP_PANEL_NAME_3 = "Debug Parameters"
+HELP_PANEL_NAME_4 = "Modeling Parameters"
 
 
 SEED = 1234
diff --git a/src/lighteval/main_vllm.py b/src/lighteval/main_vllm.py
index 078000da5..28c4abdc0 100644
--- a/src/lighteval/main_vllm.py
+++ b/src/lighteval/main_vllm.py
@@ -29,10 +29,10 @@
 TOKEN = os.getenv("HF_TOKEN")
 CACHE_DIR: str = os.getenv("HF_HOME", "/scratch")
 
-HELP_PANNEL_NAME_1 = "Common Paramaters"
-HELP_PANNEL_NAME_2 = "Logging Parameters"
-HELP_PANNEL_NAME_3 = "Debug Paramaters"
-HELP_PANNEL_NAME_4 = "Modeling Paramaters"
+HELP_PANEL_NAME_1 = "Common Parameters"
+HELP_PANEL_NAME_2 = "Logging Parameters"
+HELP_PANEL_NAME_3 = "Debug Parameters"
+HELP_PANEL_NAME_4 = "Modeling Parameters"
 
 
 def vllm(
@@ -41,48 +41,48 @@
     tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")],
     # === Common parameters ===
     use_chat_template: Annotated[
-        bool, Option(help="Use chat template for evaluation.", rich_help_panel=HELP_PANNEL_NAME_4)
+        bool, Option(help="Use chat template for evaluation.", rich_help_panel=HELP_PANEL_NAME_4)
     ] = False,
     system_prompt: Annotated[
-        Optional[str], Option(help="Use system prompt for evaluation.", rich_help_panel=HELP_PANNEL_NAME_4)
+        Optional[str], Option(help="Use system prompt for evaluation.", rich_help_panel=HELP_PANEL_NAME_4)
     ] = None,
     dataset_loading_processes: Annotated[
-        int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANNEL_NAME_1)
+        int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = 1,
     custom_tasks: Annotated[
-        Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANNEL_NAME_1)
+        Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = None,
     cache_dir: Annotated[
-        str, Option(help="Cache directory for datasets and models.", rich_help_panel=HELP_PANNEL_NAME_1)
+        str, Option(help="Cache directory for datasets and models.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = CACHE_DIR,
     num_fewshot_seeds: Annotated[
-        int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANNEL_NAME_1)
+        int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1)
     ] = 1,
     # === saving ===
     output_dir: Annotated[
-        str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANNEL_NAME_2)
+        str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = "results",
     push_to_hub: Annotated[
-        bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANNEL_NAME_2)
+        bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
     push_to_tensorboard: Annotated[
-        bool, Option(help="Push results to tensorboard.", rich_help_panel=HELP_PANNEL_NAME_2)
+        bool, Option(help="Push results to tensorboard.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
     public_run: Annotated[
-        bool, Option(help="Push results and details to a public repo.", rich_help_panel=HELP_PANNEL_NAME_2)
+        bool, Option(help="Push results and details to a public repo.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
     results_org: Annotated[
-        Optional[str], Option(help="Organization to push results to.", rich_help_panel=HELP_PANNEL_NAME_2)
+        Optional[str], Option(help="Organization to push results to.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = None,
     save_details: Annotated[
-        bool, Option(help="Save detailed, sample per sample, results.", rich_help_panel=HELP_PANNEL_NAME_2)
+        bool, Option(help="Save detailed, sample per sample, results.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
     # === debug ===
     max_samples: Annotated[
-        Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANNEL_NAME_3)
+        Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANEL_NAME_3)
     ] = None,
     job_id: Annotated[
-        int, Option(help="Optional job id for future refenrence.", rich_help_panel=HELP_PANNEL_NAME_3)
+        int, Option(help="Optional job id for future reference.", rich_help_panel=HELP_PANEL_NAME_3)
     ] = 0,
 ):
     """
diff --git a/src/lighteval/tasks/extended/ifeval/main.py b/src/lighteval/tasks/extended/ifeval/main.py
index e6947bb6e..60d1be5fa 100644
--- a/src/lighteval/tasks/extended/ifeval/main.py
+++ b/src/lighteval/tasks/extended/ifeval/main.py
@@ -160,8 +160,3 @@ def agg_inst_level_acc(items):
 TASKS_TABLE = [ifeval]
 
 extend_enum(Metrics, "ifeval_metric", ifeval_metrics)
-
-if __name__ == "__main__":
-    # Adds the metric to the metric list!
-    print(t["name"] for t in TASKS_TABLE)
-    print(len(TASKS_TABLE))
diff --git a/src/lighteval/tasks/extended/mix_eval/main.py b/src/lighteval/tasks/extended/mix_eval/main.py
index 15d7490bc..8684e910c 100644
--- a/src/lighteval/tasks/extended/mix_eval/main.py
+++ b/src/lighteval/tasks/extended/mix_eval/main.py
@@ -228,8 +228,3 @@ def mean_dv_5(x):
 
 
 TASKS_TABLE = [mixeval_multichoice_easy, mixeval_freeform_easy, mixeval_multichoice_hard, mixeval_freeform_hard]
-
-if __name__ == "__main__":
-    # Adds the metric to the metric list!
-    print(t["name"] for t in TASKS_TABLE)
-    print(len(TASKS_TABLE))
diff --git a/src/lighteval/tasks/extended/mt_bench/main.py b/src/lighteval/tasks/extended/mt_bench/main.py
index e5b209982..117e363dd 100644
--- a/src/lighteval/tasks/extended/mt_bench/main.py
+++ b/src/lighteval/tasks/extended/mt_bench/main.py
@@ -95,7 +95,3 @@ def flow_judge_mt_bench_prompt(question, answer, options, gold):
 
 
 TASKS_TABLE = [task]
-
-if __name__ == "__main__":
-    print(t["name"] for t in TASKS_TABLE)
-    print(len(TASKS_TABLE))
diff --git a/src/lighteval/tasks/extended/tiny_benchmarks/main.py b/src/lighteval/tasks/extended/tiny_benchmarks/main.py
index b283921f2..fae6e89df 100644
--- a/src/lighteval/tasks/extended/tiny_benchmarks/main.py
+++ b/src/lighteval/tasks/extended/tiny_benchmarks/main.py
@@ -283,11 +283,3 @@ def aggregate(self, y_input):
             corpus_level_fn=TinyCorpusAggregator(name).aggregate,
         ),
     )
-
-
-# MODULE LOGIC
-# You should not need to touch this
-# Convert to dict for lighteval
-if __name__ == "__main__":
-    print(t["name"] for t in TASKS_TABLE)
-    print(len(TASKS_TABLE))