From 9e3a5e7a22f60074e466edff319398e54126b697 Mon Sep 17 00:00:00 2001
From: Shengye Wan
Date: Thu, 18 Jan 2024 05:38:50 -0800
Subject: [PATCH] Improvement: apply use_precomputed_responses for llm-under-test requirement

Summary:
Similar to the handling of `prompt-path`, we should not require `llm-under-test` when the flag `use-precomputed-responses` is set. Any precomputed response file should already include the keyword `model` identifying the LLM that generated the response.

Reviewed By: YueLi28

Differential Revision: D52838979

fbshipit-source-id: a35312520297dc8cf3446f4442d5421af67312c0
---
 CybersecurityBenchmarks/benchmark/run.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/CybersecurityBenchmarks/benchmark/run.py b/CybersecurityBenchmarks/benchmark/run.py
index dee5d267e..bec547eae 100644
--- a/CybersecurityBenchmarks/benchmark/run.py
+++ b/CybersecurityBenchmarks/benchmark/run.py
@@ -39,6 +39,10 @@ def validate_arguments(parser: argparse.ArgumentParser) -> argparse.Namespace:
             parser.error(
                 "The following argument is required: --prompt-path, unless argument --use-precomputed-responses is specified."
             )
+        if "llm_under_test" in args and args.llm_under_test is None:
+            parser.error(
+                "The following argument is required: --llm-under-test, unless argument --use-precomputed-responses is specified."
+            )
     return args
 
 
@@ -72,7 +76,6 @@ def main(
         "--llm-under-test",
         help=f"LLM to benchmark provided as {llm.SPECIFICATION_FORMAT}, e.g., {llm.EXAMPLE_SPECIFICATION}",
         action="append",
-        required=True,
     )
 
     parser.add_argument(
@@ -157,11 +160,13 @@ def main(
         level=logging.INFO if not args.debug else logging.DEBUG,
     )
 
-    materialized_llms_under_test = llms_under_test or [
-        llm.create(specification) for specification in args.llm_under_test
-    ]
-    if len(materialized_llms_under_test) == 0:
-        raise ValueError("No LLMs were specified.")
+    materialized_llms_under_test = []
+    if not args.use_precomputed_responses:
+        materialized_llms_under_test = llms_under_test or [
+            llm.create(specification) for specification in args.llm_under_test
+        ]
+        if len(materialized_llms_under_test) == 0:
+            raise ValueError("No LLMs were specified.")
 
     benchmark: Benchmark
     stat_response_path = Path(args.stat_path) if args.stat_path else None
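
For reference, here is a minimal standalone sketch of the argument-handling pattern this patch introduces: `--llm-under-test` is declared as an optional, repeatable flag, and both the post-parse validation and the LLM materialization are skipped when `--use-precomputed-responses` is given. The script name, the stripped-down parser, and the print-based handling of the specifications are illustrative assumptions, not the actual run.py.

    # sketch.py -- illustrative only; mirrors the validation pattern of the patch,
    # not the real CybersecurityBenchmarks/benchmark/run.py.
    import argparse


    def validate_arguments(parser: argparse.ArgumentParser) -> argparse.Namespace:
        args = parser.parse_args()
        if not args.use_precomputed_responses:
            # These arguments are only required when responses must be generated live.
            if args.prompt_path is None:
                parser.error(
                    "The following argument is required: --prompt-path, unless "
                    "argument --use-precomputed-responses is specified."
                )
            # The real patch additionally guards with `"llm_under_test" in args`
            # to cover callers that supply LLMs programmatically.
            if args.llm_under_test is None:
                parser.error(
                    "The following argument is required: --llm-under-test, unless "
                    "argument --use-precomputed-responses is specified."
                )
        return args


    def main() -> None:
        parser = argparse.ArgumentParser()
        parser.add_argument("--prompt-path")
        # action="append" lets the flag repeat to benchmark several LLMs; it is
        # no longer marked required=True, mirroring the change above.
        parser.add_argument("--llm-under-test", action="append")
        parser.add_argument("--use-precomputed-responses", action="store_true")
        args = validate_arguments(parser)

        # LLM clients are only materialized when precomputed responses are not used.
        llms_under_test = []
        if not args.use_precomputed_responses:
            llms_under_test = args.llm_under_test
        print(llms_under_test)


    if __name__ == "__main__":
        main()

With this sketch, `python3 sketch.py --use-precomputed-responses` parses cleanly without any `--llm-under-test` argument, while running it with neither flag reproduces the parser.error messages shown in the first hunk above.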