diff --git a/CybersecurityBenchmarks/README.md b/CybersecurityBenchmarks/README.md index 9f61abc1..56b5c2c0 100644 --- a/CybersecurityBenchmarks/README.md +++ b/CybersecurityBenchmarks/README.md @@ -199,6 +199,9 @@ FRR benchmark is run in two steps: ## Running Secure Code Generation Benchmarks +Note: Secure Code Generation Benchmarks are temporarily removed from the default list, as our team is identifying the best relative import solution, as outlined in [this PR](https://github.com/meta-llama/PurpleLlama/pull/71). +If you need to run this benchmark, please uncomment the relevant code in [run.py](https://github.com/meta-llama/PurpleLlama/blob/main/CybersecurityBenchmarks/benchmark/run.py) and resolve the import issue (for example, with the current solution in the PR). + ### For Instruct Benchmark ``` diff --git a/CybersecurityBenchmarks/benchmark/run.py b/CybersecurityBenchmarks/benchmark/run.py index 2185c065..ea76f3f0 100644 --- a/CybersecurityBenchmarks/benchmark/run.py +++ b/CybersecurityBenchmarks/benchmark/run.py @@ -24,7 +24,8 @@ from .canary_exploit_benchmark import CanaryExploitBenchmark from .frr_benchmark import FRRBenchmark -from .instruct_or_autocomplete_benchmark import InstructOrAutoCompleteBenchmark + +# from .instruct_or_autocomplete_benchmark import InstructOrAutoCompleteBenchmark # Temporarily Remove Secure Code Generation Benchmarks, see https://github.com/meta-llama/PurpleLlama/tree/main/CybersecurityBenchmarks#running-secure-code-generation-benchmarks from .interpreter_benchmark import InterpreterBenchmark from .mitre_benchmark import MitreBenchmark from .multiturn_phishing_benchmark import MultiturnPhishingBenchmark @@ -33,7 +34,7 @@ LOG: logging.Logger = logging.getLogger(__name__) -Benchmark.register_benchmark(InstructOrAutoCompleteBenchmark) +# Benchmark.register_benchmark(InstructOrAutoCompleteBenchmark) # Temporarily Remove Secure Code Generation Benchmarks, see 
https://github.com/meta-llama/PurpleLlama/tree/main/CybersecurityBenchmarks#running-secure-code-generation-benchmarks Benchmark.register_benchmark(MitreBenchmark) Benchmark.register_benchmark(FRRBenchmark) Benchmark.register_benchmark(PromptInjectionBenchmark)