diff --git a/benchmarks/benchmark_result.py b/benchmarks/benchmark_result.py index 5686833c7..a21d56952 100644 --- a/benchmarks/benchmark_result.py +++ b/benchmarks/benchmark_result.py @@ -26,7 +26,7 @@ class BenchmarkResult: default=None, metadata={"formatted_name": "Test output", "display": "code"} ) run_error: Optional[str] = attr.ib( - default=None, metadata={"aggregation": "percent"} + default=None, metadata={"formatted_name": "Run Error", "display": "code"} ) response: Optional[str] = attr.ib( default=None, metadata={"formatted_name": "Analysis", "display": "text"} diff --git a/mentat/resources/templates/benchmark.jinja b/mentat/resources/templates/benchmark.jinja index 10182e1b9..4dba5313e 100644 --- a/mentat/resources/templates/benchmark.jinja +++ b/mentat/resources/templates/benchmark.jinja @@ -66,9 +66,9 @@

{{ display_name|capitalize }}

{% if section.type == "text" %} - {{ section.content }} + {{ section.content |escape }} {% elif section.type == "code" %} -
{{ section.content }}
+
{{ section.content |escape }}
{% elif section.type == "json" %}
{{ section.content |tojson(indent=4)|safe}}
{% elif section.type == "transcript" %}