
Commit

Merge branch 'unify-benchmark-ci' of https://github.com/intel/llvm into unify-benchmark-ci
ianayl committed Mar 6, 2025
2 parents 68ed0c4 + ad13e93 commit 18fff93
Showing 12 changed files with 169 additions and 86 deletions.
2 changes: 1 addition & 1 deletion devops/scripts/benchmarks/benches/compute.py
@@ -170,7 +170,7 @@ def run(self, env_vars) -> list[Result]:
env=env_vars,
stdout=result,
unit=parse_unit_type(unit),
description=self.description()
description=self.description(),
)
)
return ret
2 changes: 1 addition & 1 deletion devops/scripts/benchmarks/benches/llamacpp.py
@@ -139,7 +139,7 @@ def run(self, env_vars) -> list[Result]:
env=env_vars,
stdout=result,
unit="token/s",
description=self.description()
description=self.description(),
)
)
return results
13 changes: 5 additions & 8 deletions devops/scripts/benchmarks/benches/syclbench.py
@@ -105,7 +105,6 @@ def __init__(self, bench, name, test):
self.bench = bench
self.bench_name = name
self.test = test
self.done = False

def bin_args(self) -> list[str]:
return []
@@ -119,8 +118,6 @@ def setup(self):
)

def run(self, env_vars) -> list[Result]:
if self.done:
return
self.outputfile = os.path.join(self.bench.directory, self.test + ".csv")

command = [
@@ -152,17 +149,17 @@ def run(self, env_vars) -> list[Result]:
unit="ms",
)
)
self.done = True
return res_list

def teardown(self):
print(f"Removing {self.outputfile}...")
os.remove(self.outputfile)
return

return res_list

def name(self):
return f"{self.bench.name()} {self.test}"

def teardown(self):
return


# multi benchmarks
class Blocked_transform(SyclBenchmark):
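With the one-shot done guard removed, run() can be invoked more than once and the output CSV is now deleted exactly once in teardown(). A hypothetical driver loop (not the repo's actual main loop) illustrating the lifecycle this hunk assumes:

def run_iterations(benchmark, env_vars, iterations=5):
    # Hypothetical sketch: set up once, run per iteration, clean up exactly once.
    benchmark.setup()
    try:
        results = []
        for _ in range(iterations):
            # run() now always returns its result list instead of bailing out early.
            results.extend(benchmark.run(env_vars))
        return results
    finally:
        # The .csv written by run() is removed here, not inside run() itself.
        benchmark.teardown()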
2 changes: 1 addition & 1 deletion devops/scripts/benchmarks/benches/velocity.py
@@ -136,7 +136,7 @@ def run(self, env_vars) -> list[Result]:
env=env_vars,
stdout=result,
unit=self.unit,
description=self.description()
description=self.description(),
)
]

5 changes: 3 additions & 2 deletions devops/scripts/benchmarks/history.py
@@ -61,11 +61,12 @@ def extract_index(file_path: Path) -> int:

def create_run(self, name: str, results: list[Result]) -> BenchmarkRun:
try:
result = run("git rev-parse --short HEAD")
script_dir = os.path.dirname(os.path.abspath(__file__))
result = run("git rev-parse --short HEAD", cwd=script_dir)
git_hash = result.stdout.decode().strip()

# Get the GitHub repo URL from git remote
remote_result = run("git remote get-url origin")
remote_result = run("git remote get-url origin", cwd=script_dir)
remote_url = remote_result.stdout.decode().strip()

# Convert SSH or HTTPS URL to owner/repo format
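The hunk above pins both git queries to the scripts' own checkout via cwd, and the owner/repo conversion mentioned in the last comment is collapsed by the viewer. A minimal standalone sketch of the same pattern, written against plain subprocess rather than the repo's run() helper; url_to_owner_repo is a hypothetical stand-in for the elided conversion, not code from this commit:

import os
import subprocess

# Resolve the directory containing this script so git answers questions about the
# benchmark scripts' repository, regardless of the caller's working directory.
script_dir = os.path.dirname(os.path.abspath(__file__))

git_hash = subprocess.run(
    ["git", "rev-parse", "--short", "HEAD"],
    cwd=script_dir, capture_output=True, check=True,
).stdout.decode().strip()

remote_url = subprocess.run(
    ["git", "remote", "get-url", "origin"],
    cwd=script_dir, capture_output=True, check=True,
).stdout.decode().strip()

def url_to_owner_repo(url: str) -> str:
    # Hypothetical helper: normalize an SSH or HTTPS remote to "owner/repo".
    url = url.removesuffix(".git")
    if url.startswith("git@"):
        return url.split(":", 1)[1]                         # git@github.com:owner/repo
    return "/".join(url.rstrip("/").split("/")[-2:])        # https://github.com/owner/repo

print(git_hash, url_to_owner_repo(remote_url))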
7 changes: 2 additions & 5 deletions devops/scripts/benchmarks/html/config.js
@@ -1,5 +1,2 @@
const config = {
remoteDataUrl: ''
};
// defaultCompareNames = [];
// suiteNames = [];
//remoteDataUrl = 'https://example.com/data.json';
//defaultCompareNames = ['baseline'];
46 changes: 27 additions & 19 deletions devops/scripts/benchmarks/html/scripts.js
@@ -114,14 +114,12 @@ function createChart(data, containerId, type) {

const chartConfig = {
type: type === 'time' ? 'line' : 'bar',
data: type === 'time' ?
{
datasets: createTimeseriesDatasets(data)
} :
{
labels: data.labels,
datasets: data.datasets
},
data: type === 'time' ? {
datasets: createTimeseriesDatasets(data)
} : {
labels: data.labels,
datasets: data.datasets
},
options: options
};

@@ -221,10 +219,12 @@ function createChartContainer(data, canvasId) {
summary.appendChild(downloadButton);
details.appendChild(summary);

latestRunsLookup = createLatestRunsLookup(benchmarkRuns);

// Create and append extra info
const extraInfo = document.createElement('div');
extraInfo.className = 'extra-info';
extraInfo.innerHTML = generateExtraInfo(data);
extraInfo.innerHTML = generateExtraInfo(latestRunsLookup, data);
details.appendChild(extraInfo);

container.appendChild(details);
@@ -252,9 +252,8 @@ function createLatestRunsLookup(benchmarkRuns) {

return latestRunsMap;
}
const latestRunsLookup = createLatestRunsLookup(benchmarkRuns);

function generateExtraInfo(data) {
function generateExtraInfo(latestRunsLookup, data) {
const labels = data.datasets ? data.datasets.map(dataset => dataset.label) : [data.label];

return labels.map(label => {
@@ -283,7 +282,7 @@ function downloadChart(canvasId, label) {
const chart = chartInstances.get(canvasId);
if (chart) {
const link = document.createElement('a');
link.href = chart.toBase64Image('image/jpeg', 1)
link.href = chart.toBase64Image('image/png', 1)
link.download = `${label}.png`;
link.click();
}
@@ -445,6 +444,13 @@ function setupRunSelector() {
function setupSuiteFilters() {
suiteFiltersContainer = document.getElementById('suite-filters');

const suiteNames = new Set();
benchmarkRuns.forEach(run => {
run.results.forEach(result => {
suiteNames.add(result.suite);
});
});

suiteNames.forEach(suite => {
const label = document.createElement('label');
const checkbox = document.createElement('input');
@@ -530,16 +536,18 @@ function loadData() {
const loadingIndicator = document.getElementById('loading-indicator');
loadingIndicator.style.display = 'block'; // Show loading indicator

if (config.remoteDataUrl && config.remoteDataUrl !== '') {
if (typeof remoteDataUrl !== 'undefined' && remoteDataUrl !== '') {
// Fetch data from remote URL
fetch(config.remoteDataUrl)
.then(response => response.text())
.then(scriptContent => {
// Evaluate the script content
eval(scriptContent);
fetch(remoteDataUrl)
.then(response => response.json())
.then(data => {
benchmarkRuns = data;
initializeCharts();
})
.catch(error => console.error('Error fetching remote data:', error))
.catch(error => {
console.error('Error fetching remote data:', error);
loadingIndicator.textContent = 'Fetching remote data failed.';
})
.finally(() => {
loadingIndicator.style.display = 'none'; // Hide loading indicator
});
38 changes: 22 additions & 16 deletions devops/scripts/benchmarks/main.py
@@ -17,6 +17,7 @@
from history import BenchmarkHistory
from utils.utils import prepare_workdir
from utils.compute_runtime import *
from presets import preset_get_by_name, presets

import argparse
import re
@@ -153,7 +154,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
SyclBench(directory),
LlamaCppBench(directory),
UMFSuite(directory),
# TestSuite()
TestSuite(),
]
if not options.dry_run
else []
@@ -163,6 +164,9 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
failures = {}

for s in suites:
if s.name() not in options.preset.suites():
continue

suite_benchmarks = s.benchmarks()
if filter:
suite_benchmarks = [
@@ -182,14 +186,13 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
print(f"{type(s).__name__} setup complete.")
benchmarks += suite_benchmarks

for b in benchmarks:
print(b.name())

for benchmark in benchmarks:
try:
print(f"Setting up {benchmark.name()}... ")
if options.verbose:
print(f"Setting up {benchmark.name()}... ")
benchmark.setup()
print(f"{benchmark.name()} setup complete.")
if options.verbose:
print(f"{benchmark.name()} setup complete.")

except Exception as e:
if options.exit_on_failure:
@@ -279,8 +282,6 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
if options.output_html:
generate_html(history.runs, compare_names)

print(f"See {os.getcwd()}/html/index.html for the results.")


def validate_and_parse_env_args(env_args):
env_vars = {}
@@ -362,12 +363,6 @@ def validate_and_parse_env_args(env_args):
help="Regex pattern to filter benchmarks by name.",
default=None,
)
parser.add_argument(
"--epsilon",
type=float,
help="Threshold to consider change of performance significant",
default=options.epsilon,
)
parser.add_argument(
"--verbose", help="Print output of all the commands.", action="store_true"
)
Expand All @@ -394,7 +389,11 @@ def validate_and_parse_env_args(env_args):
help="Specify whether markdown output should fit the content size limit for request validation",
)
parser.add_argument(
"--output-html", help="Create HTML output", action="store_true", default=False
"--output-html",
help="Create HTML output. Local output is for direct local viewing of the html file, remote is for server deployment.",
nargs="?",
const=options.output_html,
choices=["local", "remote"],
)
parser.add_argument(
"--dry-run",
@@ -438,6 +437,13 @@ def validate_and_parse_env_args(env_args):
help="Directory for cublas library",
default=None,
)
parser.add_argument(
"--preset",
type=str,
choices=[p.name() for p in presets],
help="Benchmark preset to run.",
default=options.preset.name(),
)

args = parser.parse_args()
additional_env_vars = validate_and_parse_env_args(args.env)
@@ -449,7 +455,6 @@ def validate_and_parse_env_args(env_args):
options.sycl = args.sycl
options.iterations = args.iterations
options.timeout = args.timeout
options.epsilon = args.epsilon
options.ur = args.ur
options.ur_adapter = args.adapter
options.exit_on_failure = args.exit_on_failure
Expand All @@ -464,6 +469,7 @@ def validate_and_parse_env_args(env_args):
options.current_run_name = args.relative_perf
options.cudnn_directory = args.cudnn_directory
options.cublas_directory = args.cublas_directory
options.preset = preset_get_by_name(args.preset)

if args.build_igc and args.compute_runtime is None:
parser.error("--build-igc requires --compute-runtime to be set")
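main.py now filters suites through options.preset.suites() and resolves --preset with preset_get_by_name(), but the new presets module itself is not rendered on this page. A hypothetical minimal sketch of a module exposing that interface; the preset names and suite lists below are illustrative assumptions only, not the commit's actual values:

from dataclasses import dataclass, field

@dataclass
class Preset:
    _name: str
    _suites: list[str] = field(default_factory=list)

    def name(self) -> str:
        return self._name

    def suites(self) -> list[str]:
        return self._suites

# Illustrative entries only; the real list lives in devops/scripts/benchmarks/presets.py.
presets: list[Preset] = [
    Preset("Full", ["Compute Benchmarks", "Velocity Bench", "SYCL-Bench", "llama.cpp bench", "UMF"]),
    Preset("Minimal", ["Compute Benchmarks"]),
]

def preset_get_by_name(name: str) -> Preset:
    for p in presets:
        if p.name() == name:
            return p
    raise ValueError(f"Unknown benchmark preset: {name}")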
8 changes: 4 additions & 4 deletions devops/scripts/benchmarks/options.py
@@ -1,5 +1,6 @@
from dataclasses import dataclass, field
from enum import Enum
from presets import Preset, presets


class Compare(Enum):
@@ -29,18 +30,17 @@ class Options:
compare: Compare = Compare.LATEST
compare_max: int = 10 # average/median over how many results
output_markdown: MarkdownSize = MarkdownSize.SHORT
output_html: bool = False
output_html: str = "local"
dry_run: bool = False
# these two should probably be merged into one setting
stddev_threshold: float = 0.02
epsilon: float = 0.02
iterations_stddev: int = 5
build_compute_runtime: bool = False
extra_ld_libraries: list[str] = field(default_factory=list)
extra_env_vars: dict = field(default_factory=dict)
compute_runtime_tag: str = "25.05.32567.12"
compute_runtime_tag: str = "25.05.32567.18"
build_igc: bool = False
current_run_name: str = "This PR"
preset: Preset = presets[0]


options = Options()
63 changes: 36 additions & 27 deletions devops/scripts/benchmarks/output_html.py
@@ -5,34 +5,43 @@

import json
import os
from options import options


def generate_html(benchmark_runs: list, compare_names: list[str]):

# Get unique suite names
suite_names = {result.suite for run in benchmark_runs for result in run.results}

# create path to data.js in html folder
data_path = os.path.join(os.path.dirname(__file__), "html", "data.js")

# Write data to js file
# We can't store this as a standalone json file because it needs to be inline in the html
with open(data_path, "w") as f:
f.write("const benchmarkRuns = [\n")
# it might be tempting to just to create a list and convert
# that to a json, but that leads to json being serialized twice.
for i, run in enumerate(benchmark_runs):
if i > 0:
f.write(",\n")
f.write(run.to_json())

f.write("\n];\n\n") # terminates benchmarkRuns

# these are not const because they might be modified
# in config.js
f.write("defaultCompareNames = ")
json.dump(compare_names, f)
f.write(";\n\n") # terminates defaultCompareNames
f.write("suiteNames = ")
json.dump(list(suite_names), f)
f.write(";") # terminates suiteNames
html_path = os.path.join(os.path.dirname(__file__), "html")

if options.output_html == "local":
data_path = os.path.join(html_path, "data.js")
# Write data to js file
# We can't store this as a standalone json file because it needs to be inline in the html
with open(data_path, "w") as f:
f.write("benchmarkRuns = [\n")
# it might be tempting to just to create a list and convert
# that to a json, but that leads to json being serialized twice.
for i, run in enumerate(benchmark_runs):
if i > 0:
f.write(",\n")
f.write(run.to_json())

f.write("\n];\n\n") # terminates benchmarkRuns

f.write("defaultCompareNames = ")
json.dump(compare_names, f)
f.write(";\n") # terminates defaultCompareNames

print(f"See {os.getcwd()}/html/index.html for the results.")
else:
data_path = os.path.join(html_path, "data.json")
with open(data_path, "w") as f:
f.write("[\n")
for i, run in enumerate(benchmark_runs):
if i > 0:
f.write(",\n")
f.write(run.to_json())
f.write("\n];\n")

print(
f"Upload {data_path} to a location set in config.js remoteDataUrl argument."
)