Merge branch 'main' into ajpal-polybench2

egraphs-good · Feb 19, 2025 · 469747c · 469747c
2 parents 924f299 + 058bb25
commit 469747c
Show file tree

Hide file tree

Showing 7 changed files with 188 additions and 44 deletions.
diff --git a/dag_in_context/src/lib.rs b/dag_in_context/src/lib.rs
@@ -5,7 +5,12 @@ use indexmap::IndexMap;
 use interpreter::Value;
 use schedule::{rulesets, CompilerPass};
 use schema::TreeProgram;
-use std::{collections::HashSet, fmt::Write, i64};
+use std::{
+    collections::HashSet,
+    fmt::Write,
+    i64,
+    time::{Duration, Instant},
+};
 use to_egglog::TreeToEgglog;
 
 use crate::{
@@ -371,10 +376,14 @@ impl Default for EggccConfig {
 
 // Optimizes a tree program using the given schedule.
 // Adds context to the program before optimizing.
+// On success, returns the optimized program together with the total time
+// spent on serialization and on extraction, in that order.
 pub fn optimize(
     program: &TreeProgram,
     eggcc_config: &EggccConfig,
-) -> std::result::Result<TreeProgram, egglog::Error> {
+) -> std::result::Result<(TreeProgram, Duration, Duration), egglog::Error> {
+    let mut eggcc_serialization_time = Duration::from_millis(0);
+    let mut eggcc_extraction_time = Duration::from_millis(0);
     let schedule_list = eggcc_config.schedule.get_schedule_list();
     let mut res = program.clone();
 
@@ -430,8 +439,10 @@ pub fn optimize(
             let mut egraph = egglog::EGraph::default();
             egraph.parse_and_run_program(None, &egglog_prog)?;
 
+            let serialization_start = Instant::now();
             let (serialized, unextractables) = serialized_egraph(egraph);
 
+            let extraction_start = Instant::now();
             let mut termdag = egglog::TermDag::default();
             let has_debug_exprs = has_debug_exprs(&serialized);
             if has_debug_exprs {
@@ -450,21 +461,26 @@ pub fn optimize(
                 has_debug_exprs,
             );
 
+            let extraction_end = Instant::now();
+
+            eggcc_extraction_time += extraction_end - extraction_start;
+            eggcc_serialization_time += extraction_start - serialization_start;
+
             // typecheck the program as a sanity check
             iter_result.typecheck();
 
             res = iter_result;
 
             if has_debug_exprs {
                 log::info!("Program has debug expressions, stopping pass {}.", i);
-                return Ok(res);
+                return Ok((res, eggcc_serialization_time, eggcc_extraction_time));
             }
         }
 
         // now add context to res again for the next pass, since context might be less specific
         res = res.add_context().0;
     }
-    Ok(res)
+    Ok((res, eggcc_serialization_time, eggcc_extraction_time))
 }
 
 fn check_program_gets_type(program: TreeProgram) -> Result {

diff --git a/infra/generate_cfgs.py b/infra/generate_cfgs.py
@@ -25,7 +25,7 @@ def make_cfgs(bench, data_dir):
 
     # https://llvm.org/docs/Passes.html#dot-cfg-print-cfg-of-function-to-dot-file
     # spawn a shell in the path and run opt
-    opt_res = subprocess.run(f"{opt} -disable-output -passes=dot-cfg optimized.ll", shell=True, cwd=path)
+    opt_res = subprocess.run(f"{opt} -disable-output -passes=dot-cfg optimized.ll", shell=True, cwd=path, capture_output=True)
     if opt_res.returncode != 0:
       print(f"Error running opt on {path}/optimized.ll")
       exit(1)
@@ -39,7 +39,7 @@ def make_cfgs(bench, data_dir):
 
       # Convert to png
       cmd = f"dot -Tpng -o {path}/{name}.png {path}/{dot}"
-      dot_res = subprocess.run(cmd, shell=True).returncode
+      dot_res = subprocess.run(cmd, shell=True, capture_output=True).returncode
       if dot_res != 0:
         print(f"Error converting {dot} to png")
         exit(1)

diff --git a/infra/graphs.py b/infra/graphs.py
@@ -66,6 +66,9 @@ def get_cycles(data, benchmark_name, run_method):
 def get_eggcc_compile_time(data, benchmark_name):
   return get_row(data, benchmark_name, 'llvm-eggcc-O0-O0').get('eggccCompileTimeSecs')
 
+def get_eggcc_extraction_time(data, benchmark_name):
+  return get_row(data, benchmark_name, 'llvm-eggcc-O0-O0').get('eggccExtractionTimeSecs')
+
 def group_by_benchmark(profile):
   grouped_by_benchmark = {}
   for benchmark in profile:
@@ -307,25 +310,46 @@ def get_code_size(benchmark, suites_path):
   raise KeyError(f"Unsupported file type for benchmark {benchmark}: {file}")
 
 
-def make_code_size_vs_compile_time(profile, output, suites_path):
+def make_code_size_vs_compile_and_extraction_time(profile, compile_time_output, extraction_time_output, ratio_output, suites_path):
   benchmarks = dedup([b.get('benchmark') for b in profile])
 
   data = []
   for benchmark in benchmarks:
     compile_time = get_eggcc_compile_time(profile, benchmark)
+    extraction_time = get_eggcc_extraction_time(profile, benchmark)
     code_size = get_code_size(benchmark, suites_path)
-    data.append((code_size, compile_time))
+    if code_size > 300:
+      continue
+    data.append((code_size, compile_time, extraction_time))
 
   x = [d[0] for d in data]
-  y = [d[1] for d in data]
+  y1 = [d[1] for d in data]
+  y2 = [d[2] for d in data]
+  y3 = [d[2] / d[1] for d in data]
 
   # graph data
   plt.figure(figsize=(10, 6))
-  plt.scatter(x, y)
+  plt.scatter(x, y1)
   plt.xlabel('Bril Number of Instructions')
   plt.ylabel('EggCC Compile Time (s)')
   plt.title('EggCC Compile Time vs Code Size')
-  plt.savefig(output)
+  plt.savefig(compile_time_output)
+
+
+  plt.figure(figsize=(10, 6))
+  plt.scatter(x, y2)
+  plt.xlabel('Bril Number of Instructions')
+  plt.ylabel('EggCC Extraction Time (s)')
+  plt.title('EggCC Extraction Time vs Code Size')
+  plt.savefig(extraction_time_output)
+
+  plt.figure(figsize=(10, 6))
+  plt.scatter(x, y3)
+  plt.xlabel('Bril Number of Instructions')
+  plt.ylabel('Extraction Ratio')
+  plt.title('EggCC Compile Time vs Extraction Time')
+  plt.savefig(ratio_output)
+
 
 
 
@@ -359,7 +383,12 @@ def make_code_size_vs_compile_time(profile, output, suites_path):
 
   make_macros(profile, benchmark_suites, f'{output_folder}/nightlymacros.tex')
 
-  make_code_size_vs_compile_time(profile, f'{graphs_folder}/code_size_vs_compile_time.png', benchmark_suite_folder)
+  make_code_size_vs_compile_and_extraction_time(
+    profile, 
+    f'{graphs_folder}/code_size_vs_compile_time.png', 
+    f'{graphs_folder}/code_size_vs_extraction_time.png', 
+    f'{graphs_folder}/extraction_ratio.png',
+    benchmark_suite_folder)
 
   # make json list of graph names and put it in the output
   graph_names = []

diff --git a/infra/nightly-resources/data.js b/infra/nightly-resources/data.js
@@ -92,17 +92,23 @@ function getOverallStatistics(suite) {
     }
 
     const eggcc_compile_times = [];
+    const eggcc_extraction_times = [];
+    const eggcc_serialization_times = [];
     const llvm_compile_times = [];
     for (const benchmark of benchmarks) {
       const row = getRow(benchmark, treatment);
       eggcc_compile_times.push(row.eggccCompileTimeSecs);
+      eggcc_extraction_times.push(row.eggccExtractionTimeSecs);
+      eggcc_serialization_times.push(row.eggccSerializationTimeSecs);
       llvm_compile_times.push(row.llvmCompileTimeSecs);
     }
 
     result.push({
       Treatment: treatment,
       "Normalized Mean": tryRound(geometricMean(normalized_cycles)),
       "Eggcc Compile Time": tryRound(mean(eggcc_compile_times)),
+      "Eggcc Serialization Time": tryRound(mean(eggcc_serialization_times)),
+      "Eggcc Extraction Time": tryRound(mean(eggcc_extraction_times)),
       "LLVM Compile Time": tryRound(mean(llvm_compile_times)),
     });
   }
@@ -131,6 +137,14 @@ function getDataForBenchmark(benchmark) {
           class: "",
           value: tryRound(row.eggccCompileTimeSecs),
         },
+        eggccSerializationTimeSecs: {
+          class: "",
+          value: tryRound(row.eggccSerializationTimeSecs),
+        },
+        eggccExtractionTimeSecs: {
+          class: "",
+          value: tryRound(row.eggccExtractionTimeSecs),
+        },
         llvmCompileTimeSecs: {
           class: "",
           value: tryRound(row.llvmCompileTimeSecs),

diff --git a/infra/nightly-resources/index.js b/infra/nightly-resources/index.js
@@ -14,7 +14,7 @@ const GLOBAL_DATA = {
 };
 
 function treatments() {
-  return GLOBAL_DATA.currentRun.map((x) => x.runMethod);
+  return dedup(GLOBAL_DATA.currentRun.map((x) => x.runMethod));
 }
 
 // filter to all the benchmark names that are enabled

diff --git a/infra/profile.py b/infra/profile.py
@@ -149,12 +149,22 @@ def optimize(benchmark):
   process2.check_returncode()
 
   eggcc_compile_time = 0
+  eggcc_extraction_time = 0
+  eggcc_serialization_time = 0
   # parse json from eggcc run data
   with open(eggcc_run_data) as f:
     eggcc_data = json.load(f)
     secs = eggcc_data["eggcc_compile_time"]["secs"]
     nanos = eggcc_data["eggcc_compile_time"]["nanos"]
     eggcc_compile_time = secs + nanos / 1e9
+
+    secs = eggcc_data["eggcc_serialization_time"]["secs"]
+    nanos = eggcc_data["eggcc_serialization_time"]["nanos"]
+    eggcc_serialization_time = secs + nanos / 1e9
+
+    secs = eggcc_data["eggcc_extraction_time"]["secs"]
+    nanos = eggcc_data["eggcc_extraction_time"]["nanos"]
+    eggcc_extraction_time = secs + nanos / 1e9
 
   llvm_compile_time = 0
   with open(llvm_run_data) as f:
@@ -164,7 +174,13 @@ def optimize(benchmark):
     llvm_compile_time = secs + nanos / 1e9
 
 
-  res = {"path": f"{profile_dir}/{benchmark.treatment}", "eggccCompileTimeSecs": eggcc_compile_time, "llvmCompileTimeSecs": llvm_compile_time}
+    res = {
+        "path": f"{profile_dir}/{benchmark.treatment}",
+        "eggccCompileTimeSecs": eggcc_compile_time,
+        "eggccSerializationTimeSecs": eggcc_serialization_time,
+        "eggccExtractionTimeSecs": eggcc_extraction_time,
+        "llvmCompileTimeSecs": llvm_compile_time,
+    }
   return res
 
 
@@ -241,7 +257,6 @@ def get_suite(path):
 
     oldpath = path
     path = os.path.dirname(path)
-    print(os.path.basename(path))
     if os.path.basename(path) == "passing":
       return os.path.basename(oldpath)