Added new trace event count and added it to vector_scalar_mul (#2042)

Xilinx · Feb 13, 2025 · b2d9cfa · b2d9cfa
1 parent a608882
commit b2d9cfa
Show file tree

Hide file tree

Showing 3 changed files with 236 additions and 0 deletions.
diff --git a/programming_examples/basic/vector_scalar_mul/Makefile b/programming_examples/basic/vector_scalar_mul/Makefile
@@ -96,10 +96,12 @@ run_py: build/final_${data_size}.xclbin build/insts_${data_size}.txt
 trace: ${targetname}_${data_size}.exe build/final_trace_${data_size}.xclbin build/insts_${data_size}.txt 
 	${powershell} ./$< -x build/final_trace_${data_size}.xclbin -i build/insts_${data_size}.txt -k MLIR_AIE -t ${trace_size}
 	${srcdir}/../../utils/parse_trace.py --filename trace.txt --mlir build/aie_trace_${data_size}.mlir --colshift 1 > trace_vs.json
+	${srcdir}/../../utils/get_trace_summary.py --filename trace_vs.json
 
 trace_py: build/final_trace_${data_size}.xclbin build/insts_${data_size}.txt
 	${powershell} python3 ${srcdir}/test.py -x build/final_trace_${data_size}.xclbin -i build/insts_${data_size}.txt -k MLIR_AIE -t ${trace_size} -s ${data_size}
 	${srcdir}/../../utils/parse_trace.py --filename trace.txt --mlir build/aie_trace_${data_size}.mlir --colshift 1 > trace_vs.json
+	${srcdir}/../../utils/get_trace_summary.py --filename trace_vs.json
 
 
 clean_trace:

diff --git a/programming_examples/utils/get_trace_summary.py b/programming_examples/utils/get_trace_summary.py
@@ -0,0 +1,37 @@
+#!/usr/bin/python3
+import json
+import argparse
+import sys
+import re
+import trace_utils
+
+
+def parse_args():
+    """Parse the command-line options of the trace summary script.
+
+    Returns
+    -------
+    argparse.Namespace
+        ``filename`` holds the required trace file path; ``debug`` holds the
+        optional debug value and is ``None`` when the flag is not given.
+    """
+    # TODO tracelabels removed since we can have multiple sets of labels for each pkt_type & loc combination
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--filename", required=True, help="Trace file")
+    cli.add_argument("--debug", required=False, help="debug mode")
+
+    # Skip argv[0], which is the script name rather than an option.
+    cli_args = sys.argv[1:]
+    return cli.parse_args(cli_args)
+
+
+def main():
+    """Print the number of kernel invocations in a trace and their cycle stats.
+
+    Reads the trace JSON named by ``--filename``, collects the cycle count of
+    every INSTR_EVENT_0 / INSTR_EVENT_1 pair and prints the count followed by
+    the first, minimum, average and maximum cycle counts. Exits with status 1
+    when the trace holds no complete invocation.
+    """
+    opts = parse_args()
+    cycles = trace_utils.get_cycles_summary(opts.filename)
+
+    # get_cycles_summary hands back np.inf instead of a list when it trips
+    # over a malformed event, and an empty list when no event pair exists.
+    # Neither can be indexed or averaged, so report it and stop.
+    if not isinstance(cycles, list) or not cycles:
+        print(
+            "No complete kernel invocations found in " + opts.filename,
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    print("Total number of full kernel invocations is " + str(len(cycles)))
+    print(
+        f"First/Min/Avg/Max is {cycles[0]}/ {min(cycles)}"
+        f"/ {sum(cycles) / len(cycles)}/ {max(cycles)}"
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/programming_examples/utils/trace_utils.py b/programming_examples/utils/trace_utils.py
@@ -0,0 +1,197 @@
+# Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+# SPDX-License-Identifier: MIT
+
+# from CppHeaderParser import CppHeader
+import numpy as np
+import subprocess
+import json
+import re
+import os
+
+
+def get_kernel_code(test: dict, solutions_path: str = None) -> str:
+    """Return the kernel source code belonging to ``test``.
+
+    Without a ``solutions_path`` the canonical solution is used, i.e. the
+    test's ``prompt`` followed by its ``canonical_solution``. Otherwise the
+    code is read from the ``code`` entry of
+    ``<solutions_path>/<kernel_name>.json``; any ``int main(...) {...}``
+    definition and everything from the ``// extern "C"`` marker onwards are
+    stripped from it. ``None`` is returned (after printing a notice) when the
+    solution file carries no code.
+    """
+    if not solutions_path:
+        return test["prompt"] + test["canonical_solution"]
+
+    solution_file = os.path.join(solutions_path, f"{test['kernel_name']}.json")
+    with open(solution_file, "r") as handle:
+        solution = json.load(handle)
+
+    if not solution.get("code"):
+        print(f"No code available in {solutions_path} for {test['kernel_name']}")
+        return None
+
+    # if gpt decides to be too helpful and adds a main()... remove it
+    main_pattern = r"int\s+main\s*\([^)]*\)\s*{[^{}]*({[^{}]*}[^{}]*)*}"
+    without_main = re.sub(main_pattern, "", solution["code"], flags=re.DOTALL)
+
+    # cppheaderparser will complain if we don't remove trailing comments
+    kept, _, _ = without_main.partition('// extern "C"')
+    return kept
+
+
+def _buffers_from_entries(entries):
+    """Turn a list of two-value test-vector dicts into a list of NumPy arrays."""
+    buffers = []
+    for entry in entries:
+        # Each entry holds exactly two values, in order: the data, then its
+        # dtype. The unpacking raises ValueError for any other count.
+        array, dtype = entry.values()
+        buffers.append(np.array(array, dtype=dtype))
+    return buffers
+
+
+def extract_buffers(test):
+    """Specific helper for the AIEval dataset - parses the test dictionary and
+    returns input buffers, output buffers and RTPs as separate lists.
+
+    Parameters
+    ----------
+    test : dict
+        Must contain ``test["test_vectors"]`` with ``"inputs"`` and
+        ``"outputs"`` lists; ``"rtps"`` is optional and may be ``None``.
+
+    Returns
+    -------
+    tuple of (list, list, list)
+        NumPy arrays for the inputs, the outputs and the RTPs. The RTP list
+        is empty when the test defines none.
+    """
+    vectors = test["test_vectors"]
+
+    input_buffers = _buffers_from_entries(vectors["inputs"])
+    output_buffers = _buffers_from_entries(vectors["outputs"])
+
+    rtp_entries = vectors.get("rtps")
+    rtps = [] if rtp_entries is None else _buffers_from_entries(rtp_entries)
+
+    return input_buffers, output_buffers, rtps
+
+
+def trace_to_json(trace_file: str, mlir_file: str, output_name: str = "trace.json"):
+    """Run the parse_trace.py utility in a subprocess and save what it prints.
+
+    The utility is located relative to the ``MLIR_AIE_INSTALL_DIR``
+    environment variable and is always invoked with ``--colshift 1``.
+
+    Parameters
+    ----------
+    trace_file : str
+        The .txt trace file of 32-byte codes.
+    mlir_file : str
+        Path to the corresponding MLIR file for the design being traced.
+    output_name : str, optional
+        Path to output json file. You can analyze it using tools like https://ui.perfetto.dev
+
+    Returns
+    -------
+    bool or str
+        ``True`` once the output file has been written; the captured
+        stdout/stderr text when the utility exits with a non-zero status.
+    """
+    parser_script = (
+        f"{os.environ['MLIR_AIE_INSTALL_DIR']}"
+        "/../../programming_examples/utils/parse_trace.py"
+    )
+    command = [parser_script, "--filename", trace_file, "--mlir", mlir_file]
+    command += ["--colshift", "1"]
+
+    try:
+        # stderr is folded into stdout so a failure report carries both.
+        converted = subprocess.check_output(
+            command, stderr=subprocess.STDOUT, text=True
+        )
+    except subprocess.CalledProcessError as failure:
+        print(f"Trace failed\n{failure.output}")
+        return failure.output
+    else:
+        with open(output_name, "w") as sink:
+            sink.write(converted)
+        print(f"Trace written to {output_name}")
+        return True
+
+
+def get_cycles(trace_path):
+    """Return the cycle count between the first event0 and the first event1.
+
+    This helper function should only be used to extract cycle counts
+    from NPUEval trace files where the expectation is to have exactly 1 of
+    each event0 and event1.
+
+    Parameters
+    ----------
+    trace_path : str
+        Path to the trace JSON file, a list of event dicts that are read
+        through their "name", "ph" and "ts" entries.
+
+    Returns
+    -------
+    number
+        Timestamp of the first INSTR_EVENT_1 begin event minus the timestamp
+        of the first INSTR_EVENT_0 begin event, or ``np.inf`` when either
+        event is absent or the trace entries are malformed.
+    """
+    with open(trace_path, "r") as f:
+        data = json.load(f)
+
+    event0 = []
+    event1 = []
+    try:
+        for x in data:
+            if x["name"] == "INSTR_EVENT_0" and x["ph"] == "B":
+                event0.append(x["ts"])
+
+            if x["name"] == "INSTR_EVENT_1" and x["ph"] == "B":
+                event1.append(x["ts"])
+
+        # IndexError here means one of the two events never showed up.
+        return event1[0] - event0[0]
+    except (KeyError, IndexError, TypeError):
+        # Only data problems map to "no measurement"; interrupts and other
+        # unrelated errors are allowed to propagate.
+        return np.inf
+
+
+def get_cycles_summary(trace_path):
+    """Return the cycle counts of every event0/event1 pair in a trace.
+
+    This helper function is used to extract cycle counts from a trace json
+    file and returns a list of cycles between pairs of event0 and event1.
+    This always assumes each event0 is followed by an event1 and ignores
+    extra event0 and event1's.
+
+    Parameters
+    ----------
+    trace_path : str
+        Path to the trace JSON file, a list of event dicts that are read
+        through their "name", "ph" and "ts" entries.
+
+    Returns
+    -------
+    list or float
+        One timestamp difference per completed pair, in trace order (empty
+        when no pair completes). ``np.inf`` is returned instead of a list
+        when a trace entry is malformed, so callers must check before
+        indexing the result.
+    """
+    with open(trace_path, "r") as f:
+        data = json.load(f)
+
+    delta = []
+    in_kernel = False
+    event0 = 0
+    try:
+        for x in data:
+            if x["name"] == "INSTR_EVENT_0" and x["ph"] == "B":
+                # A second event0 before the closing event1 is ignored.
+                if not in_kernel:
+                    event0 = x["ts"]
+                    in_kernel = True
+
+            if x["name"] == "INSTR_EVENT_1" and x["ph"] == "B":
+                # An event1 without a pending event0 is ignored.
+                if in_kernel:
+                    delta.append(x["ts"] - event0)
+                    in_kernel = False
+
+        return delta
+    except (KeyError, IndexError, TypeError) as err:
+        # Only data problems are reported this way; interrupts and other
+        # unrelated errors are allowed to propagate.
+        print(f"Malformed trace event in {trace_path}: {err!r}")
+        return np.inf
+
+
+def get_vector_time(trace):
+    """Return the fraction of the kernel window spent in vector instructions.
+
+    The window runs from the begin ("B") timestamp of INSTR_EVENT_0 to the
+    begin timestamp of INSTR_EVENT_1 (the trace is expected to have exactly
+    one of each; if there are several, the last occurrence is used). Every
+    INSTR_VECTOR begin/end pair whose timestamps fall inside the window adds
+    its duration to the total.
+
+    Parameters
+    ----------
+    trace : str
+        Path to the trace JSON file, a list of event dicts that are read
+        through their "name", "ph" and "ts" entries.
+
+    Returns
+    -------
+    int or float
+        Total vector duration divided by the window length, or 0 when a
+        marker event is missing or the window is empty.
+    """
+    with open(trace, "r") as f:
+        data = json.load(f)
+
+    start, end = None, None
+
+    # find start and end
+    for x in data:
+        if x["name"] == "INSTR_EVENT_0" and x["ph"] == "B":
+            start = x["ts"]
+        if x["name"] == "INSTR_EVENT_1" and x["ph"] == "B":
+            end = x["ts"]
+
+    # Compare against None explicitly: a timestamp of 0 is a valid marker
+    # and must not be mistaken for "event not found".
+    if start is None or end is None:
+        return 0
+
+    # An empty or inverted window holds no vector work, and returning early
+    # also keeps the final division away from a zero denominator.
+    if end <= start:
+        return 0
+
+    total_duration = 0
+    stack = []
+
+    for event in data:
+        if event["name"] != "INSTR_VECTOR":
+            continue
+        if event["ts"] < start or event["ts"] > end:
+            continue
+
+        if event["ph"] == "B":
+            stack.append(event)
+        elif event["ph"] == "E" and stack:
+            # Pair this end with the most recent unmatched begin event.
+            begin_event = stack.pop()
+            total_duration += event["ts"] - begin_event["ts"]
+
+    return total_duration / (end - start)