#17473: Add per core kernel stats and first start to last start
mo-tenstorrent committed Feb 5, 2025
1 parent 5815e04 commit 67cd4ee
Showing 3 changed files with 90 additions and 17 deletions.
22 changes: 22 additions & 0 deletions tt_metal/tools/profiler/device_post_proc_config.py
@@ -24,6 +24,28 @@ class default_setup(metaclass=MergeMetaclass):
]

timerAnalysis = {
"device_kernel_first_to_last_start": {
"across": "ops",
"type": "op_first_last",
"start": {
"core": "ANY",
"risc": "ANY",
"zone_phase": "ZONE_START",
"zone_name": [f"{risc}-KERNEL" for risc in riscTypes],
},
"end": {
"core": "ANY",
"risc": "ANY",
"zone_phase": "ZONE_START",
"zone_name": [f"{risc}-KERNEL" for risc in riscTypes],
},
},
"device_kernel_duration_per_core": {
"across": "ops",
"type": "op_core_first_last",
"start": {"core": "ANY", "risc": "ANY", "zone_name": [f"{risc}-KERNEL" for risc in riscTypes]},
"end": {"core": "ANY", "risc": "ANY", "zone_name": [f"{risc}-KERNEL" for risc in riscTypes]},
},
"device_fw_duration": {
"across": "ops",
"type": "op_first_last",
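The two new analyses differ mainly in how records are grouped: device_kernel_first_to_last_start measures the spread between the earliest and latest KERNEL ZONE_START seen on any core of an op, while device_kernel_duration_per_core computes a first-to-last duration separately for each core so that min/max/average statistics can be reported. A minimal illustrative sketch of the idea (not the profiler's implementation; the event-field names are assumptions):

    # Illustrative only: per-op kernel timing from a flat list of zone events.
    from collections import defaultdict

    def kernel_start_spread(events):
        """Spread between the first and last kernel zone start, in cycles."""
        starts = [e["timestamp"] for e in events if e["phase"] == "ZONE_START"]
        return max(starts) - min(starts)

    def per_core_kernel_durations(events):
        """First-to-last duration of kernel events on each core, in cycles."""
        by_core = defaultdict(list)
        for e in events:
            by_core[e["core"]].append(e["timestamp"])
        return {core: max(ts) - min(ts) for core, ts in by_core.items()}

    events = [
        {"phase": "ZONE_START", "timestamp": 100, "core": (1, 1)},
        {"phase": "ZONE_END", "timestamp": 900, "core": (1, 1)},
        {"phase": "ZONE_START", "timestamp": 140, "core": (2, 1)},
        {"phase": "ZONE_END", "timestamp": 700, "core": (2, 1)},
    ]
    print(kernel_start_spread(events))          # 40
    print(per_core_kernel_durations(events))    # {(1, 1): 800, (2, 1): 560}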
33 changes: 28 additions & 5 deletions tt_metal/tools/profiler/process_device_log.py
@@ -356,8 +356,8 @@ def get_ops(timeseries):
opCores[core] = (timerID,)
if len(ts) == 4:
timerID, tsValue, statData, risc = ts
if (risc == "BRISC" and timerID["zone_name"] == "BRISC-FW" and timerID["type"] == "ZONE_START") or (
risc == "ERISC" and timerID["zone_name"] == "ERISC-FW" and timerID["type"] == "ZONE_START"
if (risc == "BRISC" and timerID["zone_name"] == "BRISC-FW" and timerID["type"] == "ZONE_END") or (
risc == "ERISC" and timerID["zone_name"] == "ERISC-FW" and timerID["type"] == "ZONE_END"
):
opIsDone = True
ops[-1]["timeseries"].append(ts)
@@ -436,16 +436,19 @@ def translate_metaData(metaData, core, risc):
def determine_conditions(timerID, metaData, analysis):
currCore = analysis["start"]["core"] if "core" in analysis["start"].keys() else None
currRisc = analysis["start"]["risc"]
currStart = (timerID["zone_name"],) + translate_metaData(metaData, currCore, currRisc)
currPhase = (timerID["type"],) if "zone_phase" in analysis["start"].keys() else (None,)
currStart = (timerID["zone_name"],) + currPhase + translate_metaData(metaData, currCore, currRisc)

currCore = analysis["end"]["core"] if "core" in analysis["end"].keys() else None
currRisc = analysis["end"]["risc"]
currEnd = (timerID["zone_name"],) + translate_metaData(metaData, currCore, currRisc)
currPhase = (timerID["type"],) if "zone_phase" in analysis["end"].keys() else (None,)
currEnd = (timerID["zone_name"],) + currPhase + translate_metaData(metaData, currCore, currRisc)

if type(analysis["start"]["zone_name"]) == list:
desStart = [
(
zoneName,
analysis["start"]["zone_phase"] if "zone_phase" in analysis["start"].keys() else None,
analysis["start"]["core"] if "core" in analysis["start"].keys() else None,
analysis["start"]["risc"],
)
@@ -455,6 +458,7 @@ def determine_conditions(timerID, metaData, analysis):
desStart = [
(
analysis["start"]["zone_name"],
analysis["start"]["zone_phase"] if "zone_phase" in analysis["start"].keys() else None,
analysis["start"]["core"] if "core" in analysis["start"].keys() else None,
analysis["start"]["risc"],
)
@@ -464,6 +468,7 @@ def determine_conditions(timerID, metaData, analysis):
desEnd = [
(
zoneName,
analysis["end"]["zone_phase"] if "zone_phase" in analysis["end"].keys() else None,
analysis["end"]["core"] if "core" in analysis["end"].keys() else None,
analysis["end"]["risc"],
)
@@ -473,6 +478,7 @@ def determine_conditions(timerID, metaData, analysis):
desEnd = [
(
analysis["end"]["zone_name"],
analysis["end"]["zone_phase"] if "zone_phase" in analysis["end"].keys() else None,
analysis["end"]["core"] if "core" in analysis["end"].keys() else None,
analysis["end"]["risc"],
)
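With zone_phase threaded through, both the observed record key and the desired start/end keys now carry the phase ("ZONE_START"/"ZONE_END", or None when the analysis does not constrain it), so a record only matches when its phase agrees as well. A simplified sketch of that key comparison (metadata translation is omitted and the riscTypes entries are illustrative):

    # Simplified: the comparison key gained a zone-phase slot.
    def condition_key(zone_name, zone_phase, core, risc):
        return (zone_name, zone_phase, core, risc)

    # Desired start keys for an analysis that pins the phase to ZONE_START.
    wanted = [condition_key(f"{r}-KERNEL", "ZONE_START", "ANY", "ANY")
              for r in ("BRISC", "NCRISC")]  # illustrative subset of riscTypes

    record = condition_key("BRISC-KERNEL", "ZONE_START", "ANY", "ANY")
    late_end = condition_key("BRISC-KERNEL", "ZONE_END", "ANY", "ANY")

    assert record in wanted        # phase matches -> counted as a start
    assert late_end not in wanted  # same zone, wrong phase -> ignored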
@@ -506,7 +512,6 @@ def first_last_analysis(timeseries, analysis):
)
)
break

return durations


@@ -518,6 +523,22 @@ def op_first_last_analysis(riscData, analysis):
return first_last_analysis(riscData["timeseries"], analysis)


def op_core_first_last_analysis(riscData, analysis):
core_ops = {}
durations = []
for ts in riscData["timeseries"]:
assert len(ts) == 5
core = ts[4]
if core in core_ops:
core_ops[core].append(ts)
else:
core_ops[core] = [ts]
for core, timeseries in core_ops.items():
durations.append(first_last_analysis(timeseries, analysis)[0])

return durations


def get_duration(riscData, analysis):
totalDuration = 0
for index, (timerID, timestamp, statData, risc, core) in enumerate(riscData["timeseries"]):
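The new op_core_first_last_analysis helper buckets each 5-tuple record by its core field (index 4) and runs the existing first/last analysis once per bucket, returning one duration entry per core. A self-contained sketch of just the grouping step (record layout assumed to be (timerID, timestamp, statData, risc, core)):

    from collections import defaultdict

    def group_by_core(timeseries):
        """Bucket profiler records by the core field at index 4."""
        buckets = defaultdict(list)
        for record in timeseries:  # (timerID, timestamp, statData, risc, core)
            buckets[record[4]].append(record)
        return buckets

    timeseries = [
        ({"zone_name": "BRISC-KERNEL"}, 1000, None, "BRISC", (1, 1)),
        ({"zone_name": "BRISC-KERNEL"}, 1900, None, "BRISC", (1, 1)),
        ({"zone_name": "BRISC-KERNEL"}, 1040, None, "BRISC", (2, 1)),
        ({"zone_name": "BRISC-KERNEL"}, 1700, None, "BRISC", (2, 1)),
    ]
    per_core = group_by_core(timeseries)
    # first_last_analysis is then applied to each bucket, yielding one duration per core.
    print({core: len(records) for core, records in per_core.items()})  # {(1, 1): 2, (2, 1): 2}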
@@ -564,6 +585,8 @@ def timeseries_analysis(riscData, name, analysis):
tmpList = session_first_last_analysis(riscData, analysis)
elif analysis["type"] == "op_first_last":
tmpList = op_first_last_analysis(riscData, analysis)
elif analysis["type"] == "op_core_first_last":
tmpList = op_core_first_last_analysis(riscData, analysis)
elif analysis["type"] == "sum":
tmpList = get_duration(riscData, analysis)
else:
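The per-core analysis therefore produces a list with one duration per core, and the reporting code below reads both the raw series and a stats summary keyed "Min"/"Max"/"Average". The stats computation itself is not part of this diff; conceptually it reduces to the following (a sketch, with key names taken from the consuming code in process_ops_logs.py):

    def summarize_per_core(durations_ns):
        """Aggregate one kernel duration per core into the stats the report consumes."""
        return {
            "Min": min(durations_ns),
            "Max": max(durations_ns),
            "Average": sum(durations_ns) / len(durations_ns),
        }

    print(summarize_per_core([800.0, 560.0, 720.0]))
    # {'Min': 560.0, 'Max': 800.0, 'Average': 693.33...}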
52 changes: 40 additions & 12 deletions tt_metal/tools/profiler/process_ops_logs.py
@@ -53,6 +53,10 @@
"OP TO OP LATENCY [ns]",
"DEVICE FW DURATION [ns]",
"DEVICE KERNEL DURATION [ns]",
"DEVICE KERNEL DURATION PER CORE MIN [ns]",
"DEVICE KERNEL DURATION PER CORE MAX [ns]",
"DEVICE KERNEL DURATION PER CORE AVG [ns]",
"DEVICE KERNEL FIRST TO LAST START [ns]",
"DEVICE BRISC KERNEL DURATION [ns]",
"DEVICE NCRISC KERNEL DURATION [ns]",
"DEVICE TRISC0 KERNEL DURATION [ns]",
@@ -349,10 +353,11 @@ def append_device_data(ops, traceReplays, logFolder):
cores.add(core)
deviceOp["core_usage"] = {"count": len(cores), "cores": [str(core) for core in cores]}
deviceOp["device_time"] = {
analysis: data["series"] for analysis, data in deviceOpTime["analysis"].items()
analysis: {"series": data["series"], "stats": data["stats"]}
for analysis, data in deviceOpTime["analysis"].items()
}
for analysis, data in deviceOp["device_time"].items():
for sample in data:
for sample in data["series"]:
sample["duration_ns"] = sample["duration_cycles"] * 1000 / freq
traceOps = {}

@@ -422,23 +427,35 @@ def get_device_data_generate_report(
cores.add(core)
deviceOp["core_usage"] = {"count": len(cores), "cores": [str(core) for core in cores]}
deviceOp["device_time"] = {
analysis: data["series"] for analysis, data in deviceOpTime["analysis"].items()
analysis: {"series": data["series"], "stats": data["stats"]}
for analysis, data in deviceOpTime["analysis"].items()
}

if "run_host_id" in timeID.keys():
deviceOp["global_call_count"] = timeID["run_host_id"]
else:
deviceOp["global_call_count"] = i
for analysis, data in deviceOp["device_time"].items():
for sample in data:
for sample in data["series"]:
sample["duration_ns"] = sample["duration_cycles"] * 1000 / freq
deviceOps[device].append(deviceOp)

rowDict = {csv_header_format("global_call_count"): deviceOp["global_call_count"]}
for analysis, analysisData in deviceOp["device_time"].items():
headerField = f"{csv_header_format(analysis)} [ns]"
assert len(analysisData) == 1, "Unexpected device data format"
rowDict[headerField] = f"{analysisData[0]['duration_ns']:.0f}"
for analysis, data in deviceOp["device_time"].items():
analysisData = data["series"]
analysisStats = data["stats"]
if "core" in analysis:
assert len(analysisData) >= 1, "Unexpected device data format"
headerField = f"{csv_header_format(analysis)} MIN [ns]"
rowDict[headerField] = f"{analysisStats['Min']:.0f}"
headerField = f"{csv_header_format(analysis)} MAX [ns]"
rowDict[headerField] = f"{analysisStats['Max']:.0f}"
headerField = f"{csv_header_format(analysis)} AVG [ns]"
rowDict[headerField] = f"{analysisStats['Average']:.0f}"
else:
headerField = f"{csv_header_format(analysis)} [ns]"
assert len(analysisData) == 1, "Unexpected device data format"
rowDict[headerField] = f"{analysisData[0]['duration_ns']:.0f}"
if analysis == "device_fw_duration":
rowDict["DEVICE FW START CYCLE"] = analysisData[0]["start_cycle"]
rowDict["DEVICE FW END CYCLE"] = analysisData[0]["end_cycle"]
@@ -646,10 +663,21 @@ def row_compare(row):
if "device_time" in opData.keys():
assert "device_id" in opData.keys(), "Op has device data without device_id"
deviceID = opData["device_id"]
for analysis, analysisData in opData["device_time"].items():
headerField = f"{csv_header_format(analysis)} [ns]"
assert len(analysisData) == 1, "Unexpected device data format"
rowDict[headerField] = f"{analysisData[0]['duration_ns']:.0f}"
for analysis, data in opData["device_time"].items():
analysisData = data["series"]
analysisStats = data["stats"]
if "core" in analysis:
assert len(analysisData) >= 1, "Unexpected device data format"
headerField = f"{csv_header_format(analysis)} MIN [ns]"
rowDict[headerField] = f"{analysisStats['Min']:.0f}"
headerField = f"{csv_header_format(analysis)} MAX [ns]"
rowDict[headerField] = f"{analysisStats['Max']:.0f}"
headerField = f"{csv_header_format(analysis)} AVG [ns]"
rowDict[headerField] = f"{analysisStats['Average']:.0f}"
else:
headerField = f"{csv_header_format(analysis)} [ns]"
assert len(analysisData) == 1, "Unexpected device data format"
rowDict[headerField] = f"{analysisData[0]['duration_ns']:.0f}"
if analysis == "device_fw_duration":
rowDict["DEVICE FW START CYCLE"] = analysisData[0]["start_cycle"]
rowDict["DEVICE FW END CYCLE"] = analysisData[0]["end_cycle"]
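Once the report is generated, the four new columns sit alongside the existing device timings and can be inspected like any other field. A quick usage sketch (the CSV path is a placeholder; the actual report name and location depend on the profiler run):

    import pandas as pd

    # Placeholder path; substitute the ops report produced by your profiler run.
    df = pd.read_csv("ops_perf_results.csv")
    cols = [
        "DEVICE KERNEL DURATION PER CORE MIN [ns]",
        "DEVICE KERNEL DURATION PER CORE MAX [ns]",
        "DEVICE KERNEL DURATION PER CORE AVG [ns]",
        "DEVICE KERNEL FIRST TO LAST START [ns]",
    ]
    print(df[cols].describe())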
