diff --git a/drivers/gpu/drm/ci/xfails/requirements.txt b/drivers/gpu/drm/ci/xfails/requirements.txt index 5e6d48d98e4e..a249c25dfb5c 100644 --- a/drivers/gpu/drm/ci/xfails/requirements.txt +++ b/drivers/gpu/drm/ci/xfails/requirements.txt @@ -4,7 +4,7 @@ termcolor==2.3.0 # ci-collate dependencies certifi==2023.7.22 charset-normalizer==3.2.0 -idna==3.4 +idna==3.7 pip==23.3 python-gitlab==3.15.0 requests==2.31.0 diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index c55878bddfdd..f15bafb9fe97 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -827,6 +827,9 @@ def print_one(self, helper): print(' *{}{}'.format(' \t' if line else '', line)) print(' */') + fptr_type = '%s%s(*)(' % ( + self.map_type(proto['ret_type']), + ((' ' + proto['ret_star']) if proto['ret_star'] else '')) print('static %s %s(* const %s)(' % (self.map_type(proto['ret_type']), proto['ret_star'], proto['name']), end='') comma = '' @@ -845,8 +848,10 @@ def print_one(self, helper): one_arg += '{}'.format(n) comma = ', ' print(one_arg, end='') + fptr_type += one_arg - print(') = (void *) %d;' % helper.enum_val) + fptr_type += ')' + print(') = (%s) %d;' % (fptr_type, helper.enum_val)) print('') ############################################################################### diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index f5b81d439387..c9a8da5bfc56 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -39,6 +39,8 @@ trace/beauty/generated/ pmu-events/pmu-events.c pmu-events/jevents pmu-events/metric_test.log +pmu-events/arch/**/extra-metrics.json +pmu-events/arch/**/extra-metricgroups.json tests/shell/*.shellcheck_log tests/shell/coresight/*.shellcheck_log tests/shell/lib/*.shellcheck_log diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 9dd2e8d3f3c9..75b278497526 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -1265,9 +1265,26 @@ endif # CONFIG_PERF_BPF_SKEL bpf-skel-clean: $(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS) $(SKEL_OUT)/vmlinux.h +pmu-events-clean: +ifeq ($(OUTPUT),) + $(call QUIET_CLEAN, pmu-events) $(RM) \ + pmu-events/pmu-events.c \ + pmu-events/metric_test.log \ + pmu-events/test-empty-pmu-events.c \ + pmu-events/empty-pmu-events.log + $(Q)find pmu-events/arch -name 'extra-metrics.json' -delete -o \ + -name 'extra-metricgroups.json' -delete +else + $(call QUIET_CLEAN, pmu-events) $(RM) -r $(OUTPUT)pmu-events/arch \ + $(OUTPUT)pmu-events/pmu-events.c \ + $(OUTPUT)pmu-events/metric_test.log \ + $(OUTPUT)pmu-events/test-empty-pmu-events.c \ + $(OUTPUT)pmu-events/empty-pmu-events.log +endif + clean:: $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $(LIBPERF)-clean \ arm64-sysreg-defs-clean fixdep-clean python-clean bpf-skel-clean \ - tests-coresight-targets-clean + tests-coresight-targets-clean pmu-events-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive \ $(OUTPUT)perf-iostat $(LANG_BINDINGS) $(Q)find $(or $(OUTPUT),.) 
-name '*.o' -delete -o -name '*.a' -delete -o \ @@ -1280,10 +1297,6 @@ clean:: $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $( $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \ $(OUTPUT)util/intel-pt-decoder/inat-tables.c \ $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \ - $(OUTPUT)pmu-events/pmu-events.c \ - $(OUTPUT)pmu-events/test-empty-pmu-events.c \ - $(OUTPUT)pmu-events/empty-pmu-events.log \ - $(OUTPUT)pmu-events/metric_test.log \ $(OUTPUT)$(fadvise_advice_array) \ $(OUTPUT)$(fsconfig_arrays) \ $(OUTPUT)$(fsmount_arrays) \ diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build index d941bc9d16e9..91b6837e32c9 100644 --- a/tools/perf/pmu-events/Build +++ b/tools/perf/pmu-events/Build @@ -1,7 +1,6 @@ pmu-events-y += pmu-events.o JDIR = pmu-events/arch/$(SRCARCH) -JSON = $(shell [ -d $(JDIR) ] && \ - find $(JDIR) -name '*.json' -o -name 'mapfile.csv') +JSON = $(shell find pmu-events/arch -name *.json -o -name *.csv) JDIR_TEST = pmu-events/arch/test JSON_TEST = $(shell [ -d $(JDIR_TEST) ] && \ find $(JDIR_TEST) -name '*.json') @@ -29,6 +28,61 @@ $(PMU_EVENTS_C): $(EMPTY_PMU_EVENTS_C) $(call rule_mkdir) $(Q)$(call echo-cmd,gen)cp $< $@ else +# Extract the model from a extra-metrics.json or extra-metricgroups.json path +model_name = $(shell echo $(1)|sed -e 's@.\+/\(.*\)/extra-metric.*\.json@\1@') +vendor_name = $(shell echo $(1)|sed -e 's@.\+/\(.*\)/[^/]*/extra-metric.*\.json@\1@') + +# Copy checked-in json for generation. +$(OUTPUT)pmu-events/arch/%: pmu-events/arch/% + $(call rule_mkdir) + $(Q)$(call echo-cmd,gen)cp $< $@ + +GEN_METRIC_DEPS := pmu-events/metric.py pmu-events/common_metrics.py + +# Generate AMD Json +ZENS = $(shell ls -d pmu-events/arch/x86/amdzen*) +ZEN_METRICS = $(foreach x,$(ZENS),$(OUTPUT)$(x)/extra-metrics.json) +ZEN_METRICGROUPS = $(foreach x,$(ZENS),$(OUTPUT)$(x)/extra-metricgroups.json) + +$(ZEN_METRICS): pmu-events/amd_metrics.py $(GEN_METRIC_DEPS) + $(call rule_mkdir) + $(Q)$(call echo-cmd,gen)$(PYTHON) $< $(call model_name,$@) pmu-events/arch > $@ + +$(ZEN_METRICGROUPS): pmu-events/amd_metrics.py $(GEN_METRIC_DEPS) + $(call rule_mkdir) + $(Q)$(call echo-cmd,gen)$(PYTHON) $< -metricgroups $(call model_name,$@) pmu-events/arch > $@ + +# Generate ARM Json +ARMS = $(shell ls -d pmu-events/arch/arm64/arm/*) +ARM_METRICS = $(foreach x,$(ARMS),$(OUTPUT)$(x)/extra-metrics.json) +ARM_METRICGROUPS = $(foreach x,$(ARMS),$(OUTPUT)$(x)/extra-metricgroups.json) + +$(ARM_METRICS): pmu-events/arm64_metrics.py $(GEN_METRIC_DEPS) + $(call rule_mkdir) + $(Q)$(call echo-cmd,gen)$(PYTHON) $< $(call vendor_name,$@) $(call model_name,$@) pmu-events/arch > $@ + +$(ARM_METRICGROUPS): pmu-events/arm64_metrics.py $(GEN_METRIC_DEPS) + $(call rule_mkdir) + $(Q)$(call echo-cmd,gen)$(PYTHON) $< -metricgroups $(call vendor_name,$@) $(call model_name,$@) pmu-events/arch > $@ + +# Generate Intel Json +INTELS = $(shell ls -d pmu-events/arch/x86/*|grep -v amdzen|grep -v mapfile.csv) +INTEL_METRICS = $(foreach x,$(INTELS),$(OUTPUT)$(x)/extra-metrics.json) +INTEL_METRICGROUPS = $(foreach x,$(INTELS),$(OUTPUT)$(x)/extra-metricgroups.json) + +$(INTEL_METRICS): pmu-events/intel_metrics.py $(GEN_METRIC_DEPS) + $(call rule_mkdir) + $(Q)$(call echo-cmd,gen)$(PYTHON) $< $(call model_name,$@) pmu-events/arch > $@ + +$(INTEL_METRICGROUPS): pmu-events/intel_metrics.py $(GEN_METRIC_DEPS) + $(call rule_mkdir) + $(Q)$(call echo-cmd,gen)$(PYTHON) $< -metricgroups $(call model_name,$@) pmu-events/arch > $@ + +GEN_JSON = $(patsubst 
%,$(OUTPUT)%,$(JSON)) \ + $(ZEN_METRICS) $(ZEN_METRICGROUPS) \ + $(ARM_METRICS) $(ARM_METRICGROUPS) \ + $(INTEL_METRICS) $(INTEL_METRICGROUPS) + $(METRIC_TEST_LOG): $(METRIC_TEST_PY) $(METRIC_PY) $(call rule_mkdir) $(Q)$(call echo-cmd,test)$(PYTHON) $< 2> $@ || (cat $@ && false) @@ -41,9 +95,9 @@ $(EMPTY_PMU_EVENTS_TEST_LOG): $(EMPTY_PMU_EVENTS_C) $(TEST_EMPTY_PMU_EVENTS_C) $(call rule_mkdir) $(Q)$(call echo-cmd,test)diff -u $^ 2> $@ || (cat $@ && false) -$(PMU_EVENTS_C): $(JSON) $(JSON_TEST) $(JEVENTS_PY) $(METRIC_PY) $(METRIC_TEST_LOG) $(EMPTY_PMU_EVENTS_TEST_LOG) +$(PMU_EVENTS_C): $(GEN_JSON) $(JSON_TEST) $(JEVENTS_PY) $(METRIC_PY) $(METRIC_TEST_LOG) $(EMPTY_PMU_EVENTS_TEST_LOG) $(call rule_mkdir) - $(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) $(JEVENTS_ARCH) $(JEVENTS_MODEL) pmu-events/arch $@ + $(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) $(JEVENTS_ARCH) $(JEVENTS_MODEL) $(OUTPUT)pmu-events/arch $@ endif # pmu-events.c file is generated in the OUTPUT directory so it needs a diff --git a/tools/perf/pmu-events/amd_metrics.py b/tools/perf/pmu-events/amd_metrics.py new file mode 100755 index 000000000000..8dbd4041d6f1 --- /dev/null +++ b/tools/perf/pmu-events/amd_metrics.py @@ -0,0 +1,687 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +from metric import (d_ratio, has_event, max, Event, JsonEncodeMetric, + JsonEncodeMetricGroupDescriptions, Literal, LoadEvents, + Metric, MetricGroup, Select) +import argparse +from common_metrics import Cycles +import json +import math +import os +from typing import Optional + +# Global command line arguments. +_args = None +_zen_model: int = 1 +interval_sec = Event("duration_time") +ins = Event("instructions") +cycles = Event("cycles") +# Number of CPU cycles scaled for SMT. 
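+# When the "#smt_on" literal is true two hardware threads share the core,
+# so halve the raw cycle count; otherwise use the cycle count unchanged.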
+smt_cycles = Select(cycles / 2, Literal("#smt_on"), cycles) + +def AmdBr(): + def Total() -> MetricGroup: + br = Event("ex_ret_brn") + br_m_all = Event("ex_ret_brn_misp") + br_clr = Event("ex_ret_msprd_brnch_instr_dir_msmtch", "ex_ret_brn_resync") + + br_r = d_ratio(br, interval_sec) + ins_r = d_ratio(ins, br) + misp_r = d_ratio(br_m_all, br) + clr_r = d_ratio(br_clr, interval_sec) + + return MetricGroup("br_total", [ + Metric("br_total_retired", + "The number of branch instructions retired per second.", br_r, + "insn/s"), + Metric( + "br_total_mispred", + "The number of branch instructions retired, of any type, that were " + "not correctly predicted as a percentage of all branch instrucions.", + misp_r, "100%"), + Metric("br_total_insn_between_branches", + "The number of instructions divided by the number of branches.", + ins_r, "insn"), + Metric("br_total_insn_fe_resteers", + "The number of resync branches per second.", clr_r, "req/s") + ]) + + def Taken() -> MetricGroup: + br = Event("ex_ret_brn_tkn") + br_m_tk = Event("ex_ret_brn_tkn_misp") + br_r = d_ratio(br, interval_sec) + ins_r = d_ratio(ins, br) + misp_r = d_ratio(br_m_tk, br) + return MetricGroup("br_taken", [ + Metric("br_taken_retired", + "The number of taken branches that were retired per second.", + br_r, "insn/s"), + Metric( + "br_taken_mispred", + "The number of retired taken branch instructions that were " + "mispredicted as a percentage of all taken branches.", misp_r, + "100%"), + Metric( + "br_taken_insn_between_branches", + "The number of instructions divided by the number of taken branches.", + ins_r, "insn"), + ]) + + def Conditional() -> Optional[MetricGroup]: + global _zen_model + br = Event("ex_ret_cond") + br_r = d_ratio(br, interval_sec) + ins_r = d_ratio(ins, br) + + metrics = [ + Metric("br_cond_retired", "Retired conditional branch instructions.", + br_r, "insn/s"), + Metric("br_cond_insn_between_branches", + "The number of instructions divided by the number of conditional " + "branches.", ins_r, "insn"), + ] + if _zen_model == 2: + br_m_cond = Event("ex_ret_cond_misp") + misp_r = d_ratio(br_m_cond, br) + metrics += [ + Metric("br_cond_mispred", + "Retired conditional branch instructions mispredicted as a " + "percentage of all conditional branches.", misp_r, "100%"), + ] + + return MetricGroup("br_cond", metrics) + + def Fused() -> MetricGroup: + br = Event("ex_ret_fused_instr", "ex_ret_fus_brnch_inst") + br_r = d_ratio(br, interval_sec) + ins_r = d_ratio(ins, br) + return MetricGroup("br_cond", [ + Metric("br_fused_retired", + "Retired fused branch instructions per second.", br_r, "insn/s"), + Metric( + "br_fused_insn_between_branches", + "The number of instructions divided by the number of fused " + "branches.", ins_r, "insn"), + ]) + + def Far() -> MetricGroup: + br = Event("ex_ret_brn_far") + br_r = d_ratio(br, interval_sec) + ins_r = d_ratio(ins, br) + return MetricGroup("br_far", [ + Metric("br_far_retired", "Retired far control transfers per second.", + br_r, "insn/s"), + Metric( + "br_far_insn_between_branches", + "The number of instructions divided by the number of far branches.", + ins_r, "insn"), + ]) + + return MetricGroup("br", [Total(), Taken(), Conditional(), Fused(), Far()], + description="breakdown of retired branch instructions") + + +def AmdCtxSw() -> MetricGroup: + cs = Event("context\-switches") + metrics = [ + Metric("cs_rate", "Context switches per second", d_ratio(cs, interval_sec), "ctxsw/s") + ] + + ev = Event("instructions") + metrics.append(Metric("cs_instr", "Instructions per 
context switch", + d_ratio(ev, cs), "instr/cs")) + + ev = Event("cycles") + metrics.append(Metric("cs_cycles", "Cycles per context switch", + d_ratio(ev, cs), "cycles/cs")) + + ev = Event("ls_dispatch.ld_dispatch") + metrics.append(Metric("cs_loads", "Loads per context switch", + d_ratio(ev, cs), "loads/cs")) + + ev = Event("ls_dispatch.store_dispatch") + metrics.append(Metric("cs_stores", "Stores per context switch", + d_ratio(ev, cs), "stores/cs")) + + ev = Event("ex_ret_brn_tkn") + metrics.append(Metric("cs_br_taken", "Branches taken per context switch", + d_ratio(ev, cs), "br_taken/cs")) + + return MetricGroup("cs", metrics, + description = ("Number of context switches per second, instructions " + "retired & core cycles between context switches")) + + +def AmdIlp() -> MetricGroup: + tsc = Event("msr/tsc/") + c0 = Event("msr/mperf/") + low = tsc - c0 + inst_ret = Event("ex_ret_instr") + inst_ret_c = [Event(f"{inst_ret.name}/cmask={x}/") for x in range(1, 6)] + ilp = [d_ratio(max(inst_ret_c[x] - inst_ret_c[x + 1], 0), cycles) for x in range(0, 4)] + ilp.append(d_ratio(inst_ret_c[4], cycles)) + ilp0 = 1 + for x in ilp: + ilp0 -= x + return MetricGroup("ilp", [ + Metric("ilp_idle", "Lower power cycles as a percentage of all cycles", + d_ratio(low, tsc), "100%"), + Metric("ilp_inst_ret_0", "Instructions retired in 0 cycles as a percentage of all cycles", + ilp0, "100%"), + Metric("ilp_inst_ret_1", "Instructions retired in 1 cycles as a percentage of all cycles", + ilp[0], "100%"), + Metric("ilp_inst_ret_2", "Instructions retired in 2 cycles as a percentage of all cycles", + ilp[1], "100%"), + Metric("ilp_inst_ret_3", "Instructions retired in 3 cycles as a percentage of all cycles", + ilp[2], "100%"), + Metric("ilp_inst_ret_4", "Instructions retired in 4 cycles as a percentage of all cycles", + ilp[3], "100%"), + Metric("ilp_inst_ret_5", "Instructions retired in 5 or more cycles as a percentage of all cycles", + ilp[4], "100%"), + ]) + + +def AmdDtlb() -> Optional[MetricGroup]: + global _zen_model + if _zen_model >= 4: + return None + + d_dat = Event("ls_dc_accesses") if _zen_model <= 3 else None + d_h4k = Event("ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit") + d_hcoal = Event("ls_l1_d_tlb_miss.tlb_reload_coalesced_page_hit") if _zen_model >= 2 else 0 + d_h2m = Event("ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit") + d_h1g = Event("ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit") + + d_m4k = Event("ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss") + d_mcoal = Event("ls_l1_d_tlb_miss.tlb_reload_coalesced_page_miss") if _zen_model >= 2 else 0 + d_m2m = Event("ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss") + d_m1g = Event("ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss") + + d_w0 = Event("ls_tablewalker.dc_type0") if _zen_model <= 3 else None + d_w1 = Event("ls_tablewalker.dc_type1") if _zen_model <= 3 else None + walks = d_w0 + d_w1 + walks_r = d_ratio(walks, interval_sec) + ins_w = d_ratio(ins, walks) + l1 = d_dat + l1_r = d_ratio(l1, interval_sec) + l2_hits = d_h4k + d_hcoal + d_h2m + d_h1g + l2_miss = d_m4k + d_mcoal + d_m2m + d_m1g + l2_r = d_ratio(l2_hits + l2_miss, interval_sec) + l1_miss = l2_hits + l2_miss + walks + l1_hits = max(l1 - l1_miss, 0) + ins_l = d_ratio(ins, l1_miss) + + return MetricGroup("dtlb", [ + MetricGroup("dtlb_ov", [ + Metric("dtlb_ov_insn_bt_l1_miss", + "DTLB overview: instructions between l1 misses.", ins_l, + "insns"), + Metric("dtlb_ov_insn_bt_walks", + "DTLB overview: instructions between dtlb page table walks.", + ins_w, "insns"), + ]), + MetricGroup("dtlb_l1", [ + Metric("dtlb_l1_hits", + "DTLB L1 hits as 
percentage of all DTLB L1 accesses.", + d_ratio(l1_hits, l1), "100%"), + Metric("dtlb_l1_miss", + "DTLB L1 misses as percentage of all DTLB L1 accesses.", + d_ratio(l1_miss, l1), "100%"), + Metric("dtlb_l1_reqs", "DTLB L1 accesses per second.", l1_r, + "insns/s"), + ]), + MetricGroup("dtlb_l2", [ + Metric("dtlb_l2_hits", + "DTLB L2 hits as percentage of all DTLB L2 accesses.", + d_ratio(l2_hits, l2_hits + l2_miss), "100%"), + Metric("dtlb_l2_miss", + "DTLB L2 misses as percentage of all DTLB L2 accesses.", + d_ratio(l2_miss, l2_hits + l2_miss), "100%"), + Metric("dtlb_l2_reqs", "DTLB L2 accesses per second.", l2_r, + "insns/s"), + MetricGroup("dtlb_l2_4kb", [ + Metric( + "dtlb_l2_4kb_hits", + "DTLB L2 4kb page size hits as percentage of all DTLB L2 4kb " + "accesses.", d_ratio(d_h4k, d_h4k + d_m4k), "100%"), + Metric( + "dtlb_l2_4kb_miss", + "DTLB L2 4kb page size misses as percentage of all DTLB L2 4kb" + "accesses.", d_ratio(d_m4k, d_h4k + d_m4k), "100%") + ]), + MetricGroup("dtlb_l2_coalesced", [ + Metric( + "dtlb_l2_coal_hits", + "DTLB L2 coalesced page (16kb) hits as percentage of all DTLB " + "L2 coalesced accesses.", d_ratio(d_hcoal, + d_hcoal + d_mcoal), "100%"), + Metric( + "dtlb_l2_coal_miss", + "DTLB L2 coalesced page (16kb) misses as percentage of all " + "DTLB L2 coalesced accesses.", + d_ratio(d_mcoal, d_hcoal + d_mcoal), "100%") + ]), + MetricGroup("dtlb_l2_2mb", [ + Metric( + "dtlb_l2_2mb_hits", + "DTLB L2 2mb page size hits as percentage of all DTLB L2 2mb " + "accesses.", d_ratio(d_h2m, d_h2m + d_m2m), "100%"), + Metric( + "dtlb_l2_2mb_miss", + "DTLB L2 2mb page size misses as percentage of all DTLB L2 " + "accesses.", d_ratio(d_m2m, d_h2m + d_m2m), "100%") + ]), + MetricGroup("dtlb_l2_1g", [ + Metric( + "dtlb_l2_1g_hits", + "DTLB L2 1gb page size hits as percentage of all DTLB L2 1gb " + "accesses.", d_ratio(d_h1g, d_h1g + d_m1g), "100%"), + Metric( + "dtlb_l2_1g_miss", + "DTLB L2 1gb page size misses as percentage of all DTLB L2 " + "1gb accesses.", d_ratio(d_m1g, d_h1g + d_m1g), "100%") + ]), + ]), + MetricGroup("dtlb_walks", [ + Metric("dtlb_walks_reqs", "DTLB page table walks per second.", + walks_r, "walks/s"), + ]), + ], description="Data TLB metrics") + + +def AmdItlb(): + global _zen_model + l2h = Event("bp_l1_tlb_miss_l2_tlb_hit", "bp_l1_tlb_miss_l2_hit") + l2m = Event("l2_itlb_misses") + l2r = l2h + l2m + + itlb_l1_mg = None + l1m = l2r + if _zen_model <= 3: + l1r = Event("ic_fw32") + l1h = max(l1r - l1m, 0) + itlb_l1_mg = MetricGroup("itlb_l1", [ + Metric("itlb_l1_hits", + "L1 ITLB hits as a perecentage of L1 ITLB accesses.", + d_ratio(l1h, l1h + l1m), "100%"), + Metric("itlb_l1_miss", + "L1 ITLB misses as a perecentage of L1 ITLB accesses.", + d_ratio(l1m, l1h + l1m), "100%"), + Metric("itlb_l1_reqs", + "The number of 32B fetch windows transferred from IC pipe to DE " + "instruction decoder per second.", d_ratio(l1r, interval_sec), + "windows/sec"), + ]) + + return MetricGroup("itlb", [ + MetricGroup("itlb_ov", [ + Metric("itlb_ov_insn_bt_l1_miss", + "Number of instructions between l1 misses", d_ratio( + ins, l1m), "insns"), + Metric("itlb_ov_insn_bt_l2_miss", + "Number of instructions between l2 misses", d_ratio( + ins, l2m), "insns"), + ]), + itlb_l1_mg, + MetricGroup("itlb_l2", [ + Metric("itlb_l2_hits", + "L2 ITLB hits as a percentage of all L2 ITLB accesses.", + d_ratio(l2h, l2r), "100%"), + Metric("itlb_l2_miss", + "L2 ITLB misses as a percentage of all L2 ITLB accesses.", + d_ratio(l2m, l2r), "100%"), + Metric("itlb_l2_reqs", "ITLB accesses per 
second.", + d_ratio(l2r, interval_sec), "accesses/sec"), + ]), + ], description="Instruction TLB breakdown") + + +def AmdLdSt() -> MetricGroup: + ldst_ld = Event("ls_dispatch.ld_dispatch") + ldst_st = Event("ls_dispatch.store_dispatch") + ldst_ldc1 = Event(f"{ldst_ld}/cmask=1/") + ldst_stc1 = Event(f"{ldst_st}/cmask=1/") + ldst_ldc2 = Event(f"{ldst_ld}/cmask=2/") + ldst_stc2 = Event(f"{ldst_st}/cmask=2/") + ldst_ldc3 = Event(f"{ldst_ld}/cmask=3/") + ldst_stc3 = Event(f"{ldst_st}/cmask=3/") + ldst_cyc = Event("ls_not_halted_cyc") + + ld_rate = d_ratio(ldst_ld, interval_sec) + st_rate = d_ratio(ldst_st, interval_sec) + + ld_v1 = max(ldst_ldc1 - ldst_ldc2, 0) + ld_v2 = max(ldst_ldc2 - ldst_ldc3, 0) + ld_v3 = ldst_ldc3 + + st_v1 = max(ldst_stc1 - ldst_stc2, 0) + st_v2 = max(ldst_stc2 - ldst_stc3, 0) + st_v3 = ldst_stc3 + + return MetricGroup("ldst", [ + MetricGroup("ldst_total", [ + Metric("ldst_total_ld", "Number of loads dispatched per second.", + ld_rate, "insns/sec"), + Metric("ldst_total_st", "Number of stores dispatched per second.", + st_rate, "insns/sec"), + ]), + MetricGroup("ldst_percent_insn", [ + Metric("ldst_percent_insn_ld", + "Load instructions as a percentage of all instructions.", + d_ratio(ldst_ld, ins), "100%"), + Metric("ldst_percent_insn_st", + "Store instructions as a percentage of all instructions.", + d_ratio(ldst_st, ins), "100%"), + ]), + MetricGroup("ldst_ret_loads_per_cycle", [ + Metric( + "ldst_ret_loads_per_cycle_1", + "Load instructions retiring in 1 cycle as a percentage of all " + "unhalted cycles.", d_ratio(ld_v1, ldst_cyc), "100%"), + Metric( + "ldst_ret_loads_per_cycle_2", + "Load instructions retiring in 2 cycles as a percentage of all " + "unhalted cycles.", d_ratio(ld_v2, ldst_cyc), "100%"), + Metric( + "ldst_ret_loads_per_cycle_3", + "Load instructions retiring in 3 or more cycles as a percentage" + "of all unhalted cycles.", d_ratio(ld_v3, ldst_cyc), "100%"), + ]), + MetricGroup("ldst_ret_stores_per_cycle", [ + Metric( + "ldst_ret_stores_per_cycle_1", + "Store instructions retiring in 1 cycle as a percentage of all " + "unhalted cycles.", d_ratio(st_v1, ldst_cyc), "100%"), + Metric( + "ldst_ret_stores_per_cycle_2", + "Store instructions retiring in 2 cycles as a percentage of all " + "unhalted cycles.", d_ratio(st_v2, ldst_cyc), "100%"), + Metric( + "ldst_ret_stores_per_cycle_3", + "Store instructions retiring in 3 or more cycles as a percentage" + "of all unhalted cycles.", d_ratio(st_v3, ldst_cyc), "100%"), + ]), + MetricGroup("ldst_insn_bt", [ + Metric("ldst_insn_bt_ld", "Number of instructions between loads.", + d_ratio(ins, ldst_ld), "insns"), + Metric("ldst_insn_bt_st", "Number of instructions between stores.", + d_ratio(ins, ldst_st), "insns"), + ]) + ], description="Breakdown of load/store instructions") + + +def AmdHwpf(): + """Returns a MetricGroup representing AMD hardware prefetch metrics.""" + global _zen_model + if _zen_model <= 1: + return None + + hwp_ld = Event("ls_dispatch.ld_dispatch") + hwp_l2 = Event("ls_hw_pf_dc_fills.local_l2", + "ls_hw_pf_dc_fills.lcl_l2", + "ls_hw_pf_dc_fill.ls_mabresp_lcl_l2") + hwp_lc = Event("ls_hw_pf_dc_fills.local_ccx", + "ls_hw_pf_dc_fills.int_cache", + "ls_hw_pf_dc_fill.ls_mabresp_lcl_cache") + hwp_lm = Event("ls_hw_pf_dc_fills.dram_io_near", + "ls_hw_pf_dc_fills.mem_io_local", + "ls_hw_pf_dc_fill.ls_mabresp_lcl_dram") + hwp_rc = Event("ls_hw_pf_dc_fills.far_cache", + "ls_hw_pf_dc_fills.ext_cache_remote", + "ls_hw_pf_dc_fill.ls_mabresp_rmt_cache") + hwp_rm = Event("ls_hw_pf_dc_fills.dram_io_far", + 
"ls_hw_pf_dc_fills.mem_io_remote", + "ls_hw_pf_dc_fill.ls_mabresp_rmt_dram") + + loc_pf = hwp_l2 + hwp_lc + hwp_lm + rem_pf = hwp_rc + hwp_rm + all_pf = loc_pf + rem_pf + + r1 = d_ratio(ins, all_pf) + r2 = d_ratio(hwp_ld, all_pf) + r3 = d_ratio(all_pf, interval_sec) + + overview = MetricGroup("hwpf_overview", [ + Metric("hwpf_ov_insn_bt_hwpf", "Insn between HWPF", r1, "insns"), + Metric("hwpf_ov_loads_bt_hwpf", "Loads between HWPF", r2, "loads"), + Metric("hwpf_ov_rate", "HWPF per second", r3, "hwpf/s"), + ]) + r1 = d_ratio(hwp_l2, all_pf) + r2 = d_ratio(hwp_lc, all_pf) + r3 = d_ratio(hwp_lm, all_pf) + data_src_local = MetricGroup("hwpf_data_src_local", [ + Metric("hwpf_data_src_local_l2", "Data source local l2", r1, "100%"), + Metric("hwpf_data_src_local_ccx_l3_loc_ccx", + "Data source local ccx l3 loc ccx", r2, "100%"), + Metric("hwpf_data_src_local_memory_or_io", + "Data source local memory or IO", r3, "100%"), + ]) + + r1 = d_ratio(hwp_rc, all_pf) + r2 = d_ratio(hwp_rm, all_pf) + data_src_remote = MetricGroup("hwpf_data_src_remote", [ + Metric("hwpf_data_src_remote_cache", "Data source remote cache", r1, + "100%"), + Metric("hwpf_data_src_remote_memory_or_io", + "Data source remote memory or IO", r2, "100%"), + ]) + + data_src = MetricGroup("hwpf_data_src", [data_src_local, data_src_remote]) + return MetricGroup("hwpf", [overview, data_src], + description="Hardware prefetch breakdown (CCX L3 = L3 of current thread, Loc CCX = CCX cache on some socket)") + + +def AmdSwpf() -> Optional[MetricGroup]: + """Returns a MetricGroup representing AMD software prefetch metrics.""" + global _zen_model + if _zen_model <= 1: + return None + + swp_ld = Event("ls_dispatch.ld_dispatch") + swp_t0 = Event("ls_pref_instr_disp.prefetch") + swp_w = Event("ls_pref_instr_disp.prefetch_w") # Missing on Zen1 + swp_nt = Event("ls_pref_instr_disp.prefetch_nta") + swp_mab = Event("ls_inef_sw_pref.mab_mch_cnt") + swp_l2 = Event("ls_sw_pf_dc_fills.local_l2", + "ls_sw_pf_dc_fills.lcl_l2", + "ls_sw_pf_dc_fill.ls_mabresp_lcl_l2") + swp_lc = Event("ls_sw_pf_dc_fills.local_ccx", + "ls_sw_pf_dc_fills.int_cache", + "ls_sw_pf_dc_fill.ls_mabresp_lcl_cache") + swp_lm = Event("ls_sw_pf_dc_fills.dram_io_near", + "ls_sw_pf_dc_fills.mem_io_local", + "ls_sw_pf_dc_fill.ls_mabresp_lcl_dram") + swp_rc = Event("ls_sw_pf_dc_fills.far_cache", + "ls_sw_pf_dc_fills.ext_cache_remote", + "ls_sw_pf_dc_fill.ls_mabresp_rmt_cache") + swp_rm = Event("ls_sw_pf_dc_fills.dram_io_far", + "ls_sw_pf_dc_fills.mem_io_remote", + "ls_sw_pf_dc_fill.ls_mabresp_rmt_dram") + + # All the swpf that were satisfied beyond L1D are good. 
+ all_pf = swp_t0 + swp_w + swp_nt + good_pf = swp_l2 + swp_lc + swp_lm + swp_rc + swp_rm + bad_pf = max(all_pf - good_pf, 0) + + loc_pf = swp_l2 + swp_lc + swp_lm + rem_pf = swp_rc + swp_rm + + req_pend = max(0, bad_pf - swp_mab) + + r1 = d_ratio(ins, all_pf) + r2 = d_ratio(swp_ld, all_pf) + r3 = d_ratio(swp_t0, interval_sec) + r4 = d_ratio(swp_w, interval_sec) + r5 = d_ratio(swp_nt, interval_sec) + overview = MetricGroup("swpf_overview", [ + Metric("swpf_ov_insn_bt_swpf", "Insn between SWPF", r1, "insns"), + Metric("swpf_ov_loads_bt_swpf", "Loads between SWPF", r2, "loads"), + Metric("swpf_ov_rate_prefetch_t0_t1_t2", "Rate prefetch TO_T1_T2", r3, + "insns/sec"), + Metric("swpf_ov_rate_prefetch_w", "Rate prefetch W", r4, "insns/sec"), + Metric("swpf_ov_rate_preftech_nta", "Rate prefetch NTA", r5, "insns/sec"), + ]) + + r1 = d_ratio(swp_mab, all_pf) + r2 = d_ratio(req_pend, all_pf) + usefulness_bad = MetricGroup("swpf_usefulness_bad", [ + Metric("swpf_use_bad_hit_l1", "Usefulness bad hit L1", r1, "100%"), + Metric("swpf_use_bad_req_pend", "Usefulness bad req pending", r2, "100%"), + ]) + + r1 = d_ratio(good_pf, all_pf) + usefulness_good = MetricGroup("swpf_usefulness_good", [ + Metric("swpf_use_good_other_src", "Usefulness good other src", r1, + "100%"), + ]) + + usefulness = MetricGroup("swpf_usefulness", [ + usefulness_bad, + usefulness_good, + ]) + + r1 = d_ratio(swp_l2, good_pf) + r2 = d_ratio(swp_lc, good_pf) + r3 = d_ratio(swp_lm, good_pf) + data_src_local = MetricGroup("swpf_data_src_local", [ + Metric("swpf_data_src_local_l2", "Data source local l2", r1, "100%"), + Metric("swpf_data_src_local_ccx_l3_loc_ccx", + "Data source local ccx l3 loc ccx", r2, "100%"), + Metric("swpf_data_src_local_memory_or_io", + "Data source local memory or IO", r3, "100%"), + ]) + + r1 = d_ratio(swp_rc, good_pf) + r2 = d_ratio(swp_rm, good_pf) + data_src_remote = MetricGroup("swpf_data_src_remote", [ + Metric("swpf_data_src_remote_cache", "Data source remote cache", r1, + "100%"), + Metric("swpf_data_src_remote_memory_or_io", + "Data source remote memory or IO", r2, "100%"), + ]) + + data_src = MetricGroup("swpf_data_src", [data_src_local, data_src_remote]) + + return MetricGroup("swpf", [overview, usefulness, data_src], + description="Software prefetch breakdown (CCX L3 = L3 of current thread, Loc CCX = CCX cache on some socket)") + + +def AmdUopCache() -> Optional[MetricGroup]: + try: + op_cache_hit = Event("op_cache_hit_miss.op_cache_hit") + op_cache_miss = Event("op_cache_hit_miss.op_cache_miss") + except: + return None + op_cache_total = op_cache_hit + op_cache_miss + return MetricGroup("uop_cache", [ + Metric("uop_cache_hit_ratio", "Uop cache full or partial hits rate", + d_ratio(op_cache_hit, op_cache_total), + "100%"), + Metric("uop_cache_miss_ratio", "Uop cache misses rate", + d_ratio(op_cache_miss, op_cache_total), + "100%"), + ], description="Micro-op (uop) hit and miss rates.") + + +def AmdUpc() -> Metric: + ops = Event("ex_ret_ops", "ex_ret_cops") + upc = d_ratio(ops, smt_cycles) + return Metric("upc", "Micro-ops retired per core cycle (higher is better)", + upc, "uops/cycle") + + +def Idle() -> Metric: + cyc = Event("msr/mperf/") + tsc = Event("msr/tsc/") + low = max(tsc - cyc, 0) + return Metric( + "idle", + "Percentage of total wallclock cycles where CPUs are in low power state (C1 or deeper sleep state)", + d_ratio(low, tsc), "100%") + + +def Rapl() -> MetricGroup: + """Processor socket power consumption estimate. + + Use events from the running average power limit (RAPL) driver. 
+ """ + # Watts = joules/second + # Currently only energy-pkg is supported by AMD: + # https://lore.kernel.org/lkml/20220105185659.643355-1-eranian@google.com/ + pkg = Event("power/energy\-pkg/") + cond_pkg = Select(pkg, has_event(pkg), math.nan) + scale = 2.3283064365386962890625e-10 + metrics = [ + Metric("cpu_power_pkg", "", + d_ratio(cond_pkg * scale, interval_sec), "Watts"), + ] + + return MetricGroup("cpu_power", metrics, + description="Processor socket power consumption estimates") + +def UncoreL3(): + acc = Event("l3_lookup_state.all_coherent_accesses_to_l3", + "l3_lookup_state.all_l3_req_typs") + miss = Event("l3_lookup_state.l3_miss", + "l3_comb_clstr_state.request_miss") + acc = max(acc, miss) + hits = acc - miss + + return MetricGroup("l3", [ + Metric("l3_accesses", "L3 victim cache accesses", + d_ratio(acc, interval_sec), "accesses/sec"), + Metric("l3_hits", "L3 victim cache hit rate", d_ratio(hits, acc), "100%"), + Metric("l3_miss", "L3 victim cache miss rate", d_ratio(miss, acc), + "100%"), + ], description="L3 cache breakdown per CCX") + + +def main() -> None: + global _args + global _zen_model + + def dir_path(path: str) -> str: + """Validate path is a directory for argparse.""" + if os.path.isdir(path): + return path + raise argparse.ArgumentTypeError(f'\'{path}\' is not a valid directory') + + parser = argparse.ArgumentParser(description="AMD perf json generator") + parser.add_argument("-metricgroups", help="Generate metricgroups data", action='store_true') + parser.add_argument("model", help="e.g. amdzen[123]") + parser.add_argument( + 'events_path', + type=dir_path, + help='Root of tree containing architecture directories containing json files' + ) + _args = parser.parse_args() + + directory = f"{_args.events_path}/x86/{_args.model}/" + LoadEvents(directory) + + _zen_model = int(_args.model[6:]) + + all_metrics = MetricGroup("", [ + AmdBr(), + AmdCtxSw(), + AmdIlp(), + AmdDtlb(), + AmdItlb(), + AmdLdSt(), + AmdHwpf(), + AmdSwpf(), + AmdUopCache(), + AmdUpc(), + Cycles(), + Idle(), + Rapl(), + UncoreL3(), + ]) + + if _args.metricgroups: + print(JsonEncodeMetricGroupDescriptions(all_metrics)) + else: + print(JsonEncodeMetric(all_metrics)) + +if __name__ == '__main__': + main() diff --git a/tools/perf/pmu-events/arm64_metrics.py b/tools/perf/pmu-events/arm64_metrics.py new file mode 100755 index 000000000000..5285a22ff0c8 --- /dev/null +++ b/tools/perf/pmu-events/arm64_metrics.py @@ -0,0 +1,184 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +from metric import (d_ratio, Event, JsonEncodeMetric, JsonEncodeMetricGroupDescriptions, + LoadEvents, Metric, MetricGroup) +import argparse +from common_metrics import Cycles +import json +import os +from typing import Optional + +# Global command line arguments. +_args = None + +def Arm64Topdown() -> MetricGroup: + """Returns a MetricGroup representing ARM64 topdown like metrics.""" + def TryEvent(name: str) -> Optional[Event]: + # Skip an event if not in the json files. + try: + return Event(name) + except: + return None + # ARM models like a53 lack JSON for INST_RETIRED but have the + # architetural standard event in sysfs. Use the PMU name to identify + # the sysfs event. 
+ pmu_name = f'armv8_{_args.model.replace("-", "_")}' + ins = Event("instructions") + ins_ret = Event("INST_RETIRED", f"{pmu_name}/inst_retired/") + cycles = Event("cycles") + stall_fe = TryEvent("STALL_FRONTEND") + stall_be = TryEvent("STALL_BACKEND") + br_ret = TryEvent("BR_RETIRED") + br_mp_ret = TryEvent("BR_MIS_PRED_RETIRED") + dtlb_walk = TryEvent("DTLB_WALK") + itlb_walk = TryEvent("ITLB_WALK") + l1d_tlb = TryEvent("L1D_TLB") + l1i_tlb = TryEvent("L1I_TLB") + l1d_refill = Event("L1D_CACHE_REFILL", f"{pmu_name}/l1d_cache_refill/") + l2d_refill = Event("L2D_CACHE_REFILL", f"{pmu_name}/l2d_cache_refill/") + l1i_refill = Event("L1I_CACHE_REFILL", f"{pmu_name}/l1i_cache_refill/") + l1d_access = Event("L1D_CACHE", f"{pmu_name}/l1d_cache/") + l2d_access = Event("L2D_CACHE", f"{pmu_name}/l2d_cache/") + llc_access = TryEvent("LL_CACHE_RD") + l1i_access = Event("L1I_CACHE", f"{pmu_name}/l1i_cache/") + llc_miss_rd = TryEvent("LL_CACHE_MISS_RD") + ase_spec = TryEvent("ASE_SPEC") + ld_spec = TryEvent("LD_SPEC") + st_spec = TryEvent("ST_SPEC") + vfp_spec = TryEvent("VFP_SPEC") + dp_spec = TryEvent("DP_SPEC") + br_immed_spec = TryEvent("BR_IMMED_SPEC") + br_indirect_spec = TryEvent("BR_INDIRECT_SPEC") + br_ret_spec = TryEvent("BR_RETURN_SPEC") + crypto_spec = TryEvent("CRYPTO_SPEC") + inst_spec = TryEvent("INST_SPEC") + + return MetricGroup("topdown", [ + MetricGroup("topdown_tl", [ + Metric("topdown_tl_ipc", "Instructions per cycle", d_ratio( + ins, cycles), "insn/cycle"), + Metric("topdown_tl_stall_fe_rate", "Frontend stalls to all cycles", + d_ratio(stall_fe, cycles), "100%") if stall_fe else None, + Metric("topdown_tl_stall_be_rate", "Backend stalls to all cycles", + d_ratio(stall_be, cycles), "100%") if stall_be else None, + ]), + MetricGroup("topdown_fe_bound", [ + MetricGroup("topdown_fe_br", [ + Metric("topdown_fe_br_mp_per_insn", + "Branch mispredicts per instruction retired", + d_ratio(br_mp_ret, ins_ret), "br/insn") if br_mp_ret else None, + Metric("topdown_fe_br_ins_rate", + "Branches per instruction retired", d_ratio( + br_ret, ins_ret), "100%") if br_ret else None, + Metric("topdown_fe_br_mispredict", + "Branch mispredicts per branch instruction", + d_ratio(br_mp_ret, br_ret), "100%") if br_mp_ret else None, + ]), + MetricGroup("topdown_fe_itlb", [ + Metric("topdown_fe_itlb_walks", "Itlb walks per insn", + d_ratio(itlb_walk, ins_ret), "walk/insn"), + Metric("topdown_fe_itlb_walk_rate", "Itlb walks per l1i access", + d_ratio(itlb_walk, l1i_tlb), "100%"), + ]) if itlb_walk else None, + MetricGroup("topdown_fe_icache", [ + Metric("topdown_fe_icache_l1i_per_insn", + "L1I cache refills per instruction", + d_ratio(l1i_refill, ins_ret), "l1i/insn"), + Metric("topdown_fe_icache_l1i_miss_rate", + "L1I cache refills per L1I cache access", + d_ratio(l1i_refill, l1i_access), "100%"), + ]), + ]), + MetricGroup("topdown_be_bound", [ + MetricGroup("topdown_be_dtlb", [ + Metric("topdown_be_dtlb_walks", "Dtlb walks per instruction", + d_ratio(dtlb_walk, ins_ret), "walk/insn"), + Metric("topdown_be_dtlb_walk_rate", "Dtlb walks per l1d access", + d_ratio(dtlb_walk, l1d_tlb), "100%"), + ]) if dtlb_walk else None, + MetricGroup("topdown_be_mix", [ + Metric("topdown_be_mix_ld", "Percentage of load instructions", + d_ratio(ld_spec, inst_spec), "100%") if ld_spec else None, + Metric("topdown_be_mix_st", "Percentage of store instructions", + d_ratio(st_spec, inst_spec), "100%") if st_spec else None, + Metric("topdown_be_mix_simd", "Percentage of SIMD instructions", + d_ratio(ase_spec, inst_spec), "100%") if 
ase_spec else None, + Metric("topdown_be_mix_fp", + "Percentage of floating point instructions", + d_ratio(vfp_spec, inst_spec), "100%") if vfp_spec else None, + Metric("topdown_be_mix_dp", + "Percentage of data processing instructions", + d_ratio(dp_spec, inst_spec), "100%") if dp_spec else None, + Metric("topdown_be_mix_crypto", + "Percentage of data processing instructions", + d_ratio(crypto_spec, inst_spec), "100%") if crypto_spec else None, + Metric( + "topdown_be_mix_br", "Percentage of branch instructions", + d_ratio(br_immed_spec + br_indirect_spec + br_ret_spec, + inst_spec), "100%") if br_immed_spec and br_indirect_spec and br_ret_spec else None, + ]) if inst_spec else None, + MetricGroup("topdown_be_dcache", [ + MetricGroup("topdown_be_dcache_l1", [ + Metric("topdown_be_dcache_l1_per_insn", + "L1D cache refills per instruction", + d_ratio(l1d_refill, ins_ret), "refills/insn"), + Metric("topdown_be_dcache_l1_miss_rate", + "L1D cache refills per L1D cache access", + d_ratio(l1d_refill, l1d_access), "100%") + ]), + MetricGroup("topdown_be_dcache_l2", [ + Metric("topdown_be_dcache_l2_per_insn", + "L2D cache refills per instruction", + d_ratio(l2d_refill, ins_ret), "refills/insn"), + Metric("topdown_be_dcache_l2_miss_rate", + "L2D cache refills per L2D cache access", + d_ratio(l2d_refill, l2d_access), "100%") + ]), + MetricGroup("topdown_be_dcache_llc", [ + Metric("topdown_be_dcache_llc_per_insn", + "Last level cache misses per instruction", + d_ratio(llc_miss_rd, ins_ret), "miss/insn"), + Metric("topdown_be_dcache_llc_miss_rate", + "Last level cache misses per L2D cache access", + d_ratio(llc_miss_rd, llc_access), "100%") + ]) if llc_miss_rd and llc_access else None, + ]), + ]), + ]) + + +def main() -> None: + global _args + + def dir_path(path: str) -> str: + """Validate path is a directory for argparse.""" + if os.path.isdir(path): + return path + raise argparse.ArgumentTypeError(f'\'{path}\' is not a valid directory') + + parser = argparse.ArgumentParser(description="ARM perf json generator") + parser.add_argument("-metricgroups", help="Generate metricgroups data", action='store_true') + parser.add_argument("vendor", help="e.g. arm") + parser.add_argument("model", help="e.g. 
neoverse-n1") + parser.add_argument( + 'events_path', + type=dir_path, + help='Root of tree containing architecture directories containing json files' + ) + _args = parser.parse_args() + + directory = f"{_args.events_path}/arm64/{_args.vendor}/{_args.model}/" + LoadEvents(directory) + + all_metrics = MetricGroup("",[ + Arm64Topdown(), + Cycles(), + ]) + + if _args.metricgroups: + print(JsonEncodeMetricGroupDescriptions(all_metrics)) + else: + print(JsonEncodeMetric(all_metrics)) + +if __name__ == '__main__': + main() diff --git a/tools/perf/pmu-events/common_metrics.py b/tools/perf/pmu-events/common_metrics.py new file mode 100644 index 000000000000..74c58f9ab020 --- /dev/null +++ b/tools/perf/pmu-events/common_metrics.py @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +from metric import (d_ratio, Event, Metric, MetricGroup) + +def Cycles() -> MetricGroup: + cyc_k = Event("cycles:kHh") + cyc_g = Event("cycles:G") + cyc_u = Event("cycles:uH") + cyc = cyc_k + cyc_g + cyc_u + + return MetricGroup("cycles", [ + Metric("cycles_total", "Total number of cycles", cyc, "cycles"), + Metric("cycles_user", "User cycles as a percentage of all cycles", + d_ratio(cyc_u, cyc), "100%"), + Metric("cycles_kernel", "Kernel cycles as a percentage of all cycles", + d_ratio(cyc_k, cyc), "100%"), + Metric("cycles_guest", "Hypervisor guest cycles as a percentage of all cycles", + d_ratio(cyc_g, cyc), "100%"), + ], description = "cycles breakdown per privilege level (users, kernel, guest)") diff --git a/tools/perf/pmu-events/intel_metrics.py b/tools/perf/pmu-events/intel_metrics.py new file mode 100755 index 000000000000..4b7668e25e54 --- /dev/null +++ b/tools/perf/pmu-events/intel_metrics.py @@ -0,0 +1,1084 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +from metric import (d_ratio, has_event, max, source_count, CheckPmu, Event, + JsonEncodeMetric, JsonEncodeMetricGroupDescriptions, + Literal, LoadEvents, Metric, MetricConstraint, MetricGroup, + MetricRef, Select) +import argparse +from common_metrics import Cycles +import json +import math +import os +import re +from typing import Optional + +# Global command line arguments. +_args = None +interval_sec = Event("duration_time") + +def Idle() -> Metric: + cyc = Event("msr/mperf/") + tsc = Event("msr/tsc/") + low = max(tsc - cyc, 0) + return Metric( + "idle", + "Percentage of total wallclock cycles where CPUs are in low power state (C1 or deeper sleep state)", + d_ratio(low, tsc), "100%") + + +def Rapl() -> MetricGroup: + """Processor power consumption estimate. + + Use events from the running average power limit (RAPL) driver. 
+ """ + # Watts = joules/second + pkg = Event("power/energy\-pkg/") + cond_pkg = Select(pkg, has_event(pkg), math.nan) + cores = Event("power/energy\-cores/") + cond_cores = Select(cores, has_event(cores), math.nan) + ram = Event("power/energy\-ram/") + cond_ram = Select(ram, has_event(ram), math.nan) + gpu = Event("power/energy\-gpu/") + cond_gpu = Select(gpu, has_event(gpu), math.nan) + psys = Event("power/energy\-psys/") + cond_psys = Select(psys, has_event(psys), math.nan) + scale = 2.3283064365386962890625e-10 + metrics = [ + Metric("cpu_power_pkg", "", + d_ratio(cond_pkg * scale, interval_sec), "Watts"), + Metric("cpu_power_cores", "", + d_ratio(cond_cores * scale, interval_sec), "Watts"), + Metric("cpu_power_ram", "", + d_ratio(cond_ram * scale, interval_sec), "Watts"), + Metric("cpu_power_gpu", "", + d_ratio(cond_gpu * scale, interval_sec), "Watts"), + Metric("cpu_power_psys", "", + d_ratio(cond_psys * scale, interval_sec), "Watts"), + ] + + return MetricGroup("cpu_power", metrics, + description="Running Average Power Limit (RAPL) power consumption estimates") + + +def Smi() -> MetricGroup: + aperf = Event('msr/aperf/') + cycles = Event('cycles') + smi_num = Event('msr/smi/') + smi_cycles = Select(Select((aperf - cycles) / aperf, smi_num > 0, 0), + has_event(aperf), + 0) + return MetricGroup('smi', [ + Metric('smi_num', 'Number of SMI interrupts.', + Select(smi_num, has_event(smi_num), 0), 'SMI#'), + # Note, the smi_cycles "Event" is really a reference to the metric. + Metric('smi_cycles', + 'Percentage of cycles spent in System Management Interrupts. ' + 'Requires /sys/devices/cpu/freeze_on_smi to be 1.', + smi_cycles, '100%', threshold=(MetricRef('smi_cycles') > 0.10)) + ], description = 'System Management Interrupt metrics') + + +def Tsx() -> Optional[MetricGroup]: + pmu = "cpu_core" if CheckPmu("cpu_core") else "cpu" + cycles = Event('cycles') + cycles_in_tx = Event(f'{pmu}/cycles\-t/') + cycles_in_tx_cp = Event(f'{pmu}/cycles\-ct/') + try: + # Test if the tsx event is present in the json, prefer the + # sysfs version so that we can detect its presence at runtime. + transaction_start = Event("RTM_RETIRED.START") + transaction_start = Event(f'{pmu}/tx\-start/') + except: + return None + + elision_start = None + try: + # Elision start isn't supported by all models, but we'll not + # generate the tsx_cycles_per_elision metric in that + # case. Again, prefer the sysfs encoding of the event. 
+ elision_start = Event("HLE_RETIRED.START") + elision_start = Event(f'{pmu}/el\-start/') + except: + pass + + return MetricGroup('transaction', [ + Metric('tsx_transactional_cycles', + 'Percentage of cycles within a transaction region.', + Select(cycles_in_tx / cycles, has_event(cycles_in_tx), 0), + '100%'), + Metric('tsx_aborted_cycles', 'Percentage of cycles in aborted transactions.', + Select(max(cycles_in_tx - cycles_in_tx_cp, 0) / cycles, + has_event(cycles_in_tx), + 0), + '100%'), + Metric('tsx_cycles_per_transaction', + 'Number of cycles within a transaction divided by the number of transactions.', + Select(cycles_in_tx / transaction_start, + has_event(cycles_in_tx), + 0), + "cycles / transaction"), + Metric('tsx_cycles_per_elision', + 'Number of cycles within a transaction divided by the number of elisions.', + Select(cycles_in_tx / elision_start, + has_event(elision_start), + 0), + "cycles / elision") if elision_start else None, + ], description="Breakdown of transactional memory statistics") + + +def IntelBr(): + ins = Event("instructions") + + def Total() -> MetricGroup: + br_all = Event ("BR_INST_RETIRED.ALL_BRANCHES", "BR_INST_RETIRED.ANY") + br_m_all = Event("BR_MISP_RETIRED.ALL_BRANCHES", + "BR_INST_RETIRED.MISPRED", + "BR_MISP_EXEC.ANY") + br_clr = None + try: + br_clr = Event("BACLEARS.ANY", "BACLEARS.ALL") + except: + pass + + br_r = d_ratio(br_all, interval_sec) + ins_r = d_ratio(ins, br_all) + misp_r = d_ratio(br_m_all, br_all) + clr_r = d_ratio(br_clr, interval_sec) if br_clr else None + + return MetricGroup("br_total", [ + Metric("br_total_retired", + "The number of branch instructions retired per second.", br_r, + "insn/s"), + Metric( + "br_total_mispred", + "The number of branch instructions retired, of any type, that were " + "not correctly predicted as a percentage of all branch instrucions.", + misp_r, "100%"), + Metric("br_total_insn_between_branches", + "The number of instructions divided by the number of branches.", + ins_r, "insn"), + Metric("br_total_insn_fe_resteers", + "The number of resync branches per second.", clr_r, "req/s" + ) if clr_r else None + ]) + + def Taken() -> MetricGroup: + br_all = Event("BR_INST_RETIRED.ALL_BRANCHES", "BR_INST_RETIRED.ANY") + br_m_tk = None + try: + br_m_tk = Event("BR_MISP_RETIRED.NEAR_TAKEN", + "BR_MISP_RETIRED.TAKEN_JCC", + "BR_INST_RETIRED.MISPRED_TAKEN") + except: + pass + br_r = d_ratio(br_all, interval_sec) + ins_r = d_ratio(ins, br_all) + misp_r = d_ratio(br_m_tk, br_all) if br_m_tk else None + return MetricGroup("br_taken", [ + Metric("br_taken_retired", + "The number of taken branches that were retired per second.", + br_r, "insn/s"), + Metric( + "br_taken_mispred", + "The number of retired taken branch instructions that were " + "mispredicted as a percentage of all taken branches.", misp_r, + "100%") if misp_r else None, + Metric( + "br_taken_insn_between_branches", + "The number of instructions divided by the number of taken branches.", + ins_r, "insn"), + ]) + + def Conditional() -> Optional[MetricGroup]: + try: + br_cond = Event("BR_INST_RETIRED.COND", + "BR_INST_RETIRED.CONDITIONAL", + "BR_INST_RETIRED.TAKEN_JCC") + br_m_cond = Event("BR_MISP_RETIRED.COND", + "BR_MISP_RETIRED.CONDITIONAL", + "BR_MISP_RETIRED.TAKEN_JCC") + except: + return None + + br_cond_nt = None + br_m_cond_nt = None + try: + br_cond_nt = Event("BR_INST_RETIRED.COND_NTAKEN") + br_m_cond_nt = Event("BR_MISP_RETIRED.COND_NTAKEN") + except: + pass + br_r = d_ratio(br_cond, interval_sec) + ins_r = d_ratio(ins, br_cond) + misp_r = 
d_ratio(br_m_cond, br_cond) + taken_metrics = [ + Metric("br_cond_retired", "Retired conditional branch instructions.", + br_r, "insn/s"), + Metric("br_cond_insn_between_branches", + "The number of instructions divided by the number of conditional " + "branches.", ins_r, "insn"), + Metric("br_cond_mispred", + "Retired conditional branch instructions mispredicted as a " + "percentage of all conditional branches.", misp_r, "100%"), + ] + if not br_m_cond_nt: + return MetricGroup("br_cond", taken_metrics) + + br_r = d_ratio(br_cond_nt, interval_sec) + ins_r = d_ratio(ins, br_cond_nt) + misp_r = d_ratio(br_m_cond_nt, br_cond_nt) + + not_taken_metrics = [ + Metric("br_cond_retired", "Retired conditional not taken branch instructions.", + br_r, "insn/s"), + Metric("br_cond_insn_between_branches", + "The number of instructions divided by the number of not taken conditional " + "branches.", ins_r, "insn"), + Metric("br_cond_mispred", + "Retired not taken conditional branch instructions mispredicted as a " + "percentage of all not taken conditional branches.", misp_r, "100%"), + ] + return MetricGroup("br_cond", [ + MetricGroup("br_cond_nt", not_taken_metrics), + MetricGroup("br_cond_tkn", taken_metrics), + ]) + + def Far() -> Optional[MetricGroup]: + try: + br_far = Event("BR_INST_RETIRED.FAR_BRANCH") + except: + return None + + br_r = d_ratio(br_far, interval_sec) + ins_r = d_ratio(ins, br_far) + return MetricGroup("br_far", [ + Metric("br_far_retired", "Retired far control transfers per second.", + br_r, "insn/s"), + Metric( + "br_far_insn_between_branches", + "The number of instructions divided by the number of far branches.", + ins_r, "insn"), + ]) + + return MetricGroup("br", [Total(), Taken(), Conditional(), Far()], + description="breakdown of retired branch instructions") + + +def IntelCtxSw() -> MetricGroup: + cs = Event("context\-switches") + metrics = [ + Metric("cs_rate", "Context switches per second", d_ratio(cs, interval_sec), "ctxsw/s") + ] + + ev = Event("instructions") + metrics.append(Metric("cs_instr", "Instructions per context switch", + d_ratio(ev, cs), "instr/cs")) + + ev = Event("cycles") + metrics.append(Metric("cs_cycles", "Cycles per context switch", + d_ratio(ev, cs), "cycles/cs")) + + try: + ev = Event("MEM_INST_RETIRED.ALL_LOADS", "MEM_UOPS_RETIRED.ALL_LOADS") + metrics.append(Metric("cs_loads", "Loads per context switch", + d_ratio(ev, cs), "loads/cs")) + except: + pass + + try: + ev = Event("MEM_INST_RETIRED.ALL_STORES", "MEM_UOPS_RETIRED.ALL_STORES") + metrics.append(Metric("cs_stores", "Stores per context switch", + d_ratio(ev, cs), "stores/cs")) + except: + pass + + try: + ev = Event("BR_INST_RETIRED.NEAR_TAKEN", "BR_INST_RETIRED.TAKEN_JCC") + metrics.append(Metric("cs_br_taken", "Branches taken per context switch", + d_ratio(ev, cs), "br_taken/cs")) + except: + pass + + try: + l2_misses = (Event("L2_RQSTS.DEMAND_DATA_RD_MISS") + + Event("L2_RQSTS.RFO_MISS") + + Event("L2_RQSTS.CODE_RD_MISS")) + try: + l2_misses += Event("L2_RQSTS.HWPF_MISS", "L2_RQSTS.L2_PF_MISS", "L2_RQSTS.PF_MISS") + except: + pass + + metrics.append(Metric("cs_l2_misses", "L2 misses per context switch", + d_ratio(l2_misses, cs), "l2_misses/cs")) + except: + pass + + return MetricGroup("cs", metrics, + description = ("Number of context switches per second, instructions " + "retired & core cycles between context switches")) + + +def IntelFpu() -> Optional[MetricGroup]: + cyc = Event("cycles") + try: + s_64 = Event("FP_ARITH_INST_RETIRED.SCALAR_SINGLE", + "SIMD_INST_RETIRED.SCALAR_SINGLE") + 
except: + return None + d_64 = Event("FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", + "SIMD_INST_RETIRED.SCALAR_DOUBLE") + s_128 = Event("FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", + "SIMD_INST_RETIRED.PACKED_SINGLE") + + flop = s_64 + d_64 + 4 * s_128 + + d_128 = None + s_256 = None + d_256 = None + s_512 = None + d_512 = None + try: + d_128 = Event("FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE") + flop += 2 * d_128 + s_256 = Event("FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE") + flop += 8 * s_256 + d_256 = Event("FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE") + flop += 4 * d_256 + s_512 = Event("FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE") + flop += 16 * s_512 + d_512 = Event("FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE") + flop += 8 * d_512 + except: + pass + + f_assist = Event("ASSISTS.FP", "FP_ASSIST.ANY", "FP_ASSIST.S") + if f_assist in [ + "ASSISTS.FP", + "FP_ASSIST.S", + ]: + f_assist += "/cmask=1/" + + flop_r = d_ratio(flop, interval_sec) + flop_c = d_ratio(flop, cyc) + nmi_constraint = MetricConstraint.GROUPED_EVENTS + if f_assist.name == "ASSISTS.FP": # Icelake+ + nmi_constraint = MetricConstraint.NO_GROUP_EVENTS_NMI + def FpuMetrics(group: str, fl: Optional[Event], mult: int, desc: str) -> Optional[MetricGroup]: + if not fl: + return None + + f = fl * mult + fl_r = d_ratio(f, interval_sec) + r_s = d_ratio(fl, interval_sec) + return MetricGroup(group, [ + Metric(f"{group}_of_total", desc + " floating point operations per second", + d_ratio(f, flop), "100%"), + Metric(f"{group}_flops", desc + " floating point operations per second", + fl_r, "flops/s"), + Metric(f"{group}_ops", desc + " operations per second", + r_s, "ops/s"), + ]) + + return MetricGroup("fpu", [ + MetricGroup("fpu_total", [ + Metric("fpu_total_flops", "Floating point operations per second", + flop_r, "flops/s"), + Metric("fpu_total_flopc", "Floating point operations per cycle", + flop_c, "flops/cycle", constraint=nmi_constraint), + ]), + MetricGroup("fpu_64", [ + FpuMetrics("fpu_64_single", s_64, 1, "64-bit single"), + FpuMetrics("fpu_64_double", d_64, 1, "64-bit double"), + ]), + MetricGroup("fpu_128", [ + FpuMetrics("fpu_128_single", s_128, 4, "128-bit packed single"), + FpuMetrics("fpu_128_double", d_128, 2, "128-bit packed double"), + ]), + MetricGroup("fpu_256", [ + FpuMetrics("fpu_256_single", s_256, 8, "128-bit packed single"), + FpuMetrics("fpu_256_double", d_256, 4, "128-bit packed double"), + ]), + MetricGroup("fpu_512", [ + FpuMetrics("fpu_512_single", s_512, 16, "128-bit packed single"), + FpuMetrics("fpu_512_double", d_512, 8, "128-bit packed double"), + ]), + Metric("fpu_assists", "FP assists as a percentage of cycles", + d_ratio(f_assist, cyc), "100%"), + ]) + + +def IntelIlp() -> MetricGroup: + tsc = Event("msr/tsc/") + c0 = Event("msr/mperf/") + low = tsc - c0 + inst_ret = Event("INST_RETIRED.ANY_P") + inst_ret_c = [Event(f"{inst_ret.name}/cmask={x}/") for x in range(1, 6)] + core_cycles = Event("CPU_CLK_UNHALTED.THREAD_P_ANY", + "CPU_CLK_UNHALTED.DISTRIBUTED", + "cycles") + ilp = [d_ratio(max(inst_ret_c[x] - inst_ret_c[x + 1], 0), core_cycles) for x in range(0, 4)] + ilp.append(d_ratio(inst_ret_c[4], core_cycles)) + ilp0 = 1 + for x in ilp: + ilp0 -= x + return MetricGroup("ilp", [ + Metric("ilp_idle", "Lower power cycles as a percentage of all cycles", + d_ratio(low, tsc), "100%"), + Metric("ilp_inst_ret_0", "Instructions retired in 0 cycles as a percentage of all cycles", + ilp0, "100%"), + Metric("ilp_inst_ret_1", "Instructions retired in 1 cycles as a percentage of all cycles", + ilp[0], "100%"), + 
Metric("ilp_inst_ret_2", "Instructions retired in 2 cycles as a percentage of all cycles", + ilp[1], "100%"), + Metric("ilp_inst_ret_3", "Instructions retired in 3 cycles as a percentage of all cycles", + ilp[2], "100%"), + Metric("ilp_inst_ret_4", "Instructions retired in 4 cycles as a percentage of all cycles", + ilp[3], "100%"), + Metric("ilp_inst_ret_5", "Instructions retired in 5 or more cycles as a percentage of all cycles", + ilp[4], "100%"), + ]) + + +def IntelL2() -> Optional[MetricGroup]: + try: + DC_HIT = Event("L2_RQSTS.DEMAND_DATA_RD_HIT") + except: + return None + try: + DC_MISS = Event("L2_RQSTS.DEMAND_DATA_RD_MISS") + l2_dmnd_miss = DC_MISS + l2_dmnd_rd_all = DC_MISS + DC_HIT + except: + DC_ALL = Event("L2_RQSTS.ALL_DEMAND_DATA_RD") + l2_dmnd_miss = DC_ALL - DC_HIT + l2_dmnd_rd_all = DC_ALL + l2_dmnd_mrate = d_ratio(l2_dmnd_miss, interval_sec) + l2_dmnd_rrate = d_ratio(l2_dmnd_rd_all, interval_sec) + + DC_PFH = None + DC_PFM = None + l2_pf_all = None + l2_pf_mrate = None + l2_pf_rrate = None + try: + DC_PFH = Event("L2_RQSTS.PF_HIT") + DC_PFM = Event("L2_RQSTS.PF_MISS") + l2_pf_all = DC_PFH + DC_PFM + l2_pf_mrate = d_ratio(DC_PFM, interval_sec) + l2_pf_rrate = d_ratio(l2_pf_all, interval_sec) + except: + pass + + DC_RFOH = Event("L2_RQSTS.RFO_HIT") + DC_RFOM = Event("L2_RQSTS.RFO_MISS") + l2_rfo_all = DC_RFOH + DC_RFOM + l2_rfo_mrate = d_ratio(DC_RFOM, interval_sec) + l2_rfo_rrate = d_ratio(l2_rfo_all, interval_sec) + + DC_CH = Event("L2_RQSTS.CODE_RD_HIT") + DC_CM = Event("L2_RQSTS.CODE_RD_MISS") + DC_IN = Event("L2_LINES_IN.ALL") + DC_OUT_NS = None + DC_OUT_S = None + l2_lines_out = None + l2_out_rate = None + wbn = None + isd = None + try: + DC_OUT_NS = Event("L2_LINES_OUT.NON_SILENT", + "L2_LINES_OUT.DEMAND_DIRTY", + "L2_LINES_IN.S") + DC_OUT_S = Event("L2_LINES_OUT.SILENT", + "L2_LINES_OUT.DEMAND_CLEAN", + "L2_LINES_IN.I") + if DC_OUT_S.name == "L2_LINES_OUT.SILENT" and ( + args.model.startswith("skylake") or + args.model == "cascadelakex"): + DC_OUT_S.name = "L2_LINES_OUT.SILENT/any/" + # bring is back to per-CPU + l2_s = Select(DC_OUT_S / 2, Literal("#smt_on"), DC_OUT_S) + l2_ns = DC_OUT_NS + l2_lines_out = l2_s + l2_ns; + l2_out_rate = d_ratio(l2_lines_out, interval_sec); + nlr = max(l2_ns - DC_WB_U - DC_WB_D, 0) + wbn = d_ratio(nlr, interval_sec) + isd = d_ratio(l2_s, interval_sec) + except: + pass + DC_OUT_U = None + l2_pf_useless = None + l2_useless_rate = None + try: + DC_OUT_U = Event("L2_LINES_OUT.USELESS_HWPF") + l2_pf_useless = DC_OUT_U + l2_useless_rate = d_ratio(l2_pf_useless, interval_sec) + except: + pass + DC_WB_U = None + DC_WB_D = None + wbu = None + wbd = None + try: + DC_WB_U = Event("IDI_MISC.WB_UPGRADE") + DC_WB_D = Event("IDI_MISC.WB_DOWNGRADE") + wbu = d_ratio(DC_WB_U, interval_sec) + wbd = d_ratio(DC_WB_D, interval_sec) + except: + pass + + l2_lines_in = DC_IN + l2_code_all = DC_CH + DC_CM + l2_code_rate = d_ratio(l2_code_all, interval_sec) + l2_code_miss_rate = d_ratio(DC_CM, interval_sec) + l2_in_rate = d_ratio(l2_lines_in, interval_sec) + + return MetricGroup("l2", [ + MetricGroup("l2_totals", [ + Metric("l2_totals_in", "L2 cache total in per second", + l2_in_rate, "In/s"), + Metric("l2_totals_out", "L2 cache total out per second", + l2_out_rate, "Out/s") if l2_out_rate else None, + ]), + MetricGroup("l2_rd", [ + Metric("l2_rd_hits", "L2 cache data read hits", + d_ratio(DC_HIT, l2_dmnd_rd_all), "100%"), + Metric("l2_rd_hits", "L2 cache data read hits", + d_ratio(l2_dmnd_miss, l2_dmnd_rd_all), "100%"), + Metric("l2_rd_requests", "L2 cache 
data read requests per second", + l2_dmnd_rrate, "requests/s"), + Metric("l2_rd_misses", "L2 cache data read misses per second", + l2_dmnd_mrate, "misses/s"), + ]), + MetricGroup("l2_hwpf", [ + Metric("l2_hwpf_hits", "L2 cache hardware prefetcher hits", + d_ratio(DC_PFH, l2_pf_all), "100%"), + Metric("l2_hwpf_misses", "L2 cache hardware prefetcher misses", + d_ratio(DC_PFM, l2_pf_all), "100%"), + Metric("l2_hwpf_useless", "L2 cache hardware prefetcher useless prefetches per second", + l2_useless_rate, "100%") if l2_useless_rate else None, + Metric("l2_hwpf_requests", "L2 cache hardware prefetcher requests per second", + l2_pf_rrate, "100%"), + Metric("l2_hwpf_misses", "L2 cache hardware prefetcher misses per second", + l2_pf_mrate, "100%"), + ]) if DC_PFH else None, + MetricGroup("l2_rfo", [ + Metric("l2_rfo_hits", "L2 cache request for ownership (RFO) hits", + d_ratio(DC_RFOH, l2_rfo_all), "100%"), + Metric("l2_rfo_misses", "L2 cache request for ownership (RFO) misses", + d_ratio(DC_RFOM, l2_rfo_all), "100%"), + Metric("l2_rfo_requests", "L2 cache request for ownership (RFO) requests per second", + l2_rfo_rrate, "requests/s"), + Metric("l2_rfo_misses", "L2 cache request for ownership (RFO) misses per second", + l2_rfo_mrate, "misses/s"), + ]), + MetricGroup("l2_code", [ + Metric("l2_code_hits", "L2 cache code hits", + d_ratio(DC_CH, l2_code_all), "100%"), + Metric("l2_code_misses", "L2 cache code misses", + d_ratio(DC_CM, l2_code_all), "100%"), + Metric("l2_code_requests", "L2 cache code requests per second", + l2_code_rate, "requests/s"), + Metric("l2_code_misses", "L2 cache code misses per second", + l2_code_miss_rate, "misses/s"), + ]), + MetricGroup("l2_evict", [ + MetricGroup("l2_evict_mef_lines", [ + Metric("l2_evict_mef_lines_l3_hot_lru", "L2 evictions M/E/F lines L3 hot LRU per second", + wbu, "HotLRU/s") if wbu else None, + Metric("l2_evict_mef_lines_l3_norm_lru", "L2 evictions M/E/F lines L3 normal LRU per second", + wbn, "NormLRU/s") if wbn else None, + Metric("l2_evict_mef_lines_dropped", "L2 evictions M/E/F lines dropped per second", + wbd, "dropped/s") if wbd else None, + Metric("l2_evict_is_lines_dropped", "L2 evictions I/S lines dropped per second", + isd, "dropped/s") if isd else None, + ]), + ]), + ], description = "L2 data cache analysis") + + +def IntelMissLat() -> Optional[MetricGroup]: + try: + ticks = Event("UNC_CHA_CLOCKTICKS", "UNC_C_CLOCKTICKS") + data_rd_loc_occ = Event("UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL", + "UNC_CHA_TOR_OCCUPANCY.IA_MISS", + "UNC_C_TOR_OCCUPANCY.MISS_LOCAL_OPCODE", + "UNC_C_TOR_OCCUPANCY.MISS_OPCODE") + data_rd_loc_ins = Event("UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL", + "UNC_CHA_TOR_INSERTS.IA_MISS", + "UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE", + "UNC_C_TOR_INSERTS.MISS_OPCODE") + data_rd_rem_occ = Event("UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE", + "UNC_CHA_TOR_OCCUPANCY.IA_MISS", + "UNC_C_TOR_OCCUPANCY.MISS_REMOTE_OPCODE", + "UNC_C_TOR_OCCUPANCY.NID_MISS_OPCODE") + data_rd_rem_ins = Event("UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE", + "UNC_CHA_TOR_INSERTS.IA_MISS", + "UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE", + "UNC_C_TOR_INSERTS.NID_MISS_OPCODE") + except: + return None + + if (data_rd_loc_occ.name == "UNC_C_TOR_OCCUPANCY.MISS_LOCAL_OPCODE" or + data_rd_loc_occ.name == "UNC_C_TOR_OCCUPANCY.MISS_OPCODE"): + data_rd = 0x182 + for e in [data_rd_loc_occ, data_rd_loc_ins, data_rd_rem_occ, data_rd_rem_ins]: + e.name += f"/filter_opc={hex(data_rd)}/" + elif data_rd_loc_occ.name == "UNC_CHA_TOR_OCCUPANCY.IA_MISS": + # Demand Data Read - Full cache-line 
read requests from core for + # lines to be cached in S or E, typically for data + demand_data_rd = 0x202 + # LLC Prefetch Data - Uncore will first look up the line in the + # LLC; for a cache hit, the LRU will be updated, on a miss, the + # DRd will be initiated + llc_prefetch_data = 0x25a + local_filter = (f"/filter_opc0={hex(demand_data_rd)}," + f"filter_opc1={hex(llc_prefetch_data)}," + "filter_loc,filter_nm,filter_not_nm/") + remote_filter = (f"/filter_opc0={hex(demand_data_rd)}," + f"filter_opc1={hex(llc_prefetch_data)}," + "filter_rem,filter_nm,filter_not_nm/") + for e in [data_rd_loc_occ, data_rd_loc_ins]: + e.name += local_filter + for e in [data_rd_rem_occ, data_rd_rem_ins]: + e.name += remote_filter + else: + assert data_rd_loc_occ.name == "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL", data_rd_loc_occ + + ticks_per_cha = ticks / source_count(data_rd_loc_ins) + loc_lat = interval_sec * 1e9 * data_rd_loc_occ / (ticks_per_cha * data_rd_loc_ins) + ticks_per_cha = ticks / source_count(data_rd_rem_ins) + rem_lat = interval_sec * 1e9 * data_rd_rem_occ / (ticks_per_cha * data_rd_rem_ins) + return MetricGroup("miss_lat", [ + Metric("miss_lat_loc", "Local to a socket miss latency in nanoseconds", + loc_lat, "ns"), + Metric("miss_lat_rem", "Remote to a socket miss latency in nanoseconds", + rem_lat, "ns"), + ]) + + +def IntelMlp() -> Optional[Metric]: + try: + l1d = Event("L1D_PEND_MISS.PENDING") + l1dc = Event("L1D_PEND_MISS.PENDING_CYCLES") + except: + return None + + l1dc = Select(l1dc / 2, Literal("#smt_on"), l1dc) + ml = d_ratio(l1d, l1dc) + return Metric("mlp", + "Miss level parallelism - number of outstanding load misses per cycle (higher is better)", + ml, "load_miss_pending/cycle") + + +def IntelPorts() -> Optional[MetricGroup]: + pipeline_events = json.load(open(f"{_args.events_path}/x86/{_args.model}/pipeline.json")) + + core_cycles = Event("CPU_CLK_UNHALTED.THREAD_P_ANY", + "CPU_CLK_UNHALTED.DISTRIBUTED", + "cycles") + # Number of CPU cycles scaled for SMT. 
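As an illustrative aside (a minimal standalone sketch, not taken from this series), the Select() used on the next line is the expression-tree form of perf's "A if COND else B" ternary; assuming it is run from tools/perf/pmu-events with no event json loaded, so event-name validation is skipped:

  from metric import Event, Literal, Select

  core_cycles = Event("CPU_CLK_UNHALTED.THREAD_P_ANY")
  # Halve the per-core count when SMT is on, otherwise use it unchanged.
  smt_cycles = Select(core_cycles / 2, Literal("#smt_on"), core_cycles)
  # Prints roughly: CPU_CLK_UNHALTED.THREAD_P_ANY / 2 if #smt_on else CPU_CLK_UNHALTED.THREAD_P_ANY
  print(smt_cycles.ToPerfJson())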
+ smt_cycles = Select(core_cycles / 2, Literal("#smt_on"), core_cycles) + + metrics = [] + for x in pipeline_events: + if "EventName" in x and re.search("^UOPS_DISPATCHED.PORT", x["EventName"]): + name = x["EventName"] + port = re.search(r"(PORT_[0-9].*)", name).group(0).lower() + if name.endswith("_CORE"): + cyc = core_cycles + else: + cyc = smt_cycles + metrics.append(Metric(port, f"{port} utilization (higher is better)", + d_ratio(Event(name), cyc), "100%")) + if len(metrics) == 0: + return None + + return MetricGroup("ports", metrics, "functional unit (port) utilization -- " + "fraction of cycles each port is utilized (higher is better)") + + +def IntelSwpf() -> Optional[MetricGroup]: + ins = Event("instructions") + try: + s_ld = Event("MEM_INST_RETIRED.ALL_LOADS", "MEM_UOPS_RETIRED.ALL_LOADS") + s_nta = Event("SW_PREFETCH_ACCESS.NTA") + s_t0 = Event("SW_PREFETCH_ACCESS.T0") + s_t1 = Event("SW_PREFETCH_ACCESS.T1_T2") + s_w = Event("SW_PREFETCH_ACCESS.PREFETCHW") + except: + return None + + all_sw = s_nta + s_t0 + s_t1 + s_w + swp_r = d_ratio(all_sw, interval_sec) + ins_r = d_ratio(ins, all_sw) + ld_r = d_ratio(s_ld, all_sw) + + return MetricGroup("swpf", [ + MetricGroup("swpf_totals", [ + Metric("swpf_totals_exec", "Software prefetch instructions per second", + swp_r, "swpf/s"), + Metric("swpf_totals_insn_per_pf", + "Average number of instructions between software prefetches", + ins_r, "insn/swpf"), + Metric("swpf_totals_loads_per_pf", + "Average number of loads between software prefetches", + ld_r, "loads/swpf"), + ]), + MetricGroup("swpf_bkdwn", [ + MetricGroup("swpf_bkdwn_nta", [ + Metric("swpf_bkdwn_nta_per_swpf", + "Software prefetch NTA instructions as a percent of all prefetch instructions", + d_ratio(s_nta, all_sw), "100%"), + Metric("swpf_bkdwn_nta_rate", + "Software prefetch NTA instructions per second", + d_ratio(s_nta, interval_sec), "insn/s"), + ]), + MetricGroup("swpf_bkdwn_t0", [ + Metric("swpf_bkdwn_t0_per_swpf", + "Software prefetch T0 instructions as a percent of all prefetch instructions", + d_ratio(s_t0, all_sw), "100%"), + Metric("swpf_bkdwn_t0_rate", + "Software prefetch T0 instructions per second", + d_ratio(s_t0, interval_sec), "insn/s"), + ]), + MetricGroup("swpf_bkdwn_t1_t2", [ + Metric("swpf_bkdwn_t1_t2_per_swpf", + "Software prefetch T1 or T2 instructions as a percent of all prefetch instructions", + d_ratio(s_t1, all_sw), "100%"), + Metric("swpf_bkdwn_t1_t2_rate", + "Software prefetch T1 or T2 instructions per second", + d_ratio(s_t1, interval_sec), "insn/s"), + ]), + MetricGroup("swpf_bkdwn_w", [ + Metric("swpf_bkdwn_w_per_swpf", + "Software prefetch W instructions as a percent of all prefetch instructions", + d_ratio(s_w, all_sw), "100%"), + Metric("swpf_bkdwn_w_rate", + "Software prefetch W instructions per second", + d_ratio(s_w, interval_sec), "insn/s"), + ]), + ]), + ], description="Software prefetch instruction breakdown") + + +def IntelLdSt() -> Optional[MetricGroup]: + if _args.model in [ + "bonnell", + "nehalemep", + "nehalemex", + "westmereep-dp", + "westmereep-sp", + "westmereex", + ]: + return None + LDST_LD = Event("MEM_INST_RETIRED.ALL_LOADS", "MEM_UOPS_RETIRED.ALL_LOADS") + LDST_ST = Event("MEM_INST_RETIRED.ALL_STORES", "MEM_UOPS_RETIRED.ALL_STORES") + LDST_LDC1 = Event(f"{LDST_LD.name}/cmask=1/") + LDST_STC1 = Event(f"{LDST_ST.name}/cmask=1/") + LDST_LDC2 = Event(f"{LDST_LD.name}/cmask=2/") + LDST_STC2 = Event(f"{LDST_ST.name}/cmask=2/") + LDST_LDC3 = Event(f"{LDST_LD.name}/cmask=3/") + LDST_STC3 = Event(f"{LDST_ST.name}/cmask=3/") + ins = 
Event("instructions") + LDST_CYC = Event("CPU_CLK_UNHALTED.THREAD", + "CPU_CLK_UNHALTED.CORE_P", + "CPU_CLK_UNHALTED.THREAD_P") + LDST_PRE = None + try: + LDST_PRE = Event("LOAD_HIT_PREFETCH.SWPF", "LOAD_HIT_PRE.SW_PF") + except: + pass + LDST_AT = None + try: + LDST_AT = Event("MEM_INST_RETIRED.LOCK_LOADS") + except: + pass + cyc = LDST_CYC + + ld_rate = d_ratio(LDST_LD, interval_sec) + st_rate = d_ratio(LDST_ST, interval_sec) + pf_rate = d_ratio(LDST_PRE, interval_sec) if LDST_PRE else None + at_rate = d_ratio(LDST_AT, interval_sec) if LDST_AT else None + + ldst_ret_constraint = MetricConstraint.GROUPED_EVENTS + if LDST_LD.name == "MEM_UOPS_RETIRED.ALL_LOADS": + ldst_ret_constraint = MetricConstraint.NO_GROUP_EVENTS_NMI + + return MetricGroup("ldst", [ + MetricGroup("ldst_total", [ + Metric("ldst_total_loads", "Load/store instructions total loads", + ld_rate, "loads"), + Metric("ldst_total_stores", "Load/store instructions total stores", + st_rate, "stores"), + ]), + MetricGroup("ldst_prcnt", [ + Metric("ldst_prcnt_loads", "Percent of all instructions that are loads", + d_ratio(LDST_LD, ins), "100%"), + Metric("ldst_prcnt_stores", "Percent of all instructions that are stores", + d_ratio(LDST_ST, ins), "100%"), + ]), + MetricGroup("ldst_ret_lds", [ + Metric("ldst_ret_lds_1", "Retired loads in 1 cycle", + d_ratio(max(LDST_LDC1 - LDST_LDC2, 0), cyc), "100%", + constraint = ldst_ret_constraint), + Metric("ldst_ret_lds_2", "Retired loads in 2 cycles", + d_ratio(max(LDST_LDC2 - LDST_LDC3, 0), cyc), "100%", + constraint = ldst_ret_constraint), + Metric("ldst_ret_lds_3", "Retired loads in 3 or more cycles", + d_ratio(LDST_LDC3, cyc), "100%"), + ]), + MetricGroup("ldst_ret_sts", [ + Metric("ldst_ret_sts_1", "Retired stores in 1 cycle", + d_ratio(max(LDST_STC1 - LDST_STC2, 0), cyc), "100%", + constraint = ldst_ret_constraint), + Metric("ldst_ret_sts_2", "Retired stores in 2 cycles", + d_ratio(max(LDST_STC2 - LDST_STC3, 0), cyc), "100%", + constraint = ldst_ret_constraint), + Metric("ldst_ret_sts_3", "Retired stores in 3 more cycles", + d_ratio(LDST_STC3, cyc), "100%"), + ]), + Metric("ldst_ld_hit_swpf", "Load hit software prefetches per second", + pf_rate, "swpf/s") if pf_rate else None, + Metric("ldst_atomic_lds", "Atomic loads per second", + at_rate, "loads/s") if at_rate else None, + ], description = "Breakdown of load/store instructions") + + +def UncoreCState() -> Optional[MetricGroup]: + try: + pcu_ticks = Event("UNC_P_CLOCKTICKS") + c0 = Event("UNC_P_POWER_STATE_OCCUPANCY.CORES_C0") + c3 = Event("UNC_P_POWER_STATE_OCCUPANCY.CORES_C3") + c6 = Event("UNC_P_POWER_STATE_OCCUPANCY.CORES_C6") + except: + return None + + num_cores = Literal("#num_cores") / Literal("#num_packages") + + max_cycles = pcu_ticks * num_cores; + total_cycles = c0 + c3 + c6 + + # remove fused-off cores which show up in C6/C7. + c6 = Select(max(c6 - (total_cycles - max_cycles), 0), + total_cycles > max_cycles, + c6) + + return MetricGroup("cstate", [ + Metric("cstate_c0", "C-State cores in C0/C1", d_ratio(c0, pcu_ticks), "cores"), + Metric("cstate_c3", "C-State cores in C3", d_ratio(c3, pcu_ticks), "cores"), + Metric("cstate_c6", "C-State cores in C6/C7", d_ratio(c6, pcu_ticks), "cores"), + ]) + + +def UncoreDir() -> Optional[MetricGroup]: + try: + m2m_upd = Event("UNC_M2M_DIRECTORY_UPDATE.ANY") + m2m_hits = Event("UNC_M2M_DIRECTORY_HIT.DIRTY_I") + # Turn the umask into a ANY rather than DIRTY_I filter. 
+ m2m_hits.name += "/umask=0xFF,name=UNC_M2M_DIRECTORY_HIT.ANY/" + m2m_miss = Event("UNC_M2M_DIRECTORY_MISS.DIRTY_I") + # Turn the umask into a ANY rather than DIRTY_I filter. + m2m_miss.name += "/umask=0xFF,name=UNC_M2M_DIRECTORY_MISS.ANY/" + cha_upd = Event("UNC_CHA_DIR_UPDATE.HA") + # Turn the umask into a ANY rather than HA filter. + cha_upd.name += "/umask=3,name=UNC_CHA_DIR_UPDATE.ANY/" + except: + return None + + m2m_total = m2m_hits + m2m_miss + upd = m2m_upd + cha_upd # in cache lines + upd_r = upd / interval_sec + look_r = m2m_total / interval_sec + + scale = 64 / 1_000_000 # Cache lines to MB + return MetricGroup("dir", [ + Metric("dir_lookup_rate", "", + d_ratio(m2m_total, interval_sec), "requests/s"), + Metric("dir_lookup_hits", "", + d_ratio(m2m_hits, m2m_total), "100%"), + Metric("dir_lookup_misses", "", + d_ratio(m2m_miss, m2m_total), "100%"), + Metric("dir_update_requests", "", + d_ratio(m2m_upd + cha_upd, interval_sec), "requests/s"), + Metric("dir_update_bw", "", + d_ratio(m2m_upd + cha_upd, interval_sec), f"{scale}MB/s"), + ]) + + +def UncoreMem() -> Optional[MetricGroup]: + try: + loc_rds = Event("UNC_CHA_REQUESTS.READS_LOCAL", "UNC_H_REQUESTS.READS_LOCAL") + rem_rds = Event("UNC_CHA_REQUESTS.READS_REMOTE", "UNC_H_REQUESTS.READS_REMOTE") + loc_wrs = Event("UNC_CHA_REQUESTS.WRITES_LOCAL", "UNC_H_REQUESTS.WRITES_LOCAL") + rem_wrs = Event("UNC_CHA_REQUESTS.WRITES_REMOTE", "UNC_H_REQUESTS.WRITES_REMOTE") + except: + return None + + scale = 64 / 1_000_000 + return MetricGroup("mem", [ + MetricGroup("mem_local", [ + Metric("mem_local_read", "Local memory read bandwidth not including directory updates", + d_ratio(loc_rds, interval_sec), f"{scale}MB/s"), + Metric("mem_local_write", "Local memory write bandwidth not including directory updates", + d_ratio(loc_wrs, interval_sec), f"{scale}MB/s"), + ]), + MetricGroup("mem_remote", [ + Metric("mem_remote_read", "Remote memory read bandwidth not including directory updates", + d_ratio(rem_rds, interval_sec), f"{scale}MB/s"), + Metric("mem_remote_write", "Remote memory write bandwidth not including directory updates", + d_ratio(rem_wrs, interval_sec), f"{scale}MB/s"), + ]), + ], description = "Memory Bandwidth breakdown local vs. remote (remote requests in). 
Directory updates not included")
+
+
+def UncoreMemBw() -> Optional[MetricGroup]:
+  mem_events = []
+  try:
+    mem_events = json.load(open(f"{os.path.dirname(os.path.realpath(__file__))}"
+                                f"/arch/x86/{_args.model}/uncore-memory.json"))
+  except:
+    pass
+
+  ddr_rds = 0
+  ddr_wrs = 0
+  ddr_total = 0
+  for x in mem_events:
+    if "EventName" in x:
+      name = x["EventName"]
+      if re.search("^UNC_MC[0-9]+_RDCAS_COUNT_FREERUN", name):
+        ddr_rds += Event(name)
+      elif re.search("^UNC_MC[0-9]+_WRCAS_COUNT_FREERUN", name):
+        ddr_wrs += Event(name)
+      #elif re.search("^UNC_MC[0-9]+_TOTAL_REQCOUNT_FREERUN", name):
+      #  ddr_total += Event(name)
+
+  if ddr_rds == 0:
+    try:
+      ddr_rds = Event("UNC_M_CAS_COUNT.RD")
+      ddr_wrs = Event("UNC_M_CAS_COUNT.WR")
+    except:
+      return None
+
+  ddr_total = ddr_rds + ddr_wrs
+
+  pmm_rds = 0
+  pmm_wrs = 0
+  try:
+    pmm_rds = Event("UNC_M_PMM_RPQ_INSERTS")
+    pmm_wrs = Event("UNC_M_PMM_WPQ_INSERTS")
+  except:
+    pass
+
+  pmm_total = pmm_rds + pmm_wrs
+
+  scale = 64 / 1_000_000
+  return MetricGroup("mem_bw", [
+    MetricGroup("mem_bw_ddr", [
+      Metric("mem_bw_ddr_read", "DDR memory read bandwidth",
+             d_ratio(ddr_rds, interval_sec), f"{scale}MB/s"),
+      Metric("mem_bw_ddr_write", "DDR memory write bandwidth",
+             d_ratio(ddr_wrs, interval_sec), f"{scale}MB/s"),
+      Metric("mem_bw_ddr_total", "DDR memory total bandwidth",
+             d_ratio(ddr_total, interval_sec), f"{scale}MB/s"),
+    ], description = "DDR Memory Bandwidth"),
+    MetricGroup("mem_bw_pmm", [
+      Metric("mem_bw_pmm_read", "PMM memory read bandwidth",
+             d_ratio(pmm_rds, interval_sec), f"{scale}MB/s"),
+      Metric("mem_bw_pmm_write", "PMM memory write bandwidth",
+             d_ratio(pmm_wrs, interval_sec), f"{scale}MB/s"),
+      Metric("mem_bw_pmm_total", "PMM memory total bandwidth",
+             d_ratio(pmm_total, interval_sec), f"{scale}MB/s"),
+    ], description = "PMM Memory Bandwidth") if pmm_rds != 0 else None,
+  ], description = "Memory Bandwidth")
+
+
+def UncoreMemSat() -> Optional[Metric]:
+  try:
+    clocks = Event("UNC_CHA_CLOCKTICKS", "UNC_C_CLOCKTICKS")
+    sat = Event("UNC_CHA_DISTRESS_ASSERTED.VERT", "UNC_CHA_FAST_ASSERTED.VERT",
+                "UNC_C_FAST_ASSERTED")
+  except:
+    return None
+
+  desc = ("Mesh Bandwidth saturation (% CBOX cycles with FAST signal asserted, "
+          "includes QPI bandwidth saturation), lower is better")
+  if "UNC_CHA_" in sat.name:
+    desc = ("Mesh Bandwidth saturation (% CHA cycles with FAST signal asserted, "
+            "includes UPI bandwidth saturation), lower is better")
+  return Metric("mem_sat", desc, d_ratio(sat, clocks), "100%")
+
+
+def UncoreUpiBw() -> Optional[MetricGroup]:
+  try:
+    upi_rds = Event("UNC_UPI_RxL_FLITS.ALL_DATA")
+    upi_wrs = Event("UNC_UPI_TxL_FLITS.ALL_DATA")
+  except:
+    return None
+
+  upi_total = upi_rds + upi_wrs
+
+  # From "Uncore Performance Monitoring": When measuring the amount of
+  # bandwidth consumed by transmission of the data (i.e. NOT including
+  # the header), it should be .ALL_DATA / 9 * 64B.
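A quick worked version of the quoted rule, to show where the constant on the next line comes from (illustrative arithmetic only):

  # 64 bytes of payload travel in 9 ALL_DATA flits, so:
  #   MB/s = flits/s * 64 / 9 / 1_000_000
  bytes_per_flit = 64 / 9   # roughly 7.1 bytes of payload per flit
  # Folding the bytes-to-MB conversion in gives the (64 / 9) / 1_000_000 ScaleUnit factor below.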
+  scale = (64 / 9) / 1_000_000
+  return MetricGroup("upi_bw", [
+    Metric("upi_bw_read", "UPI read bandwidth",
+           d_ratio(upi_rds, interval_sec), f"{scale}MB/s"),
+    Metric("upi_bw_write", "UPI write bandwidth",
+           d_ratio(upi_wrs, interval_sec), f"{scale}MB/s"),
+  ], description = "UPI Bandwidth")
+
+
+def main() -> None:
+  global _args
+
+  def dir_path(path: str) -> str:
+    """Validate path is a directory for argparse."""
+    if os.path.isdir(path):
+      return path
+    raise argparse.ArgumentTypeError(f'\'{path}\' is not a valid directory')
+
+  parser = argparse.ArgumentParser(description="Intel perf json generator")
+  parser.add_argument("-metricgroups", help="Generate metricgroups data", action='store_true')
+  parser.add_argument("model", help="e.g. skylakex")
+  parser.add_argument(
+      'events_path',
+      type=dir_path,
+      help='Root of tree containing architecture directories containing json files'
+  )
+  _args = parser.parse_args()
+
+  directory = f"{_args.events_path}/x86/{_args.model}/"
+  LoadEvents(directory)
+
+  all_metrics = MetricGroup("", [
+    Cycles(),
+    Idle(),
+    Rapl(),
+    Smi(),
+    Tsx(),
+    IntelBr(),
+    IntelCtxSw(),
+    IntelFpu(),
+    IntelIlp(),
+    IntelL2(),
+    IntelLdSt(),
+    IntelMissLat(),
+    IntelMlp(),
+    IntelPorts(),
+    IntelSwpf(),
+    UncoreCState(),
+    UncoreDir(),
+    UncoreMem(),
+    UncoreMemBw(),
+    UncoreMemSat(),
+    UncoreUpiBw(),
+  ])
+
+  if _args.metricgroups:
+    print(JsonEncodeMetricGroupDescriptions(all_metrics))
+  else:
+    print(JsonEncodeMetric(all_metrics))
+
+if __name__ == '__main__':
+  main()
diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
index bb0a5d92df4a..7b4239e8b08b 100755
--- a/tools/perf/pmu-events/jevents.py
+++ b/tools/perf/pmu-events/jevents.py
@@ -473,7 +473,7 @@ def add_events_table_entries(item: os.DirEntry, topic: str) -> None:
   for e in read_json_events(item.path, topic):
     if e.name:
       _pending_events.append(e)
-    if e.metric_name:
+    if e.metric_name and not any(e.metric_name == x.metric_name for x in _pending_metrics):
       _pending_metrics.append(e)
@@ -612,7 +612,7 @@ def preprocess_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
   if not item.is_file() or not item.name.endswith('.json'):
     return
-  if item.name == 'metricgroups.json':
+  if item.name.endswith('metricgroups.json'):
     metricgroup_descriptions = json.load(open(item.path))
     for mgroup in metricgroup_descriptions:
       assert len(mgroup) > 1, parents
@@ -665,7 +665,7 @@ def is_leaf_dir_ignoring_sys(path: str) -> bool:
   # Ignore other directories. If the file name does not have a .json
   # extension, ignore it. It could be a readme.txt for instance.
- if not item.is_file() or not item.name.endswith('.json') or item.name == 'metricgroups.json': + if not item.is_file() or not item.name.endswith('.json') or item.name.endswith('metricgroups.json'): return add_events_table_entries(item, get_topic(item.name)) diff --git a/tools/perf/pmu-events/metric.py b/tools/perf/pmu-events/metric.py index 92acd89ed97a..f9390677b7b6 100644 --- a/tools/perf/pmu-events/metric.py +++ b/tools/perf/pmu-events/metric.py @@ -3,9 +3,109 @@ import ast import decimal import json +import os import re +from enum import Enum from typing import Dict, List, Optional, Set, Tuple, Union +all_pmus = set() +all_events = set() +experimental_events = set() +all_events_all_models = set() + +def LoadEvents(directory: str) -> None: + """Populate a global set of all known events for the purpose of validating Event names""" + global all_pmus + global all_events + global experimental_events + global all_events_all_models + all_events = { + "context\-switches", + "cycles", + "duration_time", + "instructions", + "l2_itlb_misses", + } + for file in os.listdir(os.fsencode(directory)): + filename = os.fsdecode(file) + if filename.endswith(".json"): + for x in json.load(open(f"{directory}/{filename}")): + if "Unit" in x: + all_pmus.add(x["Unit"]) + if "EventName" in x: + all_events.add(x["EventName"]) + if "Experimental" in x and x["Experimental"] == "1": + experimental_events.add(x["EventName"]) + elif "ArchStdEvent" in x: + all_events.add(x["ArchStdEvent"]) + all_events_all_models = all_events.copy() + for root, dirs, files in os.walk(directory + ".."): + for filename in files: + if filename.endswith(".json"): + for x in json.load(open(f"{root}/{filename}")): + if "EventName" in x: + all_events_all_models.add(x["EventName"]) + elif "ArchStdEvent" in x: + all_events_all_models.add(x["ArchStdEvent"]) + + +def CheckPmu(name: str) -> bool: + return name in all_pmus + + +def CheckEvent(name: str) -> bool: + """Check the event name exists in the set of all loaded events""" + global all_events + if len(all_events) == 0: + # No events loaded so assume any event is good. + return True + + if ':' in name: + # Remove trailing modifier. + name = name[:name.find(':')] + elif '/' in name: + # Name could begin with a PMU or an event, for now assume it is good. + return True + + return name in all_events + +def CheckEveryEvent(*names: str) -> None: + """Check all the events exist in at least one json file""" + global all_events_all_models + if len(all_events_all_models) == 0: + assert len(names) == 1, f"Cannot determine valid events in {names}" + # No events loaded so assume any event is good. + return + + for name in names: + # Remove trailing modifier. + if ':' in name: + name = name[:name.find(':')] + elif '/' in name: + name = name[:name.find('/')] + if any([name.startswith(x) for x in ['amd', 'arm', 'cpu', 'msr', 'power']]): + continue + if name not in all_events_all_models: + raise Exception(f"Is {name} a named json event?") + + +def IsExperimentalEvent(name: str) -> bool: + global experimental_events + if ':' in name: + # Remove trailing modifier. + name = name[:name.find(':')] + elif '/' in name: + # Name could begin with a PMU or an event, for now assume it is not experimental. 
+ return False + + return name in experimental_events + + +class MetricConstraint(Enum): + GROUPED_EVENTS = 0 + NO_GROUP_EVENTS = 1 + NO_GROUP_EVENTS_NMI = 2 + NO_GROUP_EVENTS_SMT = 3 class Expression: """Abstract base class of elements in a metric expression.""" @@ -22,6 +122,10 @@ def Simplify(self): """Returns a simplified version of self.""" raise NotImplementedError() + def HasExperimentalEvents(self) -> bool: + """Are experimental events used in the expression?""" + raise NotImplementedError() + def Equals(self, other) -> bool: """Returns true when two expressions are the same.""" raise NotImplementedError() @@ -189,6 +293,9 @@ def Simplify(self) -> Expression: return Operator(self.operator, lhs, rhs) + def HasExperimentalEvents(self) -> bool: + return self.lhs.HasExperimentalEvents() or self.rhs.HasExperimentalEvents() + def Equals(self, other: Expression) -> bool: if isinstance(other, Operator): return self.operator == other.operator and self.lhs.Equals( @@ -237,6 +344,10 @@ def Simplify(self) -> Expression: return Select(true_val, cond, false_val) + def HasExperimentalEvents(self) -> bool: + return (self.cond.HasExperimentalEvents() or self.true_val.HasExperimentalEvents() or + self.false_val.HasExperimentalEvents()) + def Equals(self, other: Expression) -> bool: if isinstance(other, Select): return self.cond.Equals(other.cond) and self.false_val.Equals( @@ -285,6 +396,9 @@ def Simplify(self) -> Expression: return Function(self.fn, lhs, rhs) + def HasExperimentalEvents(self) -> bool: + return self.lhs.HasExperimentalEvents() or (self.rhs and self.rhs.HasExperimentalEvents()) + def Equals(self, other: Expression) -> bool: if isinstance(other, Function): result = self.fn == other.fn and self.lhs.Equals(other.lhs) @@ -311,9 +425,22 @@ def _FixEscapes(s: str) -> str: class Event(Expression): """An event in an expression.""" - def __init__(self, name: str, legacy_name: str = ''): - self.name = _FixEscapes(name) - self.legacy_name = _FixEscapes(legacy_name) + def __init__(self, *args: str): + error = "" + CheckEveryEvent(*args) + for name in args: + if CheckEvent(name): + self.name = _FixEscapes(name) + return + if error: + error += " or " + name + else: + error = name + global all_events + raise Exception(f"No event {error} in:\n{all_events}") + + def HasExperimentalEvents(self) -> bool: + return IsExperimentalEvent(self.name) def ToPerfJson(self): result = re.sub('/', '@', self.name) @@ -332,6 +459,31 @@ def Substitute(self, name: str, expression: Expression) -> Expression: return self +class MetricRef(Expression): + """A metric reference in an expression.""" + + def __init__(self, name: str): + self.name = _FixEscapes(name) + + def ToPerfJson(self): + return self.name + + def ToPython(self): + return f'MetricRef(r"{self.name}")' + + def Simplify(self) -> Expression: + return self + + def HasExperimentalEvents(self) -> bool: + return False + + def Equals(self, other: Expression) -> bool: + return isinstance(other, MetricRef) and self.name == other.name + + def Substitute(self, name: str, expression: Expression) -> Expression: + return self + + class Constant(Expression): """A constant within the expression tree.""" @@ -352,6 +504,9 @@ def ToPython(self): def Simplify(self) -> Expression: return self + def HasExperimentalEvents(self) -> bool: + return False + def Equals(self, other: Expression) -> bool: return isinstance(other, Constant) and self.value == other.value @@ -374,6 +529,9 @@ def ToPython(self): def Simplify(self) -> Expression: return self + def HasExperimentalEvents(self) 
-> bool: + return False + def Equals(self, other: Expression) -> bool: return isinstance(other, Literal) and self.value == other.value @@ -423,17 +581,21 @@ class Metric: groups: Set[str] expr: Expression scale_unit: str - constraint: bool + constraint: MetricConstraint + threshold: Optional[Expression] def __init__(self, name: str, description: str, expr: Expression, scale_unit: str, - constraint: bool = False): + constraint: MetricConstraint = MetricConstraint.GROUPED_EVENTS, + threshold: Optional[Expression] = None): self.name = name self.description = description self.expr = expr.Simplify() + if self.expr.HasExperimentalEvents(): + self.description += " (metric should be considered experimental as it contains experimental events)." # Workraound valid_only_metric hiding certain metrics based on unit. scale_unit = scale_unit.replace('/sec', ' per sec') if scale_unit[0].isdigit(): @@ -441,6 +603,7 @@ def __init__(self, else: self.scale_unit = f'1{scale_unit}' self.constraint = constraint + self.threshold = threshold self.groups = set() def __lt__(self, other): @@ -449,7 +612,8 @@ def __lt__(self, other): def AddToMetricGroup(self, group): """Callback used when being added to a MetricGroup.""" - self.groups.add(group.name) + if group.name: + self.groups.add(group.name) def Flatten(self) -> Set['Metric']: """Return a leaf metric.""" @@ -464,20 +628,15 @@ def ToPerfJson(self) -> Dict[str, str]: 'MetricExpr': self.expr.ToPerfJson(), 'ScaleUnit': self.scale_unit } - if self.constraint: - result['MetricConstraint'] = 'NO_NMI_WATCHDOG' + if self.constraint != MetricConstraint.GROUPED_EVENTS: + result['MetricConstraint'] = self.constraint.name + if self.threshold: + result['MetricThreshold'] = self.threshold.ToPerfJson() return result - -class _MetricJsonEncoder(json.JSONEncoder): - """Special handling for Metric objects.""" - - def default(self, o): - if isinstance(o, Metric): - return o.ToPerfJson() - return json.JSONEncoder.default(self, o) - + def ToMetricGroupDescriptions(self, root: bool = True) -> Dict[str, str]: + return {} class MetricGroup: """A group of metrics. @@ -487,12 +646,16 @@ class MetricGroup: which can facilitate arrangements similar to trees. 
""" - def __init__(self, name: str, metric_list: List[Union[Metric, - 'MetricGroup']]): + def __init__(self, name: str, + metric_list: List[Union[Optional[Metric], Optional['MetricGroup']]], + description: Optional[str] = None): self.name = name - self.metric_list = metric_list + self.metric_list = [] + self.description = description for metric in metric_list: - metric.AddToMetricGroup(self) + if metric: + self.metric_list.append(metric) + metric.AddToMetricGroup(self) def AddToMetricGroup(self, group): """Callback used when a MetricGroup is added into another.""" @@ -507,11 +670,36 @@ def Flatten(self) -> Set[Metric]: return result - def ToPerfJson(self) -> str: - return json.dumps(sorted(self.Flatten()), indent=2, cls=_MetricJsonEncoder) + def ToPerfJson(self) -> List[Dict[str, str]]: + result = [] + for x in sorted(self.Flatten()): + result.append(x.ToPerfJson()) + return result + + def ToMetricGroupDescriptions(self, root: bool = True) -> Dict[str, str]: + result = {self.name: self.description} if self.description else {} + for x in self.metric_list: + result.update(x.ToMetricGroupDescriptions(False)) + return result def __str__(self) -> str: - return self.ToPerfJson() + return str(self.ToPerfJson()) + + +def JsonEncodeMetric(x: MetricGroup): + class MetricJsonEncoder(json.JSONEncoder): + """Special handling for Metric objects.""" + + def default(self, o): + if isinstance(o, Metric) or isinstance(o, MetricGroup): + return o.ToPerfJson() + return json.JSONEncoder.default(self, o) + + return json.dumps(x, indent=2, cls=MetricJsonEncoder) + + +def JsonEncodeMetricGroupDescriptions(x: MetricGroup): + return json.dumps(x.ToMetricGroupDescriptions(), indent=2) class _RewriteIfExpToSelect(ast.NodeTransformer): @@ -551,12 +739,18 @@ def ParsePerfJson(orig: str) -> Expression: r'Event(r"\1")', py) # If it started with a # it should have been a literal, rather than an event name py = re.sub(r'#Event\(r"([^"]*)"\)', r'Literal("#\1")', py) + # Fix events wrongly broken at a ',' + while True: + prev_py = py + py = re.sub(r'Event\(r"([^"]*)"\),Event\(r"([^"]*)"\)', r'Event(r"\1,\2")', py) + if py == prev_py: + break # Convert accidentally converted hex constants ("0Event(r"xDEADBEEF)"") back to a constant, # but keep it wrapped in Event(), otherwise Python drops the 0x prefix and it gets interpreted as # a double by the Bison parser py = re.sub(r'0Event\(r"[xX]([0-9a-fA-F]*)"\)', r'Event("0x\1")', py) # Convert accidentally converted scientific notation constants back - py = re.sub(r'([0-9]+)Event\(r"(e[0-9]+)"\)', r'\1\2', py) + py = re.sub(r'([0-9]+)Event\(r"(e[0-9]*)"\)', r'\1\2', py) # Convert all the known keywords back from events to just the keyword keywords = ['if', 'else', 'min', 'max', 'd_ratio', 'source_count', 'has_event', 'strcmp_cpuid_str'] for kw in keywords: @@ -569,7 +763,6 @@ def ParsePerfJson(orig: str) -> Expression: parsed = ast.fix_missing_locations(parsed) return _Constify(eval(compile(parsed, orig, 'eval'))) - def RewriteMetricsInTermsOfOthers(metrics: List[Tuple[str, str, Expression]] )-> Dict[Tuple[str, str], Expression]: """Shorten metrics by rewriting in terms of others. 
diff --git a/tools/perf/pmu-events/metric_test.py b/tools/perf/pmu-events/metric_test.py index ee22ff43ddd7..8acfe4652b55 100755 --- a/tools/perf/pmu-events/metric_test.py +++ b/tools/perf/pmu-events/metric_test.py @@ -61,6 +61,10 @@ def test_ParsePerfJson(self): after = before self.assertEqual(ParsePerfJson(before).ToPerfJson(), after) + before = r'a + 3e-12 + b' + after = before + self.assertEqual(ParsePerfJson(before).ToPerfJson(), after) + def test_IfElseTests(self): # if-else needs rewriting to Select and back. before = r'Event1 if #smt_on else Event2' diff --git a/tools/perf/tests/shell/stat+std_output.sh b/tools/perf/tests/shell/stat+std_output.sh index cbf2894b2c84..845f83213855 100755 --- a/tools/perf/tests/shell/stat+std_output.sh +++ b/tools/perf/tests/shell/stat+std_output.sh @@ -13,7 +13,7 @@ stat_output=$(mktemp /tmp/__perf_test.stat_output.std.XXXXX) event_name=(cpu-clock task-clock context-switches cpu-migrations page-faults stalled-cycles-frontend stalled-cycles-backend cycles instructions branches branch-misses) event_metric=("CPUs utilized" "CPUs utilized" "/sec" "/sec" "/sec" "frontend cycles idle" "backend cycles idle" "GHz" "insn per cycle" "/sec" "of all branches") -skip_metric=("stalled cycles per insn" "tma_" "retiring" "frontend_bound" "bad_speculation" "backend_bound") +skip_metric=($(perf list --raw Default 2> /dev/null)) cleanup() { rm -f "${stat_output}" diff --git a/tools/perf/tests/shell/stat_all_metricgroups.sh b/tools/perf/tests/shell/stat_all_metricgroups.sh index 55ef9c9ded2d..d6db192b9f18 100755 --- a/tools/perf/tests/shell/stat_all_metricgroups.sh +++ b/tools/perf/tests/shell/stat_all_metricgroups.sh @@ -1,9 +1,7 @@ -#!/bin/sh +#!/bin/bash # perf all metricgroups test # SPDX-License-Identifier: GPL-2.0 -set -e - ParanoidAndNotRoot() { [ "$(id -u)" != 0 ] && [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt $1 ] @@ -14,11 +12,29 @@ if ParanoidAndNotRoot 0 then system_wide_flag="" fi - +err=0 for m in $(perf list --raw-dump metricgroups) do echo "Testing $m" - perf stat -M "$m" $system_wide_flag sleep 0.01 + result=$(perf stat -M "$m" $system_wide_flag sleep 0.01 2>&1) + result_err=$? + if [[ $result_err -gt 0 ]] + then + if [[ "$result" =~ \ + "Access to performance monitoring and observability operations is limited" ]] + then + echo "Permission failure" + echo $result + if [[ $err -eq 0 ]] + then + err=2 # Skip + fi + else + echo "Metric group $m failed" + echo $result + err=1 # Fail + fi + fi done -exit 0 +exit $err diff --git a/tools/perf/tests/shell/stat_all_pmu.sh b/tools/perf/tests/shell/stat_all_pmu.sh index d2a3506e0d19..42456d89c5da 100755 --- a/tools/perf/tests/shell/stat_all_pmu.sh +++ b/tools/perf/tests/shell/stat_all_pmu.sh @@ -1,23 +1,51 @@ -#!/bin/sh +#!/bin/bash # perf all PMU test # SPDX-License-Identifier: GPL-2.0 set -e +err=0 +result="" + +trap_cleanup() { + echo "Unexpected signal in ${FUNCNAME[1]}" + echo "$result" + exit 1 +} +trap trap_cleanup EXIT TERM INT # Test all PMU events; however exclude parameterized ones (name contains '?') -for p in $(perf list --raw-dump pmu | sed 's/[[:graph:]]\+?[[:graph:]]\+[[:space:]]//g'); do +for p in $(perf list --raw-dump pmu | sed 's/[[:graph:]]\+?[[:graph:]]\+[[:space:]]//g') +do echo "Testing $p" result=$(perf stat -e "$p" true 2>&1) - if ! echo "$result" | grep -q "$p" && ! echo "$result" | grep -q "" ; then - # We failed to see the event and it is supported. Possibly the workload was - # too small so retry with something longer. 
-    result=$(perf stat -e "$p" perf bench internals synthesize 2>&1)
-    if ! echo "$result" | grep -q "$p" ; then
-      echo "Event '$p' not printed in:"
-      echo "$result"
-      exit 1
-    fi
+  if echo "$result" | grep -q "$p"
+  then
+    # Event seen in output.
+    continue
+  fi
+  if echo "$result" | grep -q "<not supported>"
+  then
+    # Event not supported, so ignore.
+    continue
+  fi
+  if echo "$result" | grep -q "Access to performance monitoring and observability operations is limited."
+  then
+    # Access is limited, so ignore.
+    continue
+  fi
+
+  # We failed to see the event and it is supported. Possibly the workload was
+  # too small so retry with something longer.
+  result=$(perf stat -e "$p" perf bench internals synthesize 2>&1)
+  if echo "$result" | grep -q "$p"
+  then
+    # Event seen in output.
+    continue
   fi
+  echo "Error: event '$p' not printed in:"
+  echo "$result"
+  err=1
 done
-exit 0
+trap - EXIT TERM INT
+exit $err
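Taken together, the metric.py helpers and the per-vendor generators compose along these lines; a minimal, hypothetical end-to-end sketch (metric name, description and paths are illustrative; run from tools/perf/pmu-events):

  from metric import (Event, Metric, MetricGroup, LoadEvents,
                      JsonEncodeMetric, d_ratio)

  # Validate event names against one model's json, then emit perf-style metric json.
  LoadEvents("arch/x86/skylakex/")
  ipc = Metric("my_ipc", "Retired instructions per core cycle",
               d_ratio(Event("INST_RETIRED.ANY_P"), Event("CPU_CLK_UNHALTED.THREAD_P")),
               "insn/cycle")
  print(JsonEncodeMetric(MetricGroup("", [ipc])))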