Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rdpmc topdown #238

Closed
wants to merge 22 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
3286984
perf_event: added subdbg to mmap_read_self
Aug 7, 2024
2e03da3
validation_tests: Began stub validation test for validating topdown e…
Aug 8, 2024
30496ea
validation_tests: added calculations for the case when rdpmc is disabled
Aug 8, 2024
61dcd5b
validation_tests: topdown test now compares rdpmc instruction results…
Aug 12, 2024
daaf301
validation_tests: added percentage comparison code to topdown_validation
Aug 13, 2024
4d37365
validation_tests: fixed infinite loop in topdown_validation
Aug 13, 2024
d01aa15
validation_tests: topdown_validation now collects absolute error
Aug 14, 2024
8e7e566
perf_event: commenting out pe_ctl->reset_flag=0 fixes rdpmc enabled t…
Aug 15, 2024
2b5a744
validation_tests: separated _rdpmc and papi reads in topdown_validati…
Aug 30, 2024
426c099
topdown_validation: added macro to select what test to run in topdown…
Aug 30, 2024
6da16c0
validation_tests: increased tolerance for percentages in topdown_vali…
Aug 30, 2024
5b29362
validation_tests: fixed error calculation bug in topdown_validation
Sep 3, 2024
2f15a8d
libpfm4: added missing topdown events for intel_adl_glc_events.h
Sep 10, 2024
05a9001
validation_tests: refactored topdown_validation to enable the testing…
Sep 10, 2024
2e8260b
validation_tests: finished rewriting topdown_validation to test both …
Sep 10, 2024
0346ad0
libpfm4: added descriptions for the missing topdown events to intel_a…
Sep 10, 2024
37e9d4d
validation_tests: added test_pass call to topdown_validation
Sep 10, 2024
2f80b9b
papi_events: added patch that includes preset events for adl_glc top …
Sep 11, 2024
2f83dd6
perf_event: added 'metric' field to pe_event_info_t that is set for T…
Sep 12, 2024
eefe432
validation_tests: finalized topdown_validation test
Sep 16, 2024
7f0f22e
Merge branch 'master' into rdpmc-topdown
willowec Sep 18, 2024
da173a9
Merge branch 'master' into rdpmc-topdown
willowec Sep 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion src/components/perf_event/perf_event.c
Original file line number Diff line number Diff line change
Expand Up @@ -1467,7 +1467,7 @@ _pe_start( hwd_context_t *ctx, hwd_control_state_t *ctl )
pe_ctl->events[i].event_fd);
ret=ioctl( pe_ctl->events[i].event_fd,
PERF_EVENT_IOC_ENABLE, NULL) ;
if (_perf_event_vector.cmp_info.fast_counter_read) {
if (_perf_event_vector.cmp_info.fast_counter_read && !pe_ctl->events[i].metric) {
pe_ctl->reset_counts[i] = 0LL;
pe_ctl->reset_flag = 0;
}
Expand Down Expand Up @@ -1622,6 +1622,13 @@ _pe_update_control_state( hwd_control_state_t *ctl,
// pe_ctl->events[i].attr.exclude_hv = !(pe_ctl->domain & PAPI_DOM_SUPERVISOR);
// }

/* Intel's topdown events need to be handled differently than normal events */
/* They are instantaneous values and should not be accumulated. In case more */
/* types of events like this are discovered, the 'metric' flag is set to */
/* handle this behavior. */
if (strcmp(ntv_evt->base_name, "TOPDOWN") == 0) {
pe_ctl->events[i].metric = 1;
}

// set the cpu number provided with an event mask if there was one (will be -1 if mask not provided)
pe_ctl->events[i].cpu = ntv_evt->cpu;
Expand Down
23 changes: 12 additions & 11 deletions src/components/perf_event/perf_event_lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,18 @@

typedef struct
{
int group_leader_fd; /* fd of group leader */
int event_fd; /* fd of event */
int event_opened; /* event successfully opened */
int profiling; /* event is profiling */
int sampling; /* event is a sampling event */
uint32_t nr_mmap_pages; /* number pages in the mmap buffer */
void *mmap_buf; /* used for control/profiling */
uint64_t tail; /* current read location in mmap buffer */
uint64_t mask; /* mask used for wrapping the pages */
int cpu; /* cpu associated with this event */
struct perf_event_attr attr; /* perf_event config structure */
int group_leader_fd; /* fd of group leader */
int event_fd; /* fd of event */
int event_opened; /* event successfully opened */
int profiling; /* event is profiling */
int sampling; /* event is a sampling event */
int metric; /* event is a metric event (e.g. topdown) */
uint32_t nr_mmap_pages; /* number pages in the mmap buffer */
void *mmap_buf; /* used for control/profiling */
uint64_t tail; /* current read location in mmap buffer */
uint64_t mask; /* mask used for wrapping the pages */
int cpu; /* cpu associated with this event */
struct perf_event_attr attr; /* perf_event config structure */
} pe_event_info_t;


Expand Down
3 changes: 3 additions & 0 deletions src/components/perf_event/perf_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,9 @@ static inline unsigned long long mmap_read_self(void *addr,
unsigned long long *en,
unsigned long long *ru) {

SUBDBG("ENTER: addr: %p, user_reset_flag: %d, reset: %llu, en: %llu, ru: %llu\n",
addr, user_reset_flag, reset, *en, *ru);

struct perf_event_mmap_page *pc = addr;

uint32_t seq, time_mult = 0, time_shift = 0, index, width;
Expand Down
19 changes: 17 additions & 2 deletions src/libpfm4/lib/events/intel_adl_glc_events.h
Original file line number Diff line number Diff line change
Expand Up @@ -1552,12 +1552,12 @@ static const intel_x86_umask_t adl_glc_topdown[]={
.uflags = INTEL_X86_NCOMBO,
},
{ .uname = "BR_MISPREDICT_SLOTS",
.udesc = "TMA slots wasted due to incorrect speculation by branch mispredictions",
.udesc = "TMA slots wasted due to incorrect speculation by branch mispredictions (Topdown L2)",
.ucode = 0x8500ull,
.uflags = INTEL_X86_NCOMBO,
},
{ .uname = "MEMORY_BOUND_SLOTS",
.udesc = "TMA slots wasted due to memory accesses (TopdownL2)",
.udesc = "TMA slots wasted due to memory accesses (Topdown L2)",
.ucode = 0x8700ull,
.uflags = INTEL_X86_NCOMBO,
},
Expand All @@ -1566,6 +1566,21 @@ static const intel_x86_umask_t adl_glc_topdown[]={
.ucode = 0x8000ull,
.uflags = INTEL_X86_NCOMBO,
},
{ .uname = "FRONTEND_BOUND_SLOTS",
.udesc = "TMA slots where the front-end did not deliver uops (Topdown L1)",
.ucode = 0x8200ull,
.uflags = INTEL_X86_NCOMBO,
},
{ .uname = "HEAVY_OPS_SLOTS",
.udesc = "TMA slots where heavy-weight instructions are retiring (Topdown L2)",
.ucode = 0x8400ull,
.uflags = INTEL_X86_NCOMBO,
},
{ .uname = "FETCH_LAT_SLOTS",
.udesc = "TMA slots wasted due to front-end latency issues (Topdown L2)",
.ucode = 0x8600ull,
.uflags = INTEL_X86_NCOMBO,
},
{ .uname = "SLOTS",
.udesc = "TMA slots available for an unhalted logical processor. Fixed counter - architectural event",
.ucode = 0x0400ull,
Expand Down
34 changes: 28 additions & 6 deletions src/papi_events.csv
Original file line number Diff line number Diff line change
Expand Up @@ -979,6 +979,8 @@ PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HIT_WITH_FWD
# Intel Ice Lake SP events
CPU,icx
CPU,icl
# Note: Many Ice Lake events also work on Alder Lake/Raptor Lake P-cores
CPU,adl_glc
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P
PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P
PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES
Expand All @@ -991,18 +993,15 @@ PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D:REPLACEMENT
PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D:REPLACEMENT,L2_RQSTS:ALL_CODE_RD
# L2 cache
PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES
PRESET,PAPI_L2_DCR,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_DATA_RD
PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_RQSTS:CODE_RD_HIT
PRESET,PAPI_L2_ICM,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
PRESET,PAPI_L2_ICR,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
#PRESET,PAPI_L2_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_HIT
#PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS
PRESET,PAPI_L2_DCM,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS
PRESET,PAPI_L2_ICA,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
#PRESET,PAPI_L2_LDH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
PRESET,PAPI_L2_LDM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS
PRESET,PAPI_L2_TCA,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD
PRESET,PAPI_L2_TCM,NOT_DERIVED,LLC_REFERENCES
PRESET,PAPI_L2_TCR,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_DATA_RD,L2_RQSTS:ALL_CODE_RD
# L3 cache
Expand All @@ -1014,8 +1013,6 @@ PRESET,PAPI_L3_ICR,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
PRESET,PAPI_L3_LDM,NOT_DERIVED,MEM_LOAD_RETIRED:L3_MISS
PRESET,PAPI_L3_TCA,NOT_DERIVED,LLC_REFERENCES
PRESET,PAPI_L3_TCM,NOT_DERIVED,LLC_MISSES
# SMP
PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD
# Branches
PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:COND
PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:COND
Expand All @@ -1024,6 +1021,15 @@ PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:COND_NTAKEN
PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:COND
PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:COND,BR_MISP_RETIRED:COND
PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES

CPU,icx
CPU,icl
# L2
PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES
PRESET,PAPI_L2_TCA,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD
PRESET,PAPI_L2_DCM,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS
# SMP
PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD
#FLOPs
# PAPI_DP_OPS = FP_ARITH:SCALAR_DOUBLE + 2*FP_ARITH:128B_PACKED_DOUBLE + 4*256B_PACKED_DOUBLE + 8*512B_PACKED_DOUBLE
PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE
Expand All @@ -1034,7 +1040,23 @@ PRESET,PAPI_FP_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|N6|N7|+|+|+|+|+|+|+|,FP_ARI
PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE
PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE
PRESET,PAPI_VEC_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|+|+|+|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
# End of icx, icl list

CPU,adl_glc
# L2
#PRESET,PAPI_L2_DCA,NOT_DERIVED,DERIVED_SUB,L2_RQSTS.REFERENCES,L2_RQSTS.ALL_CODE_RD
PRESET,PAPI_L2_TCA,NOT_DERIVED,L2_RQSTS.REFERENCES
PRESET,PAPI_L2_DCM,NOT_DERIVED,L2_RQSTS.ALL_DEMAND_MISS
# SMP
PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:DATA_RD
#FLOPs
PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE
PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE
PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|4|*|+|N4|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE
PRESET,PAPI_FP_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|+|+|+|+|,FP_ARITH_INST_RETIRED:SCALAR,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE
PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE
PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE
PRESET,PAPI_VEC_INS,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE
# End of icx, icl, adl_glc list

# Intel Sapphire Rapids events
CPU,spr
Expand Down
3 changes: 3 additions & 0 deletions src/validation_tests/Makefile.recipies
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
ALL = fp_validation_hl \
cycles_validation flops_validation \
topdown_validation \
papi_br_cn papi_br_ins papi_br_msp \
papi_br_ntk papi_br_prc papi_br_tkn papi_br_ucn \
papi_dp_ops papi_fp_ops papi_sp_ops papi_hw_int \
Expand Down Expand Up @@ -47,6 +48,8 @@ cycles_validation: cycles_validation.o $(TESTLIB) $(PAPILIB) display_error.o ins
flops_validation: flops_validation.o $(TESTLIB) $(PAPILIB) display_error.o branches_testcode.o flops_testcode.o
$(CC) -o flops_validation flops_validation.o $(TESTLIB) display_error.o branches_testcode.o flops_testcode.o $(PAPILIB) $(LDFLAGS) $(EXTRALIB)

topdown_validation: topdown_validation.o $(TESTLIB) $(PAPILIB) instructions_testcode.o
$(CC) -o topdown_validation topdown_validation.o $(TESTLIB) instructions_testcode.o $(PAPILIB) $(LDFLAGS) $(EXTRALIB)

memleak_check: memleak_check.o $(TESTLIB) $(PAPILIB) display_error.o branches_testcode.o
$(CC) -o memleak_check memleak_check.o $(TESTLIB) display_error.o branches_testcode.o $(PAPILIB) $(LDFLAGS) $(LDFLAGS) $(EXTRALIB)
Expand Down
Loading