From 73e6530346b7e3ccfc12576792f8f248cda0096b Mon Sep 17 00:00:00 2001 From: Al Grant Date: Tue, 28 Jan 2025 16:24:52 +0000 Subject: [PATCH] Add PMU-based PC sampling In recent cores, the PC sampling feature has moved from the debug interface to the PMU. Add cs_pmu_get_pc_sample(). Caller must determine whether to use this or cs_debug_get_pc_sample(). Also cleaned up some types to use stdint.h. --- include/cs_debug_sample.h | 6 ++++-- include/cs_pmu.h | 6 ++++++ include/csregisters.h | 19 +++++++++++++++++-- source/cs_access_cmnfns.h | 8 ++++++-- source/cs_debug_sample.c | 11 +++++++---- source/cs_init_manage.c | 6 +++--- source/cs_pmu.c | 40 +++++++++++++++++++++++++++++++++++++++ source/cs_topology.c | 9 ++++++++- source/cs_trace_sink.c | 4 ++-- 9 files changed, 93 insertions(+), 16 deletions(-) diff --git a/include/cs_debug_sample.h b/include/cs_debug_sample.h index 7a9e1ec..1d52e76 100644 --- a/include/cs_debug_sample.h +++ b/include/cs_debug_sample.h @@ -26,7 +26,7 @@ extern "C" { /** \defgroup debug Access to Debug Sampling Registers. - Non - intrusive interface to sampling debug architecture on CPU + Non-intrusive interface to sampling debug architecture on CPU Samples PC plus VMID and CONTEXTID if present from a running core via the debug registers. @{ @@ -39,6 +39,8 @@ extern "C" { * core is being sampled. * * Optionally, CONTEXTID and VMID can be sampled, synchronously with the PC. + * + * Note that on recent cores, the PC sampling feature has moved to the PMU device. * * \param dev device to sample - must be the debug registers on a core. * \param pc pointer to receive PC virtual address sample. Bit 0 set indicates Thumb state. @@ -47,7 +49,7 @@ extern "C" { * @return 0 if valid sample was obtained. -1 if sampling is not possible at present time. 
*/ int cs_debug_get_pc_sample(cs_device_t dev, cs_virtaddr_t * pc, - unsigned int *cid, unsigned int *vmid); + uint32_t *cid, uint32_t *vmid); /** @} */ diff --git a/include/cs_pmu.h b/include/cs_pmu.h index ec1c22d..6783f98 100644 --- a/include/cs_pmu.h +++ b/include/cs_pmu.h @@ -134,6 +134,12 @@ int cs_pmu_reset(cs_device_t, unsigned int flags); */ int cs_pmu_is_enabled(cs_device_t); + +/** + * Get a PC sample (PC, and optionally the CONTEXTIDR_EL1 and VMID samples) via the PMU external interface. Any of pc, cid and vmid may be NULL, in which case that value is not returned. Returns 0 after taking a sample, or -1 if the PMU does not implement PC sampling. + */ +int cs_pmu_get_pc_sample(cs_device_t, cs_virtaddr_t *pc, uint32_t *cid, uint32_t *vmid); + /** @} */ diff --git a/include/csregisters.h b/include/csregisters.h index b000768..ec21960 100644 --- a/include/csregisters.h +++ b/include/csregisters.h @@ -89,6 +89,8 @@ Common register definitions in the management group of all CoreSight devices #define CS_ARM_ARCHID_CTI 0x1a14 #define CS_ARM_ARCHID_MEMAP 0x0a17 #define CS_ARM_ARCHID_ROM 0x0af7 +#define CS_ARM_ARCHID_PMU32 0x0a16 +#define CS_ARM_ARCHID_PMU64 0x0a26 #define CS_DEVAFF0 0xFA8 /**< CS device affinity register 0 */ #define CS_DEVAFF1 0xFAC /**< CS device affinity register 1 */ @@ -120,8 +122,8 @@ with a JTAG debugger or OS device drivers.
#define CS_CLAIM_AP_INTERNAL 0x01 /**< AP is in use by self-hosted software */ #define CS_CLAIM_AP_EXTERNAL 0x02 /**< AP is in use by external debugger */ -#define CS_CLAIM_DEV_EXTERNAL 0x01 /**< Non-AP device is in use by self-hosted software */ -#define CS_CLAIM_DEV_INTERNAL 0x02 /**< Non-AP device is in use by external debugger */ +#define CS_CLAIM_DEV_EXTERNAL 0x01 /**< Non-AP device is in use by external debugger */ +#define CS_CLAIM_DEV_INTERNAL 0x02 /**< Non-AP device is in use by self-hosted software */ #define CS_LAR 0xFB0 /**< CS component Software Lock access register */ #define CS_LSR 0xFB4 /**< CS component Software Lock status register */ @@ -731,6 +733,7 @@ The cross trigger matrix does not have any programmable elements so needs no reg #define CS_CTICHINSTATUS 0x138 /**< CTI Channel In Status Register */ #define CS_CTICHOUTSTATUS 0x13C /**< CTI Channel Out Status Register */ #define CS_CTIGATE 0x140 /**< Enable CTI Channel Gate Register */ +#define CS_CTIDEVCTL 0x150 /**< Target-specific device controls (CPU CTI, when FEAT_DoPD) */ #define CS_CTIITCHOUT 0xEE4 /**< CTI Integration Test Channel Output Register */ #define CS_CTIITTRIGOUT 0xEE8 /**< CTI Integration Test Trigger Output Register */ #define CS_CTIITCHIN 0xEF4 /**< CTI Integration Test Channel Input Register (latched) */ @@ -856,6 +859,9 @@ Register definitions and bitfield values for the Architecture v7 Cortex Core deb #define CS_DBGWCR(n) (0x1C0 + (n)*4) /**< Watchpoint Control */ #define CS_DBGPRCR 0x310 /**< Device Powerdown and Reset Control */ #define CS_DBGPRSR 0x314 /**< Device Powerdown and Reset Status */ +#define CS_DBGPRSR_PU 0x01 /**< Device is powered up */ + +#define CS_DBG_MIDR 0xD00 /**< Copy of MIDR_EL1. (In core PD.) */ /** @name DBGPRSR Bit Values see #CS_DBGPRSR @{*/ @@ -924,6 +930,8 @@ Register definitions and bitfield values for the Architecture v8 Cortex Core deb @ingroup reg_defs Register definitions and bitfield values for the Performance Monitoring Unit. 
+ +This describes the 32-bit external interface. @{ */ #define CS_PMEVCNTR(n,scale) (0x000 + ((n)<<(scale))) /**< Event Count Register (n) */ @@ -932,6 +940,12 @@ Register definitions and bitfield values for the Performance Monitoring Unit. #define CS_PMXEVCNTR(n) CS_PMEVCNTR32(n) /**< Event Count Register (n) - deprecated, assumes 32-bit */ #define CS_PMCCNTR 0x07C /**< Cycle Count Register - deprecated, assumes 32-bit */ #define CS_PMCCNTRW(scale) CS_PMEVCNTR(31,scale) /**< Cycle Count Register */ +#define CS_PMPCSR 0x200 /**< Program Counter Sample Register (two words) */ +#define CS_PMCID1SR 0x208 /**< CONTEXTIDR_EL1 Sample Register */ +#define CS_PMVCIDSR 0x208 /**< EXT64: VMID and CONTEXTIDR_EL1 */ +#define CS_PMVIDSR 0x20C /**< VMID Sample Register */ +#define CS_PMCCIDSR 0x228 /**< EXT64: CONTEXTIDR_ELx */ +#define CS_PMCID2SR 0x22C /**< CONTEXTIDR_EL2 Sample Register */ #define CS_PMXEVTYPER(n) (0x400 + (n)*4) /**< Event Type Register(n) */ #define CS_PMXEVTYPER31 0x47C /**< Event Type Select Register (filter register) for CCNT */ #define CS_PMCCFILTR 0x47C /**< Cycle Counter Filter Register (alias CS_PMXEVTYPER31) */ @@ -953,6 +967,7 @@ Register definitions and bitfield values for the Performance Monitoring Unit. 
#define CS_PMCEID0 0xE20 /**< Common Event Identification 0 */ #define CS_PMCEID1 0xE24 /**< Common Event Identification 1 */ #define CS_PMMIR 0xE40 /**< Machine Identification Register (PMUv3.4) */ +#define CS_PMPCSCTL 0xE50 /**< PC Sample-based Profiling Control Register */ #define CS_PMAUTHSTATUS 0xFB8 /**< Authentication Status Register */ /** @} */ diff --git a/source/cs_access_cmnfns.h b/source/cs_access_cmnfns.h index 8538d90..e7053f2 100644 --- a/source/cs_access_cmnfns.h +++ b/source/cs_access_cmnfns.h @@ -149,7 +149,8 @@ struct cs_device { struct debug_props { uint32_t didr; /**< Contents of DBGDIDR */ uint32_t devid; /**< Contents of DBGDEVID, or zero when not present */ - unsigned int pcsamplereg; /**< Offset to PC sampling register */ + uint16_t pcsamplereg; /**< Offset to PC sampling register */ +#define CS_DBGPCSR_NONE 0xfff /**< PC sampling not implemented in debug i/f */ unsigned int debug_arch; /**< debug architecture */ struct cs_device *pmu; /**< PMU for this CPU */ struct cs_device *etm; /**< ETM for this CPU */ @@ -157,8 +158,11 @@ struct cs_device { } debug; struct pmu_props { uint32_t cfgr; - unsigned int n_counters; /**< Number of event counters, not including cycle counter */ + uint8_t n_counters; /**< Number of event counters, not including cycle counter */ unsigned char map_scale; /**< Spacing in the memory map (power of 2) */ + unsigned int ext64:1; /**< 64-bit external interface (FEAT_PMUv3_EXT64) */ + unsigned int pcsr:1; /**< Implements PC-sampling */ + unsigned int snapshot:1; /**< Implements Snapshot extension */ } pmu; struct cti_props { #define CTI_CHANNELS 4 diff --git a/source/cs_debug_sample.c b/source/cs_debug_sample.c index 7a138cc..4220973 100644 --- a/source/cs_debug_sample.c +++ b/source/cs_debug_sample.c @@ -22,16 +22,18 @@ /* ---------- Local functions ------------- */ /* this called from API fn if V8 core */ static int cs_debug_v8_pc_sample(struct cs_device *d, cs_virtaddr_t * pc, - unsigned int *cid, unsigned int 
*vmid) + uint32_t *cid, uint32_t *vmid) { uint32_t regval; cs_virtaddr_t pc_sample = 0; - /* check PC sampling support is present */ if ((d->v.debug.devid & CS_V8EDDEVID_SMPL_MSK) == - CS_V8EDDEVID_SMPL_NONE) + CS_V8EDDEVID_SMPL_NONE) { + /* PC sampling not implemented in CPU external interface - + but might be implemented in PMU external interface */ return -1; + } /* check target processor is powered, running and accessible */ regval = _cs_read(d, CS_V8EDPRSR); @@ -68,9 +70,10 @@ static int cs_debug_v8_pc_sample(struct cs_device *d, cs_virtaddr_t * pc, return 0; } + /* ========== API functions ================ */ int cs_debug_get_pc_sample(cs_device_t dev, cs_virtaddr_t * pc, - unsigned int *cid, unsigned int *vmid) + uint32_t *cid, uint32_t *vmid) { struct cs_device *d = DEV(dev); assert(d->type == DEV_CPU_DEBUG); diff --git a/source/cs_init_manage.c b/source/cs_init_manage.c index ea5e890..95aa617 100644 --- a/source/cs_init_manage.c +++ b/source/cs_init_manage.c @@ -126,13 +126,13 @@ void cs_set_default_memap(cs_device_t dev) /* Call this when the library is unloaded. This doesn't generally disable - all trace devices, but it may lock them. + all trace devices, but it may lock them and release claim-tags. 
*/ int cs_shutdown(void) { if (G.init_called) { /* Do anything that needs memory-mapped access */ - cs_release(); + cs_release(); /* claim tags released here */ cs_checkpoint(); #ifdef UNIX_USERSPACE /* Now remove memory-mapped access */ @@ -178,7 +178,7 @@ int cs_release(void) uint32_t const tag = _cs_device_internal_claim_tag(d); if (_cs_isclaimed(d, tag)) { if (DTRACE(d)) { - diagf("!unclaiming device tag 0x%x at %" CS_PHYSFMT "", + diagf("!unclaiming device tag 0x%x at %" CS_PHYSFMT "\n", (unsigned int)tag, d->phys_addr); } _cs_unclaim(d, tag); diff --git a/source/cs_pmu.c b/source/cs_pmu.c index b814810..02fee28 100644 --- a/source/cs_pmu.c +++ b/source/cs_pmu.c @@ -264,4 +264,44 @@ int cs_pmu_is_enabled(cs_device_t dev) return _cs_isset(d, CS_PMCR, CS_PMCR_E); } + +int cs_pmu_get_pc_sample(cs_device_t dev, cs_virtaddr_t *pc, + uint32_t *cid, uint32_t *vmid) +{ + struct cs_device *d = DEV(dev); + assert(d->type == DEV_CPU_PMU); + if (!d->v.pmu.pcsr) { + /* PC sampling external interface not implemented in PMU */ + return -1; + } + uint64_t pcx; + if (d->v.pmu.ext64) { + pcx = _cs_read64(d, CS_PMPCSR); + if (cid || vmid) { + uint64_t vcid = _cs_read64(d, CS_PMVCIDSR); + if (cid) { + *cid = (uint32_t)vcid; + } + if (vmid) { + *vmid = (vcid >> 32) & 0xffff; + } + } + } else { + /* reading low word triggers sample; combine the halves as uint64_t so the shift by 32 is well-defined even if cs_virtaddr_t is only 32 bits wide */ + uint32_t lo = _cs_read(d, CS_PMPCSR); + uint32_t hi = _cs_read(d, CS_PMPCSR+4); + pcx = ((uint64_t)hi << 32) | lo; + if (cid) { + *cid = _cs_read(d, CS_PMCID1SR); + } + if (vmid) { + *vmid = _cs_read(d, CS_PMVIDSR); + } + } + if (pc) { + *pc = pcx; + } + return 0; +} + /* end of cspmu.c */ diff --git a/source/cs_topology.c b/source/cs_topology.c index c543d19..fab40e4 100644 --- a/source/cs_topology.c +++ b/source/cs_topology.c @@ -351,7 +351,11 @@ static cs_device_t cs_device_or_romtable_register(cs_physaddr_t addr) d->v.debug.didr = (d->v.debug.didr & 0xFFFFFFF0) | ((devarch >> 12) & 0xF); } d->v.debug.devid = _cs_read(d, CS_V8EDDEVID); - 
d->v.debug.pcsamplereg = CS_DBGPCSR_40; + if ((d->v.debug.devid & 0xf) != 0) { + d->v.debug.pcsamplereg = CS_DBGPCSR_40; + } else { + d->v.debug.pcsamplereg = CS_DBGPCSR_NONE; + } } else { /* v7 arch core */ d->v.debug.debug_arch = 0x7; @@ -409,6 +413,9 @@ static cs_device_t cs_device_or_romtable_register(cs_physaddr_t addr) d->v.pmu.n_counters = n; } } + d->v.pmu.ext64 = (devarch & 0xfff) == CS_ARM_ARCHID_PMU64; + d->v.pmu.pcsr = (devid & 0xf) != 0; + d->v.pmu.snapshot = ((devid >> 4) & 0xf) != 0; /* Set up the scale for indexing into the PMU counters in memory */ d->v.pmu.map_scale = ((d->v.pmu.cfgr & 0x00003f00) == 0x00003f00) ? 3 : 2; diff --git a/source/cs_trace_sink.c b/source/cs_trace_sink.c index d8692bf..6ee9b7a 100644 --- a/source/cs_trace_sink.c +++ b/source/cs_trace_sink.c @@ -299,7 +299,7 @@ int cs_get_trace_data(cs_device_t dev, void *buf, unsigned int size) _cs_read(d, CS_TMC_CBUFLEVEL)); } while (words_left_to_read > 0) { - unsigned int data = etb_read_reg ? *etb_read_reg : _cs_read(d, CS_ETB_RAM_DATA); + uint32_t data = etb_read_reg ? *etb_read_reg : _cs_read(d, CS_ETB_RAM_DATA); if (0) { printf("read %08x, read ptr now %08x\n", data, _cs_read(d, CS_ETB_RAM_RD_PTR)); } @@ -329,7 +329,7 @@ int cs_get_trace_data(cs_device_t dev, void *buf, unsigned int size) if (d->v.etb.is_tmc_device) { /* The TMC spec says that once we've read all the data in the buffer, subsequent reads will read 0xFFFFFFFF. */ - unsigned int checkff = *etb_read_reg; + uint32_t checkff = *etb_read_reg; if (checkff != 0xFFFFFFFF) { diagf(" TMC ETB read 0x%08X, expected 0xFFFFFFFF\n", checkff);