From 2dfd7cb461970ff1f14fc1b812466c4eeba1d0ce Mon Sep 17 00:00:00 2001 From: Edmund Grimley Evans Date: Fri, 17 Jan 2025 23:06:55 +0000 Subject: [PATCH] i#3699 ARM: Add padding to priv_mcontext_t for 8-byte alignment. (#7191) ef4482e13 caused a few additional failures. In particular, the child of a clone3 system call started with the register values shifted: R2 got the value that should be in R1 and so on. That caused the test linux.clone to fail. It turns out that some callers of insert_{push,pop}_all_registers expect that an unpadded priv_mcontext_t will be pushed/popped. Moving the 4-byte padding to above the struct instead of below it makes some tests pass but breaks others. So we add some padding in the middle of priv_mcontext_t, changing its size from 324 to 328. It will probably run slightly faster with the struct (and particularly the field "simd") 8-byte aligned. Update insert_{push,pop}_all_registers and arm.asm for the new struct layout, with some additional changes to arm.asm: + Delete irrelevant X64 macros. + Document the macros more clearly (they can be found with grep). + Change SP only at start and end of function (standard practice, makes debugging easier). + Avoid writing below SP (compatible with signal handlers). This change breaks compatibility so the version is increased to 11.90. Issue: #3699 --- .github/workflows/ci-docs.yml | 2 +- .github/workflows/ci-package.yml | 12 ++-- CMakeLists.txt | 2 +- api/docs/release.dox | 4 +- core/arch/aarchxx/mangle.c | 20 +++--- core/arch/arm/arm.asm | 107 ++++++++++++++++--------------- core/lib/mcxtx_api.h | 1 + 7 files changed, 76 insertions(+), 72 deletions(-) diff --git a/.github/workflows/ci-docs.yml b/.github/workflows/ci-docs.yml index 4fd2cc5d7d4..f49640fb412 100644 --- a/.github/workflows/ci-docs.yml +++ b/.github/workflows/ci-docs.yml @@ -90,7 +90,7 @@ jobs: # We only use a non-zero build # when making multiple manual builds in one day. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=11.3.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=11.90.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi diff --git a/.github/workflows/ci-package.yml b/.github/workflows/ci-package.yml index 6d761225edd..a8c624da633 100644 --- a/.github/workflows/ci-package.yml +++ b/.github/workflows/ci-package.yml @@ -103,7 +103,7 @@ jobs: # We only use a non-zero build # when making multiple manual builds in one day. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=11.3.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=11.90.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi @@ -196,7 +196,7 @@ jobs: # XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=11.3.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=11.90.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi @@ -285,7 +285,7 @@ jobs: # XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=11.3.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=11.90.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi @@ -374,7 +374,7 @@ jobs: # XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=11.3.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=11.90.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi @@ -457,7 +457,7 @@ jobs: # XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=11.3.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=11.90.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi @@ -541,7 +541,7 @@ jobs: # XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER="11.3.$((`git log -n 1 --format=%ct` / (60*60*24)))" + export VERSION_NUMBER="11.90.$((`git log -n 1 --format=%ct` / (60*60*24)))" export PREFIX="cronbuild-" else export VERSION_NUMBER=${{ github.event.inputs.version }} diff --git a/CMakeLists.txt b/CMakeLists.txt index 5daf6658d6e..676552dfaa8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -606,7 +606,7 @@ endif (EXISTS "${PROJECT_SOURCE_DIR}/.svn") # N.B.: When updating this, update all the default versions in ci-package.yml # and ci-docs.yml. We should find a way to share (xref i#1565). -set(VERSION_NUMBER_DEFAULT "11.3.${VERSION_NUMBER_PATCHLEVEL}") +set(VERSION_NUMBER_DEFAULT "11.90.${VERSION_NUMBER_PATCHLEVEL}") # do not store the default VERSION_NUMBER in the cache to prevent a stale one # from preventing future version updates in a pre-existing build dir set(VERSION_NUMBER "" CACHE STRING "Version number: leave empty for default") diff --git a/api/docs/release.dox b/api/docs/release.dox index 759ecade1ef..5f2c8ef344f 100644 --- a/api/docs/release.dox +++ b/api/docs/release.dox @@ -126,7 +126,9 @@ clients. The changes between version \DR_VERSION and 11.3.0 include the following compatibility changes: - - No compatibility changes yet. + - The size of #dr_mcontext_t on 32-bit Arm has been increased by 4 so that + the struct can be pushed onto the 8-byte aligned stack without additional + padding. The offset of the field "simd" has changed. Further non-compatibility-affecting changes include: - Added support for reading a single drmemtrace trace file from stdin diff --git a/core/arch/aarchxx/mangle.c b/core/arch/aarchxx/mangle.c index 0bd8b70a0ee..7630dd6f78e 100644 --- a/core/arch/aarchxx/mangle.c +++ b/core/arch/aarchxx/mangle.c @@ -600,6 +600,11 @@ insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, ASSERT(proc_num_simd_registers() == MCXT_NUM_SIMD_SLOTS); ASSERT(ALIGNED(dstack_offs, get_ABI_stack_alignment())); + /* padding */ + PRE(ilist, instr, + XINST_CREATE_sub(dcontext, opnd_create_reg(DR_REG_SP), OPND_CREATE_INT(XSP_SZ))); + dstack_offs += XSP_SZ; + /* pc and aflags */ if (cci->skip_save_flags) { /* even if we skip flag saves we want to keep mcontext shape */ @@ -642,7 +647,6 @@ insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, if (spill) PRE(ilist, instr, instr_create_restore_from_tls(dcontext, scratch, slot)); } - ASSERT(ALIGNED(dstack_offs, get_ABI_stack_alignment())); /* We rely on dr_get_mcontext_priv() to fill in the app's stolen reg value * and sp value. @@ -668,12 +672,6 @@ insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, DR_REG_LIST_LENGTH_ARM, DR_REG_LIST_ARM)); dstack_offs += DR_REG_LIST_LENGTH_ARM * XSP_SZ; } - /* Make dstack_offs 8-byte aligned as we have just pushed an odd - * number of 4-byte registers. - */ - PRE(ilist, instr, - XINST_CREATE_sub(dcontext, opnd_create_reg(DR_REG_SP), OPND_CREATE_INT(XSP_SZ))); - dstack_offs += XSP_SZ; ASSERT(ALIGNED(dstack_offs, get_ABI_stack_alignment())); #endif @@ -779,9 +777,6 @@ insert_pop_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, instrlist } #else - /* This undoes the XINST_CREATE_sub done for alignment in XINST_CREATE_sub. */ - PRE(ilist, instr, - XINST_CREATE_add(dcontext, opnd_create_reg(DR_REG_SP), OPND_CREATE_INT(XSP_SZ))); /* We rely on dr_set_mcontext_priv() to set the app's stolen reg value, * and the stack swap to set the sp value: we assume the stolen reg on * the stack still has our TLS base in it. @@ -816,6 +811,11 @@ insert_pop_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, instrlist OPND_CREATE_INT_MSR_NZCVQG(), opnd_create_reg(scratch))); PRE(ilist, instr, instr_create_restore_from_tls(dcontext, scratch, slot)); } + + /* padding */ + PRE(ilist, instr, + XINST_CREATE_add(dcontext, opnd_create_reg(DR_REG_SP), OPND_CREATE_INT(XSP_SZ))); + /* FIXME i#1551: once we have cci->num_simd_skip, skip this if possible */ PRE(ilist, instr, INSTR_CREATE_vldm_wb(dcontext, OPND_CREATE_MEMLIST(DR_REG_SP), SIMD_REG_LIST_LEN, diff --git a/core/arch/arm/arm.asm b/core/arch/arm/arm.asm index 5e7eaa5ad0f..e48cac60fa3 100644 --- a/core/arch/arm/arm.asm +++ b/core/arch/arm/arm.asm @@ -1,5 +1,6 @@ /* ********************************************************** * Copyright (c) 2014-2022 Google, Inc. All rights reserved. + * Copyright (c) 2025 Arm Limited. All rights reserved. * ********************************************************** */ /* @@ -54,31 +55,17 @@ DECL_EXTERN(initstack_mutex) #define SAVE_TO_DCONTEXT_VIA_REG(reg,offs,src) str src, PTRSZ [reg, POUND (offs)] /* offsetof(dcontext_t, dstack) */ -#define dstack_OFFSET 0x16c +#define dstack_OFFSET 0x170 /* offsetof(dcontext_t, is_exiting) */ #define is_exiting_OFFSET (dstack_OFFSET+1*ARG_SZ) -#ifdef X64 -# define MCXT_NUM_SIMD_SLOTS 32 -# define SIMD_REG_SIZE 64 -# define MCXT_NUM_PRED_SLOTS 17 /* P regs + FFR */ -# define PRED_REG_SIZE 8 -# define NUM_GPR_SLOTS 33 /* incl flags */ -# define GPR_REG_SIZE 8 -#else -# define MCXT_NUM_SIMD_SLOTS 16 -# define SIMD_REG_SIZE 16 -# define MCXT_NUM_PRED_SLOTS 0 -# define PRED_REG_SIZE 0 -# define NUM_GPR_SLOTS 17 /* incl flags */ -# define GPR_REG_SIZE 4 -#endif -#define PRE_SIMD_PADDING 0 -#define PRIV_MCXT_SIMD_SIZE (PRE_SIMD_PADDING + MCXT_NUM_SIMD_SLOTS*SIMD_REG_SIZE \ - + MCXT_NUM_PRED_SLOTS*PRED_REG_SIZE) -#define PRIV_MCXT_SIZE (NUM_GPR_SLOTS*GPR_REG_SIZE + PRIV_MCXT_SIMD_SIZE) -#define PRIV_MCXT_SP_FROM_SIMD (-(4*GPR_REG_SIZE)) /* flags, pc, lr, then sp */ -#define PRIV_MCXT_PC_FROM_SIMD (-(2*GPR_REG_SIZE)) /* flags, then pc */ +/* The struct priv_mcontext_t is defined in mcxtx_api.h. */ +#define PRIV_MCXT_SIZE 0x148 /* sizeof(priv_mcontext_t) */ +#define PRIV_MCXT_SP_OFFSET 0x34 /* offsetof(priv_mcontext_t, sp) */ +#define PRIV_MCXT_LR_OFFSET 0x38 /* offsetof(priv_mcontext_t, lr) */ +#define PRIV_MCXT_PC_OFFSET 0x3c /* offsetof(priv_mcontext_t, pc) */ +#define PRIV_MCXT_CPSR_OFFSET 0x40 /* offsetof(priv_mcontext_t, cpsr) */ +#define PRIV_MCXT_SIMD_OFFSET 0x48 /* offsetof(priv_mcontext_t, simd) */ #ifndef UNIX # error Non-Unix is not supported @@ -181,24 +168,31 @@ GLOBAL_LABEL(dr_call_on_clean_stack:) #ifdef DR_APP_EXPORTS DECLARE_EXPORTED_FUNC(dr_app_start) GLOBAL_LABEL(dr_app_start:) - push {lr} - vstmdb sp!, {d16-d31} - vstmdb sp!, {d0-d15} - mrs REG_R0, cpsr /* r0 is scratch */ - push {REG_R0} - /* We can't push all regs w/ writeback */ - stmdb sp, {REG_R0-r15} - str lr, [sp, #(PRIV_MCXT_PC_FROM_SIMD+4)] /* +4 b/c we pushed cpsr */ - /* we need the sp at function entry */ - mov REG_R0, sp - add REG_R0, REG_R0, #(PRIV_MCXT_SIMD_SIZE + 8) /* offset simd,cpsr,lr */ - str REG_R0, [sp, #(PRIV_MCXT_SP_FROM_SIMD+4)] /* +4 b/c we pushed cpsr */ - sub sp, sp, #(PRIV_MCXT_SIZE-PRIV_MCXT_SIMD_SIZE-4) /* simd,cpsr */ - mov REG_R0, sp +#if PRIV_MCXT_SIZE % 8 != 0 +# error Size of priv_mcontext_t should be 8-byte aligned. +#endif + /* space for mcontext + padding + LR (stack must be 8-byte aligned */ + sub sp, sp, #(PRIV_MCXT_SIZE + 8) + str lr, [sp, #(PRIV_MCXT_SIZE + 4)] +#if PRIV_MCXT_R0_OFFSET != 0 +# error +#endif + stmia sp, {r0-r12} + add r0, sp, #(PRIV_MCXT_SIZE + 8) /* get SP at function entry */ + str r0, [sp, #PRIV_MCXT_SP_OFFSET] + str lr, [sp, #PRIV_MCXT_LR_OFFSET] + str lr, [sp, #PRIV_MCXT_PC_OFFSET] /* save LR as PC */ + mrs r0, cpsr + str r0, [sp, #PRIV_MCXT_CPSR_OFFSET] + add r0, sp, #PRIV_MCXT_SIMD_OFFSET + vstmia r0!, {d0-d15} + vstmia r0!, {d16-d31} + mov r0, sp CALLC1(GLOBAL_REF(dr_app_start_helper), REG_R0) /* if we get here, DR is not taking over */ + ldr lr, [sp, #(PRIV_MCXT_SIZE + 4)] add sp, sp, #PRIV_MCXT_SIZE - pop {pc} + bx lr END_FUNC(dr_app_start) /* @@ -230,25 +224,32 @@ GLOBAL_LABEL(dr_app_running_under_dynamorio:) */ DECLARE_EXPORTED_FUNC(dynamorio_app_take_over) GLOBAL_LABEL(dynamorio_app_take_over:) - push {lr} - vstmdb sp!, {d16-d31} - vstmdb sp!, {d0-d15} - mrs REG_R0, cpsr /* r0 is scratch */ - push {REG_R0} - /* We can't push all regs w/ writeback */ - stmdb sp, {REG_R0-r15} - str lr, [sp, #(PRIV_MCXT_PC_FROM_SIMD+4)] /* +4 b/c we pushed cpsr */ - /* we need the sp at function entry */ - mov REG_R0, sp - add REG_R0, REG_R0, #(PRIV_MCXT_SIMD_SIZE + 8) /* offset simd,cpsr,lr */ - str REG_R0, [sp, #(PRIV_MCXT_SP_FROM_SIMD+4)] /* +4 b/c we pushed cpsr */ - sub sp, sp, #(PRIV_MCXT_SIZE-PRIV_MCXT_SIMD_SIZE-4) /* simd,cpsr */ - mov REG_R0, sp +#if PRIV_MCXT_SIZE % 8 != 0 +# error Size of priv_mcontext_t should be 8-byte aligned. +#endif + /* space for mcontext + padding + LR (stack must be 8-byte aligned */ + sub sp, sp, #(PRIV_MCXT_SIZE + 8) + str lr, [sp, #(PRIV_MCXT_SIZE + 4)] +#if PRIV_MCXT_R0_OFFSET != 0 +# error +#endif + stmia sp, {r0-r12} + add r0, sp, #(PRIV_MCXT_SIZE + 8) /* get SP at function entry */ + str r0, [sp, #PRIV_MCXT_SP_OFFSET] + str lr, [sp, #PRIV_MCXT_LR_OFFSET] + str lr, [sp, #PRIV_MCXT_PC_OFFSET] /* save LR as PC */ + mrs r0, cpsr + str r0, [sp, #PRIV_MCXT_CPSR_OFFSET] + add r0, sp, #PRIV_MCXT_SIMD_OFFSET + vstmia r0!, {d0-d15} + vstmia r0!, {d16-d31} + mov r0, sp CALLC1(GLOBAL_REF(dynamorio_app_take_over_helper), REG_R0) /* if we get here, DR is not taking over */ + ldr lr, [sp, #(PRIV_MCXT_SIZE + 4)] add sp, sp, #PRIV_MCXT_SIZE - pop {pc} - END_FUNC(dynamorio_app_take_over) + bx lr + END_FUNC(dr_app_start) /* diff --git a/core/lib/mcxtx_api.h b/core/lib/mcxtx_api.h index 8682cab1b1b..5e78c519386 100644 --- a/core/lib/mcxtx_api.h +++ b/core/lib/mcxtx_api.h @@ -128,6 +128,7 @@ uint apsr; /**< The application program status registers in AArch32. */ uint cpsr; /**< The current program status registers in AArch32. */ }; /**< The anonymous union of alternative names for apsr/cpsr register. */ + byte padding[4]; /**< The padding to get simd field 8-byte aligned. */ # endif /* 64/32-bit */ # ifdef X64 /* 64-bit */