diff --git a/.github/workflows/ci-docs.yml b/.github/workflows/ci-docs.yml index 4fd2cc5d7d4..f49640fb412 100644 --- a/.github/workflows/ci-docs.yml +++ b/.github/workflows/ci-docs.yml @@ -90,7 +90,7 @@ jobs: # We only use a non-zero build # when making multiple manual builds in one day. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=11.3.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=11.90.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi diff --git a/.github/workflows/ci-package.yml b/.github/workflows/ci-package.yml index 6d761225edd..a8c624da633 100644 --- a/.github/workflows/ci-package.yml +++ b/.github/workflows/ci-package.yml @@ -103,7 +103,7 @@ jobs: # We only use a non-zero build # when making multiple manual builds in one day. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=11.3.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=11.90.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi @@ -196,7 +196,7 @@ jobs: # XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=11.3.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=11.90.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi @@ -285,7 +285,7 @@ jobs: # XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=11.3.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=11.90.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi @@ -374,7 +374,7 @@ jobs: # XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=11.3.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=11.90.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi @@ -457,7 +457,7 @@ jobs: # XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=11.3.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=11.90.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi @@ -541,7 +541,7 @@ jobs: # XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER="11.3.$((`git log -n 1 --format=%ct` / (60*60*24)))" + export VERSION_NUMBER="11.90.$((`git log -n 1 --format=%ct` / (60*60*24)))" export PREFIX="cronbuild-" else export VERSION_NUMBER=${{ github.event.inputs.version }} diff --git a/CMakeLists.txt b/CMakeLists.txt index 5daf6658d6e..676552dfaa8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -606,7 +606,7 @@ endif (EXISTS "${PROJECT_SOURCE_DIR}/.svn") # N.B.: When updating this, update all the default versions in ci-package.yml # and ci-docs.yml. We should find a way to share (xref i#1565). -set(VERSION_NUMBER_DEFAULT "11.3.${VERSION_NUMBER_PATCHLEVEL}") +set(VERSION_NUMBER_DEFAULT "11.90.${VERSION_NUMBER_PATCHLEVEL}") # do not store the default VERSION_NUMBER in the cache to prevent a stale one # from preventing future version updates in a pre-existing build dir set(VERSION_NUMBER "" CACHE STRING "Version number: leave empty for default") diff --git a/api/docs/release.dox b/api/docs/release.dox index 759ecade1ef..5f2c8ef344f 100644 --- a/api/docs/release.dox +++ b/api/docs/release.dox @@ -126,7 +126,9 @@ clients. The changes between version \DR_VERSION and 11.3.0 include the following compatibility changes: - - No compatibility changes yet. + - The size of #dr_mcontext_t on 32-bit Arm has been increased by 4 so that + the struct can be pushed onto the 8-byte aligned stack without additional + padding. The offset of the field "simd" has changed. Further non-compatibility-affecting changes include: - Added support for reading a single drmemtrace trace file from stdin diff --git a/core/arch/aarchxx/mangle.c b/core/arch/aarchxx/mangle.c index 0bd8b70a0ee..7630dd6f78e 100644 --- a/core/arch/aarchxx/mangle.c +++ b/core/arch/aarchxx/mangle.c @@ -600,6 +600,11 @@ insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, ASSERT(proc_num_simd_registers() == MCXT_NUM_SIMD_SLOTS); ASSERT(ALIGNED(dstack_offs, get_ABI_stack_alignment())); + /* padding */ + PRE(ilist, instr, + XINST_CREATE_sub(dcontext, opnd_create_reg(DR_REG_SP), OPND_CREATE_INT(XSP_SZ))); + dstack_offs += XSP_SZ; + /* pc and aflags */ if (cci->skip_save_flags) { /* even if we skip flag saves we want to keep mcontext shape */ @@ -642,7 +647,6 @@ insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, if (spill) PRE(ilist, instr, instr_create_restore_from_tls(dcontext, scratch, slot)); } - ASSERT(ALIGNED(dstack_offs, get_ABI_stack_alignment())); /* We rely on dr_get_mcontext_priv() to fill in the app's stolen reg value * and sp value. @@ -668,12 +672,6 @@ insert_push_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, DR_REG_LIST_LENGTH_ARM, DR_REG_LIST_ARM)); dstack_offs += DR_REG_LIST_LENGTH_ARM * XSP_SZ; } - /* Make dstack_offs 8-byte aligned as we have just pushed an odd - * number of 4-byte registers. - */ - PRE(ilist, instr, - XINST_CREATE_sub(dcontext, opnd_create_reg(DR_REG_SP), OPND_CREATE_INT(XSP_SZ))); - dstack_offs += XSP_SZ; ASSERT(ALIGNED(dstack_offs, get_ABI_stack_alignment())); #endif @@ -779,9 +777,6 @@ insert_pop_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, instrlist } #else - /* This undoes the XINST_CREATE_sub done for alignment in XINST_CREATE_sub. */ - PRE(ilist, instr, - XINST_CREATE_add(dcontext, opnd_create_reg(DR_REG_SP), OPND_CREATE_INT(XSP_SZ))); /* We rely on dr_set_mcontext_priv() to set the app's stolen reg value, * and the stack swap to set the sp value: we assume the stolen reg on * the stack still has our TLS base in it. @@ -816,6 +811,11 @@ insert_pop_all_registers(dcontext_t *dcontext, clean_call_info_t *cci, instrlist OPND_CREATE_INT_MSR_NZCVQG(), opnd_create_reg(scratch))); PRE(ilist, instr, instr_create_restore_from_tls(dcontext, scratch, slot)); } + + /* padding */ + PRE(ilist, instr, + XINST_CREATE_add(dcontext, opnd_create_reg(DR_REG_SP), OPND_CREATE_INT(XSP_SZ))); + /* FIXME i#1551: once we have cci->num_simd_skip, skip this if possible */ PRE(ilist, instr, INSTR_CREATE_vldm_wb(dcontext, OPND_CREATE_MEMLIST(DR_REG_SP), SIMD_REG_LIST_LEN, diff --git a/core/arch/arm/arm.asm b/core/arch/arm/arm.asm index 5e7eaa5ad0f..e48cac60fa3 100644 --- a/core/arch/arm/arm.asm +++ b/core/arch/arm/arm.asm @@ -1,5 +1,6 @@ /* ********************************************************** * Copyright (c) 2014-2022 Google, Inc. All rights reserved. + * Copyright (c) 2025 Arm Limited. All rights reserved. * ********************************************************** */ /* @@ -54,31 +55,17 @@ DECL_EXTERN(initstack_mutex) #define SAVE_TO_DCONTEXT_VIA_REG(reg,offs,src) str src, PTRSZ [reg, POUND (offs)] /* offsetof(dcontext_t, dstack) */ -#define dstack_OFFSET 0x16c +#define dstack_OFFSET 0x170 /* offsetof(dcontext_t, is_exiting) */ #define is_exiting_OFFSET (dstack_OFFSET+1*ARG_SZ) -#ifdef X64 -# define MCXT_NUM_SIMD_SLOTS 32 -# define SIMD_REG_SIZE 64 -# define MCXT_NUM_PRED_SLOTS 17 /* P regs + FFR */ -# define PRED_REG_SIZE 8 -# define NUM_GPR_SLOTS 33 /* incl flags */ -# define GPR_REG_SIZE 8 -#else -# define MCXT_NUM_SIMD_SLOTS 16 -# define SIMD_REG_SIZE 16 -# define MCXT_NUM_PRED_SLOTS 0 -# define PRED_REG_SIZE 0 -# define NUM_GPR_SLOTS 17 /* incl flags */ -# define GPR_REG_SIZE 4 -#endif -#define PRE_SIMD_PADDING 0 -#define PRIV_MCXT_SIMD_SIZE (PRE_SIMD_PADDING + MCXT_NUM_SIMD_SLOTS*SIMD_REG_SIZE \ - + MCXT_NUM_PRED_SLOTS*PRED_REG_SIZE) -#define PRIV_MCXT_SIZE (NUM_GPR_SLOTS*GPR_REG_SIZE + PRIV_MCXT_SIMD_SIZE) -#define PRIV_MCXT_SP_FROM_SIMD (-(4*GPR_REG_SIZE)) /* flags, pc, lr, then sp */ -#define PRIV_MCXT_PC_FROM_SIMD (-(2*GPR_REG_SIZE)) /* flags, then pc */ +/* The struct priv_mcontext_t is defined in mcxtx_api.h. */ +#define PRIV_MCXT_SIZE 0x148 /* sizeof(priv_mcontext_t) */ +#define PRIV_MCXT_SP_OFFSET 0x34 /* offsetof(priv_mcontext_t, sp) */ +#define PRIV_MCXT_LR_OFFSET 0x38 /* offsetof(priv_mcontext_t, lr) */ +#define PRIV_MCXT_PC_OFFSET 0x3c /* offsetof(priv_mcontext_t, pc) */ +#define PRIV_MCXT_CPSR_OFFSET 0x40 /* offsetof(priv_mcontext_t, cpsr) */ +#define PRIV_MCXT_SIMD_OFFSET 0x48 /* offsetof(priv_mcontext_t, simd) */ #ifndef UNIX # error Non-Unix is not supported @@ -181,24 +168,31 @@ GLOBAL_LABEL(dr_call_on_clean_stack:) #ifdef DR_APP_EXPORTS DECLARE_EXPORTED_FUNC(dr_app_start) GLOBAL_LABEL(dr_app_start:) - push {lr} - vstmdb sp!, {d16-d31} - vstmdb sp!, {d0-d15} - mrs REG_R0, cpsr /* r0 is scratch */ - push {REG_R0} - /* We can't push all regs w/ writeback */ - stmdb sp, {REG_R0-r15} - str lr, [sp, #(PRIV_MCXT_PC_FROM_SIMD+4)] /* +4 b/c we pushed cpsr */ - /* we need the sp at function entry */ - mov REG_R0, sp - add REG_R0, REG_R0, #(PRIV_MCXT_SIMD_SIZE + 8) /* offset simd,cpsr,lr */ - str REG_R0, [sp, #(PRIV_MCXT_SP_FROM_SIMD+4)] /* +4 b/c we pushed cpsr */ - sub sp, sp, #(PRIV_MCXT_SIZE-PRIV_MCXT_SIMD_SIZE-4) /* simd,cpsr */ - mov REG_R0, sp +#if PRIV_MCXT_SIZE % 8 != 0 +# error Size of priv_mcontext_t should be 8-byte aligned. +#endif + /* space for mcontext + padding + LR (stack must be 8-byte aligned */ + sub sp, sp, #(PRIV_MCXT_SIZE + 8) + str lr, [sp, #(PRIV_MCXT_SIZE + 4)] +#if PRIV_MCXT_R0_OFFSET != 0 +# error +#endif + stmia sp, {r0-r12} + add r0, sp, #(PRIV_MCXT_SIZE + 8) /* get SP at function entry */ + str r0, [sp, #PRIV_MCXT_SP_OFFSET] + str lr, [sp, #PRIV_MCXT_LR_OFFSET] + str lr, [sp, #PRIV_MCXT_PC_OFFSET] /* save LR as PC */ + mrs r0, cpsr + str r0, [sp, #PRIV_MCXT_CPSR_OFFSET] + add r0, sp, #PRIV_MCXT_SIMD_OFFSET + vstmia r0!, {d0-d15} + vstmia r0!, {d16-d31} + mov r0, sp CALLC1(GLOBAL_REF(dr_app_start_helper), REG_R0) /* if we get here, DR is not taking over */ + ldr lr, [sp, #(PRIV_MCXT_SIZE + 4)] add sp, sp, #PRIV_MCXT_SIZE - pop {pc} + bx lr END_FUNC(dr_app_start) /* @@ -230,25 +224,32 @@ GLOBAL_LABEL(dr_app_running_under_dynamorio:) */ DECLARE_EXPORTED_FUNC(dynamorio_app_take_over) GLOBAL_LABEL(dynamorio_app_take_over:) - push {lr} - vstmdb sp!, {d16-d31} - vstmdb sp!, {d0-d15} - mrs REG_R0, cpsr /* r0 is scratch */ - push {REG_R0} - /* We can't push all regs w/ writeback */ - stmdb sp, {REG_R0-r15} - str lr, [sp, #(PRIV_MCXT_PC_FROM_SIMD+4)] /* +4 b/c we pushed cpsr */ - /* we need the sp at function entry */ - mov REG_R0, sp - add REG_R0, REG_R0, #(PRIV_MCXT_SIMD_SIZE + 8) /* offset simd,cpsr,lr */ - str REG_R0, [sp, #(PRIV_MCXT_SP_FROM_SIMD+4)] /* +4 b/c we pushed cpsr */ - sub sp, sp, #(PRIV_MCXT_SIZE-PRIV_MCXT_SIMD_SIZE-4) /* simd,cpsr */ - mov REG_R0, sp +#if PRIV_MCXT_SIZE % 8 != 0 +# error Size of priv_mcontext_t should be 8-byte aligned. +#endif + /* space for mcontext + padding + LR (stack must be 8-byte aligned */ + sub sp, sp, #(PRIV_MCXT_SIZE + 8) + str lr, [sp, #(PRIV_MCXT_SIZE + 4)] +#if PRIV_MCXT_R0_OFFSET != 0 +# error +#endif + stmia sp, {r0-r12} + add r0, sp, #(PRIV_MCXT_SIZE + 8) /* get SP at function entry */ + str r0, [sp, #PRIV_MCXT_SP_OFFSET] + str lr, [sp, #PRIV_MCXT_LR_OFFSET] + str lr, [sp, #PRIV_MCXT_PC_OFFSET] /* save LR as PC */ + mrs r0, cpsr + str r0, [sp, #PRIV_MCXT_CPSR_OFFSET] + add r0, sp, #PRIV_MCXT_SIMD_OFFSET + vstmia r0!, {d0-d15} + vstmia r0!, {d16-d31} + mov r0, sp CALLC1(GLOBAL_REF(dynamorio_app_take_over_helper), REG_R0) /* if we get here, DR is not taking over */ + ldr lr, [sp, #(PRIV_MCXT_SIZE + 4)] add sp, sp, #PRIV_MCXT_SIZE - pop {pc} - END_FUNC(dynamorio_app_take_over) + bx lr + END_FUNC(dr_app_start) /* diff --git a/core/lib/mcxtx_api.h b/core/lib/mcxtx_api.h index 8682cab1b1b..5e78c519386 100644 --- a/core/lib/mcxtx_api.h +++ b/core/lib/mcxtx_api.h @@ -128,6 +128,7 @@ uint apsr; /**< The application program status registers in AArch32. */ uint cpsr; /**< The current program status registers in AArch32. */ }; /**< The anonymous union of alternative names for apsr/cpsr register. */ + byte padding[4]; /**< The padding to get simd field 8-byte aligned. */ # endif /* 64/32-bit */ # ifdef X64 /* 64-bit */ diff --git a/suite/tests/client-interface/drx-scattergather-aarch64.cpp b/suite/tests/client-interface/drx-scattergather-aarch64.cpp index b8423495492..20f9e132f2d 100644 --- a/suite/tests/client-interface/drx-scattergather-aarch64.cpp +++ b/suite/tests/client-interface/drx-scattergather-aarch64.cpp @@ -831,8 +831,10 @@ template struct test_case_base_t { // Byte values || AA|| BB|| CC|| DD|| // Half values || AA|AA|| BB|BB|| CC|CC|| DD|DD|| // Word values ||AA|AA|AA|AA||BB|BB|BB|BB||CC|CC|CC|CC||DD|DD|DD|DD|| - assert(value_size != element_size_t::DOUBLE); switch (value_size) { + case element_size_t::DOUBLE: + assert(false); // Not reachable. + break; #define CASE(size, member, val0, val1, val2, val3) \ case element_size_t::size: \ member = { { { static_cast(offsets[0]), val0 }, \ diff --git a/suite/tests/tools.c b/suite/tests/tools.c index a57f386c671..174edfe63e8 100644 --- a/suite/tests/tools.c +++ b/suite/tests/tools.c @@ -378,16 +378,7 @@ nolibc_print(const char *str) # else SYS_write, # endif - 3, -# if defined(MACOS) || defined(ANDROID) - stderr->_file, - /* TODO i#1973: handle opaque FILE * on musl libc */ -# elif defined(MUSL) - STDERR_FILENO, -# else - stderr->_fileno, -# endif - str, nolibc_strlen(str)); + 3, STDERR_FILENO, str, nolibc_strlen(str)); } /* Safe print int syscall.