From 2252520b3332021b2aed7b4d4d96946f60ba91c2 Mon Sep 17 00:00:00 2001 From: Alex Arslan Date: Wed, 15 Jan 2025 10:18:04 -0800 Subject: [PATCH] =?UTF-8?q?Revert=20"linux:=20workaround=20to=20avoid=20de?= =?UTF-8?q?adlock=20inside=20dl=5Fiterate=5Fphdr=20in=20glibc=20(=E2=80=A6?= =?UTF-8?q?"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 1f05953fab1e14e1e881a224d8719d2c7d637da3. --- src/julia_internal.h | 3 +- src/signals-mach.c | 9 ++- src/signals-unix.c | 186 ++++++++++++++++++++----------------------- src/signals-win.c | 19 ++--- src/stackwalk.c | 18 +---- 5 files changed, 105 insertions(+), 130 deletions(-) diff --git a/src/julia_internal.h b/src/julia_internal.h index a464025edf856..81e9ebb537667 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -208,7 +208,8 @@ JL_DLLEXPORT void jl_lock_profile(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER; JL_DLLEXPORT void jl_unlock_profile(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE; JL_DLLEXPORT void jl_lock_profile_wr(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER; JL_DLLEXPORT void jl_unlock_profile_wr(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE; -void jl_with_stackwalk_lock(void (*f)(void*) JL_NOTSAFEPOINT, void *ctx) JL_NOTSAFEPOINT; +int jl_lock_stackwalk(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER; +void jl_unlock_stackwalk(int lockret) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE; arraylist_t *jl_get_all_tasks_arraylist(void) JL_NOTSAFEPOINT; typedef struct { diff --git a/src/signals-mach.c b/src/signals-mach.c index 1c4af2cf9d033..24508a8902d5e 100644 --- a/src/signals-mach.c +++ b/src/signals-mach.c @@ -714,10 +714,13 @@ static void jl_unlock_profile_mach(int dlsymlock, int keymgr_locked) jl_unlock_profile(); } -void jl_with_stackwalk_lock(void (*f)(void*), void *ctx) +int jl_lock_stackwalk(void) +{ + return jl_lock_profile_mach(1); +} + +void jl_unlock_stackwalk(int lockret) { - int lockret = jl_lock_profile_mach(1); - f(ctx); jl_unlock_profile_mach(1, lockret); } diff --git a/src/signals-unix.c b/src/signals-unix.c index 91d3378068f84..394c4a108b647 100644 --- a/src/signals-unix.c +++ b/src/signals-unix.c @@ -308,27 +308,20 @@ int exc_reg_is_write_fault(uintptr_t esr) { #else #include #include -#include -typedef struct { - void (*f)(void*) JL_NOTSAFEPOINT; - void *ctx; -} callback_t; -static int with_dl_iterate_phdr_lock(struct dl_phdr_info *info, size_t size, void *data) +int jl_lock_stackwalk(void) { jl_lock_profile(); - callback_t *callback = (callback_t*)data; - callback->f(callback->ctx); - jl_unlock_profile(); - return 1; // only call this once + return 0; } -void jl_with_stackwalk_lock(void (*f)(void*), void *ctx) +void jl_unlock_stackwalk(int lockret) { - callback_t callback = {f, ctx}; - dl_iterate_phdr(with_dl_iterate_phdr_lock, &callback); + (void)lockret; + jl_unlock_profile(); } + #if defined(_OS_LINUX_) && (defined(_CPU_X86_64_) || defined(_CPU_X86_)) int is_write_fault(void *context) { ucontext_t *ctx = (ucontext_t*)context; @@ -442,7 +435,7 @@ JL_NO_ASAN static void segv_handler(int sig, siginfo_t *info, void *context) } pthread_mutex_t in_signal_lock; // shared with jl_delete_thread -static bt_context_t *usr2_signal_context; // protected by in_signal_lock +static bt_context_t *signal_context; // protected by in_signal_lock static int exit_signal_cond = -1; static int signal_caught_cond = -1; static int signals_inflight = 0; @@ -514,7 +507,7 @@ int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx) request = jl_atomic_load_acquire(&ptls2->signal_request); assert(request == 0 || request == -1); (void) request; jl_atomic_store_release(&ptls2->signal_request, 4); // prepare to resume normally, but later code may change this - *ctx = *usr2_signal_context; + *ctx = *signal_context; return 1; } @@ -594,8 +587,8 @@ void usr2_handler(int sig, siginfo_t *info, void *ctx) if (!jl_atomic_cmpswap(&ptls->signal_request, &request, -1)) return; if (request == 1) { - usr2_signal_context = jl_to_bt_context(ctx); - // acknowledge that we saw the signal_request and set usr2_signal_context + signal_context = jl_to_bt_context(ctx); + // acknowledge that we saw the signal_request and set signal_context int err; eventfd_t got = 1; err = write(signal_caught_cond, &got, sizeof(eventfd_t)); @@ -609,7 +602,7 @@ void usr2_handler(int sig, siginfo_t *info, void *ctx) if (err != sizeof(eventfd_t)) abort(); assert(got == 1); request = jl_atomic_exchange(&ptls->signal_request, -1); - usr2_signal_context = NULL; + signal_context = NULL; assert(request == 2 || request == 3 || request == 4); } int err; @@ -813,7 +806,7 @@ void trigger_profile_peek(void) jl_safe_printf("\n======================================================================================\n"); jl_safe_printf("Information request received. A stacktrace will print followed by a %.1f second profile\n", profile_peek_duration); jl_safe_printf("======================================================================================\n"); - if (profile_bt_size_max == 0) { + if (profile_bt_size_max == 0){ // If the buffer hasn't been initialized, initialize with default size // Keep these values synchronized with Profile.default_init() if (jl_profile_init(10000000, 1000000) == -1) { @@ -828,93 +821,59 @@ void trigger_profile_peek(void) profile_autostop_time = jl_hrtime() + (profile_peek_duration * 1e9); } -#if !defined(JL_DISABLE_LIBUNWIND) - -static jl_bt_element_t signal_bt_data[JL_MAX_BT_SIZE + 1]; -static size_t signal_bt_size = 0; -static void do_critical_profile(void *ctx) +// assumes holding `jl_lock_stackwalk` +void jl_profile_thread_unix(int tid, bt_context_t *signal_context) { - bt_context_t signal_context; - // sample each thread, round-robin style in reverse order - // (so that thread zero gets notified last) - int nthreads = jl_atomic_load_acquire(&jl_n_threads); - for (int i = nthreads; i-- > 0; ) { - // notify thread to stop - if (!jl_thread_suspend_and_get_state(i, 1, &signal_context)) - continue; - - // do backtrace on thread contexts for critical signals - // this part must be signal-handler safe - signal_bt_size += rec_backtrace_ctx(signal_bt_data + signal_bt_size, - JL_MAX_BT_SIZE / nthreads - 1, - &signal_context, NULL); - signal_bt_data[signal_bt_size++].uintptr = 0; - jl_thread_resume(i); + if (jl_profile_is_buffer_full()) { + // Buffer full: Delete the timer + jl_profile_stop_timer(); + return; } -} - -static void do_profile(void *ctx) -{ - bt_context_t signal_context; - int nthreads = jl_atomic_load_acquire(&jl_n_threads); - int *randperm = profile_get_randperm(nthreads); - for (int idx = nthreads; idx-- > 0; ) { - // Stop the threads in the random order. - int tid = randperm[idx]; - // do backtrace for profiler - if (!profile_running) - return; - if (jl_profile_is_buffer_full()) { - // Buffer full: Delete the timer - jl_profile_stop_timer(); - return; - } - // notify thread to stop - if (!jl_thread_suspend_and_get_state(tid, 1, &signal_context)) - return; - // unwinding can fail, so keep track of the current state - // and restore from the SEGV handler if anything happens. - jl_jmp_buf *old_buf = jl_get_safe_restore(); - jl_jmp_buf buf; - - jl_set_safe_restore(&buf); - if (jl_setjmp(buf, 0)) { - jl_safe_printf("WARNING: profiler attempt to access an invalid memory location\n"); - } - else { - // Get backtrace data - profile_bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)profile_bt_data_prof + profile_bt_size_cur, - profile_bt_size_max - profile_bt_size_cur - 1, &signal_context, NULL); - } - jl_set_safe_restore(old_buf); + // notify thread to stop + if (!jl_thread_suspend_and_get_state(tid, 1, signal_context)) + return; + // unwinding can fail, so keep track of the current state + // and restore from the SEGV handler if anything happens. + jl_jmp_buf *old_buf = jl_get_safe_restore(); + jl_jmp_buf buf; + + jl_set_safe_restore(&buf); + if (jl_setjmp(buf, 0)) { + jl_safe_printf("WARNING: profiler attempt to access an invalid memory location\n"); + } else { + // Get backtrace data + profile_bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)profile_bt_data_prof + profile_bt_size_cur, + profile_bt_size_max - profile_bt_size_cur - 1, signal_context, NULL); + } + jl_set_safe_restore(old_buf); - jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; - // store threadid but add 1 as 0 is preserved to indicate end of block - profile_bt_data_prof[profile_bt_size_cur++].uintptr = ptls2->tid + 1; + // store threadid but add 1 as 0 is preserved to indicate end of block + profile_bt_data_prof[profile_bt_size_cur++].uintptr = ptls2->tid + 1; - // store task id (never null) - profile_bt_data_prof[profile_bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls2->current_task); + // store task id (never null) + profile_bt_data_prof[profile_bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls2->current_task); - // store cpu cycle clock - profile_bt_data_prof[profile_bt_size_cur++].uintptr = cycleclock(); + // store cpu cycle clock + profile_bt_data_prof[profile_bt_size_cur++].uintptr = cycleclock(); - // store whether thread is sleeping (don't ever encode a state as `0` since is preserved to indicate end of block) - int state = jl_atomic_load_relaxed(&ptls2->sleep_check_state) == 0 ? PROFILE_STATE_THREAD_NOT_SLEEPING : PROFILE_STATE_THREAD_SLEEPING; - profile_bt_data_prof[profile_bt_size_cur++].uintptr = state; + // store whether thread is sleeping (don't ever encode a state as `0` since is preserved to indicate end of block) + int state = jl_atomic_load_relaxed(&ptls2->sleep_check_state) == 0 ? PROFILE_STATE_THREAD_NOT_SLEEPING : PROFILE_STATE_THREAD_SLEEPING; + profile_bt_data_prof[profile_bt_size_cur++].uintptr = state; - // Mark the end of this block with two 0's - profile_bt_data_prof[profile_bt_size_cur++].uintptr = 0; - profile_bt_data_prof[profile_bt_size_cur++].uintptr = 0; + // Mark the end of this block with two 0's + profile_bt_data_prof[profile_bt_size_cur++].uintptr = 0; + profile_bt_data_prof[profile_bt_size_cur++].uintptr = 0; - // notify thread to resume - jl_thread_resume(tid); - } + // notify thread to resume + jl_thread_resume(tid); } -#endif static void *signal_listener(void *arg) { + static jl_bt_element_t bt_data[JL_MAX_BT_SIZE + 1]; + static size_t bt_size = 0; sigset_t sset; int sig, critical, profile; jl_sigsetset(&sset); @@ -1046,10 +1005,28 @@ static void *signal_listener(void *arg) } } - signal_bt_size = 0; + int nthreads = jl_atomic_load_acquire(&jl_n_threads); + bt_size = 0; #if !defined(JL_DISABLE_LIBUNWIND) + bt_context_t signal_context; if (critical) { - jl_with_stackwalk_lock(do_critical_profile, NULL); + int lockret = jl_lock_stackwalk(); + // sample each thread, round-robin style in reverse order + // (so that thread zero gets notified last) + for (int i = nthreads; i-- > 0; ) { + // notify thread to stop + if (!jl_thread_suspend_and_get_state(i, 1, &signal_context)) + continue; + + // do backtrace on thread contexts for critical signals + // this part must be signal-handler safe + bt_size += rec_backtrace_ctx(bt_data + bt_size, + JL_MAX_BT_SIZE / nthreads - 1, + &signal_context, NULL); + bt_data[bt_size++].uintptr = 0; + jl_thread_resume(i); + } + jl_unlock_stackwalk(lockret); } else if (profile) { if (profile_all_tasks) { @@ -1057,7 +1034,17 @@ static void *signal_listener(void *arg) jl_profile_task(); } else { - jl_with_stackwalk_lock(do_profile, NULL); + int lockret = jl_lock_stackwalk(); + int *randperm = profile_get_randperm(nthreads); + for (int idx = nthreads; idx-- > 0; ) { + // Stop the threads in the random order. + int i = randperm[idx]; + // do backtrace for profiler + if (profile_running) { + jl_profile_thread_unix(i, &signal_context); + } + } + jl_unlock_stackwalk(lockret); } } #ifndef HAVE_MACH @@ -1078,12 +1065,11 @@ static void *signal_listener(void *arg) //#if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 199309L && !HAVE_KEVENT // si_code = info.si_code; //#endif - jl_exit_thread0(sig, signal_bt_data, signal_bt_size); + jl_exit_thread0(sig, bt_data, bt_size); } else if (critical) { // critical in this case actually means SIGINFO request #ifndef SIGINFO // SIGINFO already prints something similar automatically - int nthreads = jl_atomic_load_acquire(&jl_n_threads); int n_threads_running = 0; for (int idx = nthreads; idx-- > 0; ) { jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[idx]; @@ -1094,8 +1080,8 @@ static void *signal_listener(void *arg) jl_safe_printf("\nsignal (%d): %s\n", sig, strsignal(sig)); size_t i; - for (i = 0; i < signal_bt_size; i += jl_bt_entry_size(signal_bt_data + i)) { - jl_print_bt_entry_codeloc(signal_bt_data + i); + for (i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) { + jl_print_bt_entry_codeloc(bt_data + i); } } } diff --git a/src/signals-win.c b/src/signals-win.c index c8ae74f52dba4..dbf95fdb19791 100644 --- a/src/signals-win.c +++ b/src/signals-win.c @@ -383,25 +383,20 @@ void jl_thread_resume(int tid) } } -void jl_lock_stackwalk(void) +int jl_lock_stackwalk(void) { uv_mutex_lock(&jl_in_stackwalk); jl_lock_profile(); + return 0; } -void jl_unlock_stackwalk(void) +void jl_unlock_stackwalk(int lockret) { + (void)lockret; jl_unlock_profile(); uv_mutex_unlock(&jl_in_stackwalk); } -void jl_with_stackwalk_lock(void (*f)(void*), void *ctx) -{ - jl_lock_stackwalk(); - f(ctx); - jl_unlock_stackwalk(); -} - static DWORD WINAPI profile_bt( LPVOID lparam ) { @@ -421,10 +416,10 @@ static DWORD WINAPI profile_bt( LPVOID lparam ) } else { // TODO: bring this up to parity with other OS by adding loop over tid here - jl_lock_stackwalk(); + int lockret = jl_lock_stackwalk(); CONTEXT ctxThread; if (!jl_thread_suspend_and_get_state(0, 0, &ctxThread)) { - jl_unlock_stackwalk(); + jl_unlock_stackwalk(lockret); fputs("failed to suspend main thread. aborting profiling.", stderr); jl_profile_stop_timer(); break; @@ -451,7 +446,7 @@ static DWORD WINAPI profile_bt( LPVOID lparam ) // Mark the end of this block with two 0's profile_bt_data_prof[profile_bt_size_cur++].uintptr = 0; profile_bt_data_prof[profile_bt_size_cur++].uintptr = 0; - jl_unlock_stackwalk(); + jl_unlock_stackwalk(lockret); jl_thread_resume(0); jl_check_profile_autostop(); } diff --git a/src/stackwalk.c b/src/stackwalk.c index f1d807908cf42..28adef030395e 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -1249,17 +1249,6 @@ return 0; #endif } -typedef struct { - int16_t old; - bt_context_t *c; - int success; -} suspend_t; -static void suspend(void *ctx) -{ - suspend_t *suspenddata = (suspend_t*)ctx; - suspenddata->success = jl_thread_suspend_and_get_state(suspenddata->old, 1, suspenddata->c); -} - JL_DLLEXPORT jl_record_backtrace_result_t jl_record_backtrace(jl_task_t *t, jl_bt_element_t *bt_data, size_t max_bt_size, int all_tasks_profiler) JL_NOTSAFEPOINT { int16_t tid = INT16_MAX; @@ -1281,14 +1270,15 @@ JL_DLLEXPORT jl_record_backtrace_result_t jl_record_backtrace(jl_task_t *t, jl_b bt_context_t c; int16_t old; for (old = -1; !jl_atomic_cmpswap(&t->tid, &old, tid) && old != tid; old = -1) { + int lockret = jl_lock_stackwalk(); // if this task is already running somewhere, we need to stop the thread it is running on and query its state - suspend_t suspenddata = {old, &c}; - jl_with_stackwalk_lock(suspend, &suspenddata); - if (!suspenddata.success) { + if (!jl_thread_suspend_and_get_state(old, 1, &c)) { + jl_unlock_stackwalk(lockret); if (jl_atomic_load_relaxed(&t->tid) != old) continue; return result; } + jl_unlock_stackwalk(lockret); if (jl_atomic_load_relaxed(&t->tid) == old) { jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[old]; if (ptls2->previous_task == t || // we might print the wrong stack here, since we can't know whether we executed the swapcontext yet or not, but it at least avoids trying to access the state inside uc_mcontext which might not be set yet