From 9d2210ce13ab4af0c31b0419a96044383cc48d29 Mon Sep 17 00:00:00 2001 From: Max Horn Date: Tue, 21 Jan 2025 15:05:31 +0100 Subject: [PATCH 1/3] Remove jl_task_stack_buffer This was removed previously in PR #54527 but had to be reverted in PR #54559 as one usage remained (more by accident than by design). This has since then been resolved. --- src/jl_exported_funcs.inc | 1 - src/julia_gcext.h | 9 --------- src/task.c | 28 ---------------------------- 3 files changed, 38 deletions(-) diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index b92380df7a49c..c1b29a091511b 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -449,7 +449,6 @@ XX(jl_tagged_gensym) \ XX(jl_take_buffer) \ XX(jl_task_get_next) \ - XX(jl_task_stack_buffer) \ XX(jl_termios_size) \ XX(jl_test_cpu_feature) \ XX(jl_threadid) \ diff --git a/src/julia_gcext.h b/src/julia_gcext.h index 05140e4b09ace..e124f58a09402 100644 --- a/src/julia_gcext.h +++ b/src/julia_gcext.h @@ -135,15 +135,6 @@ JL_DLLEXPORT int jl_gc_conservative_gc_support_enabled(void); // NOTE: Only valid to call from within a GC context. JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) JL_NOTSAFEPOINT; -// Return a non-null pointer to the start of the stack area if the task -// has an associated stack buffer. In that case, *size will also contain -// the size of that stack buffer upon return. Also, if task is a thread's -// current task, that thread's id will be stored in *tid; otherwise, -// *tid will be set to -1. -// -// DEPRECATED: use jl_active_task_stack() instead. -JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *tid); - // Query the active and total stack range for the given task, and set // *active_start and *active_end respectively *total_start and *total_end // accordingly. 
The range for the active part is a best-effort approximation diff --git a/src/task.c b/src/task.c index d56d60eb58cb5..37e7f0e1f5440 100644 --- a/src/task.c +++ b/src/task.c @@ -352,34 +352,6 @@ void JL_NORETURN jl_finish_task(jl_task_t *ct) abort(); } -JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *ptid) -{ - size_t off = 0; -#ifndef _OS_WINDOWS_ - jl_ptls_t ptls0 = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; - if (ptls0->root_task == task) { - // See jl_init_root_task(). The root task of the main thread - // has its buffer enlarged by an artificial 3000000 bytes, but - // that means that the start of the buffer usually points to - // inaccessible memory. We need to correct for this. - off = ROOT_TASK_STACK_ADJUSTMENT; - } -#endif - jl_ptls_t ptls2 = task->ptls; - *ptid = -1; - if (ptls2) { - *ptid = jl_atomic_load_relaxed(&task->tid); -#ifdef COPY_STACKS - if (task->ctx.copy_stack) { - *size = ptls2->stacksize; - return (char *)ptls2->stackbase - *size; - } -#endif - } - *size = task->ctx.bufsz - off; - return (void *)((char *)task->ctx.stkbuf + off); -} - JL_DLLEXPORT void jl_active_task_stack(jl_task_t *task, char **active_start, char **active_end, char **total_start, char **total_end) From be7b44d3627888e2526d435c45426d3286de4fa1 Mon Sep 17 00:00:00 2001 From: Max Horn Date: Tue, 21 Jan 2025 15:21:34 +0100 Subject: [PATCH 2/3] Move jl_task_t to julia_threads.h --- src/julia.h | 85 +++++---------------------------------------- src/julia_threads.h | 69 ++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 77 deletions(-) diff --git a/src/julia.h b/src/julia.h index b5416568b7ae9..6c3ea11b6b29a 100644 --- a/src/julia.h +++ b/src/julia.h @@ -72,21 +72,20 @@ typedef struct _jl_tls_states_t *jl_ptls_t; #endif #include "gc-interface.h" #include "julia_atomics.h" -#include "julia_threads.h" #include "julia_assert.h" +// the common fields are hidden before the pointer, but the following macro is +// used to indicate 
which types below are subtypes of jl_value_t +#define JL_DATA_TYPE +typedef struct _jl_value_t jl_value_t; +#include "julia_threads.h" + #ifdef __cplusplus extern "C" { #endif // core data types ------------------------------------------------------------ -// the common fields are hidden before the pointer, but the following macro is -// used to indicate which types below are subtypes of jl_value_t -#define JL_DATA_TYPE - -typedef struct _jl_value_t jl_value_t; - struct _jl_taggedvalue_bits { uintptr_t gc:2; uintptr_t in_image:1; @@ -484,9 +483,6 @@ typedef struct _jl_abi_override_t { jl_method_instance_t *def; } jl_abi_override_t; -// all values are callable as Functions -typedef jl_value_t jl_function_t; - typedef struct { JL_DATA_TYPE jl_sym_t *name; @@ -2263,12 +2259,8 @@ JL_DLLEXPORT void jl_sigatomic_end(void); // tasks and exceptions ------------------------------------------------------- -typedef struct _jl_timing_block_t jl_timing_block_t; -typedef struct _jl_timing_event_t jl_timing_event_t; -typedef struct _jl_excstack_t jl_excstack_t; - // info describing an exception handler -typedef struct _jl_handler_t { +struct _jl_handler_t { jl_jmp_buf eh_ctx; jl_gcframe_t *gcstack; jl_value_t *scope; @@ -2278,68 +2270,7 @@ typedef struct _jl_handler_t { sig_atomic_t defer_signal; jl_timing_block_t *timing_stack; size_t world_age; -} jl_handler_t; - -#define JL_RNG_SIZE 5 // xoshiro 4 + splitmix 1 - -typedef struct _jl_task_t { - JL_DATA_TYPE - jl_value_t *next; // invasive linked list for scheduler - jl_value_t *queue; // invasive linked list for scheduler - jl_value_t *tls; - jl_value_t *donenotify; - jl_value_t *result; - jl_value_t *scope; - jl_function_t *start; - _Atomic(uint8_t) _state; - uint8_t sticky; // record whether this Task can be migrated to a new thread - uint16_t priority; - _Atomic(uint8_t) _isexception; // set if `result` is an exception to throw or that we exited with - uint8_t pad0[3]; - // === 64 bytes (cache line) - uint64_t 
rngState[JL_RNG_SIZE]; - // flag indicating whether or not to record timing metrics for this task - uint8_t metrics_enabled; - uint8_t pad1[3]; - // timestamp this task first entered the run queue - _Atomic(uint64_t) first_enqueued_at; - // timestamp this task was most recently scheduled to run - _Atomic(uint64_t) last_started_running_at; - // time this task has spent running; updated when it yields or finishes. - _Atomic(uint64_t) running_time_ns; - // === 64 bytes (cache line) - // timestamp this task finished (i.e. entered state DONE or FAILED). - _Atomic(uint64_t) finished_at; - -// hidden state: - - // id of owning thread - does not need to be defined until the task runs - _Atomic(int16_t) tid; - // threadpool id - int8_t threadpoolid; - // Reentrancy bits - // Bit 0: 1 if we are currently running inference/codegen - // Bit 1-2: 0-3 counter of how many times we've reentered inference - // Bit 3: 1 if we are writing the image and inference is illegal - uint8_t reentrant_timing; - // 2 bytes of padding on 32-bit, 6 bytes on 64-bit - // uint16_t padding2_32; - // uint48_t padding2_64; - // saved gc stack top for context switches - jl_gcframe_t *gcstack; - size_t world_age; - // quick lookup for current ptls - jl_ptls_t ptls; // == jl_all_tls_states[tid] -#ifdef USE_TRACY - const char *name; -#endif - // saved exception stack - jl_excstack_t *excstack; - // current exception handler - jl_handler_t *eh; - // saved thread state - jl_ucontext_t ctx; // pointer into stkbuf, if suspended -} jl_task_t; +}; #define JL_TASK_STATE_RUNNABLE 0 #define JL_TASK_STATE_DONE 1 diff --git a/src/julia_threads.h b/src/julia_threads.h index b6ef65dc7fe52..0f0ee11d1918c 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -218,6 +218,75 @@ typedef struct _jl_tls_states_t { #endif } jl_tls_states_t; +#define JL_RNG_SIZE 5 // xoshiro 4 + splitmix 1 + +// all values are callable as Functions +typedef jl_value_t jl_function_t; + +typedef struct _jl_timing_block_t 
jl_timing_block_t; +typedef struct _jl_timing_event_t jl_timing_event_t; +typedef struct _jl_excstack_t jl_excstack_t; +typedef struct _jl_handler_t jl_handler_t; + +typedef struct _jl_task_t { + JL_DATA_TYPE + jl_value_t *next; // invasive linked list for scheduler + jl_value_t *queue; // invasive linked list for scheduler + jl_value_t *tls; + jl_value_t *donenotify; + jl_value_t *result; + jl_value_t *scope; + jl_function_t *start; + _Atomic(uint8_t) _state; + uint8_t sticky; // record whether this Task can be migrated to a new thread + uint16_t priority; + _Atomic(uint8_t) _isexception; // set if `result` is an exception to throw or that we exited with + uint8_t pad0[3]; + // === 64 bytes (cache line) + uint64_t rngState[JL_RNG_SIZE]; + // flag indicating whether or not to record timing metrics for this task + uint8_t metrics_enabled; + uint8_t pad1[3]; + // timestamp this task first entered the run queue + _Atomic(uint64_t) first_enqueued_at; + // timestamp this task was most recently scheduled to run + _Atomic(uint64_t) last_started_running_at; + // time this task has spent running; updated when it yields or finishes. + _Atomic(uint64_t) running_time_ns; + // === 64 bytes (cache line) + // timestamp this task finished (i.e. entered state DONE or FAILED). 
+ _Atomic(uint64_t) finished_at; + +// hidden state: + + // id of owning thread - does not need to be defined until the task runs + _Atomic(int16_t) tid; + // threadpool id + int8_t threadpoolid; + // Reentrancy bits + // Bit 0: 1 if we are currently running inference/codegen + // Bit 1-2: 0-3 counter of how many times we've reentered inference + // Bit 3: 1 if we are writing the image and inference is illegal + uint8_t reentrant_timing; + // 2 bytes of padding on 32-bit, 6 bytes on 64-bit + // uint16_t padding2_32; + // uint48_t padding2_64; + // saved gc stack top for context switches + jl_gcframe_t *gcstack; + size_t world_age; + // quick lookup for current ptls + jl_ptls_t ptls; // == jl_all_tls_states[tid] +#ifdef USE_TRACY + const char *name; +#endif + // saved exception stack + jl_excstack_t *excstack; + // current exception handler + jl_handler_t *eh; + // saved thread state + jl_ucontext_t ctx; // pointer into stkbuf, if suspended +} jl_task_t; + JL_DLLEXPORT void *jl_get_ptls_states(void); // Update codegen version in `ccall.cpp` after changing either `pause` or `wake` From df689651186acecb2572ff6fcb8a38978f076c03 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Wed, 6 Nov 2024 17:33:47 +0000 Subject: [PATCH 3/3] fix jl_active_task_stack to return correct values Seems potentially slightly costly to need to generate these extra writes for every safepoint transition to keep track of the current stack location, but that is potentially an unavoidable cost to doing conservative stack scanning for gc. 
--- src/gc-stock.c | 1 + src/julia_gcext.h | 3 ++ src/julia_internal.h | 12 -------- src/julia_threads.h | 17 +++++++++++ src/llvm-codegen-shared.h | 29 +++++++++++------- src/llvm-ptls.cpp | 4 +-- src/safepoint.c | 2 ++ src/signals-mach.c | 11 +++++++ src/task.c | 63 ++++++++++++++++----------------------- test/gcext/gcext-test.jl | 5 ++-- test/gcext/gcext.c | 39 +++++++++++++++--------- 11 files changed, 107 insertions(+), 79 deletions(-) diff --git a/src/gc-stock.c b/src/gc-stock.c index 8118b3c5629ae..ea4654aa8a1b6 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -3414,6 +3414,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) } jl_gc_debug_print(); + ct->ctx.activefp = (char*)jl_get_frame_addr(); int8_t old_state = jl_atomic_load_relaxed(&ptls->gc_state); jl_atomic_store_release(&ptls->gc_state, JL_GC_STATE_WAITING); // `jl_safepoint_start_gc()` makes sure only one thread can run the GC. diff --git a/src/julia_gcext.h b/src/julia_gcext.h index e124f58a09402..6403b586bcefe 100644 --- a/src/julia_gcext.h +++ b/src/julia_gcext.h @@ -139,6 +139,9 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) JL_NOTSAFEPOINT; // *active_start and *active_end respectively *total_start and *total_end // accordingly. The range for the active part is a best-effort approximation // and may not be tight. +// +// NOTE: Only valid to call from within a GC context. May return incorrect +// values and segfault otherwise. 
JL_DLLEXPORT void jl_active_task_stack(jl_task_t *task, char **active_start, char **active_end, char **total_start, char **total_end) JL_NOTSAFEPOINT; diff --git a/src/julia_internal.h b/src/julia_internal.h index 3e4967c9d4dca..77c05274de68c 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -1765,18 +1765,6 @@ JL_DLLEXPORT unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *fi void register_eh_frames(uint8_t *Addr, size_t Size) JL_NOTSAFEPOINT; void deregister_eh_frames(uint8_t *Addr, size_t Size) JL_NOTSAFEPOINT; -STATIC_INLINE void *jl_get_frame_addr(void) JL_NOTSAFEPOINT -{ -#ifdef __GNUC__ - return __builtin_frame_address(0); -#else - void *dummy = NULL; - // The mask is to suppress the compiler warning about returning - // address of local variable - return (void*)((uintptr_t)&dummy & ~(uintptr_t)15); -#endif -} - // Log `msg` to the current logger by calling CoreLogging.logmsg_shim() on the // julia side. If any of module, group, id, file or line are NULL, these will // be passed to the julia side as `nothing`. 
If `kwargs` is NULL an empty set diff --git a/src/julia_threads.h b/src/julia_threads.h index 0f0ee11d1918c..18b05b2b72d35 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -96,6 +96,7 @@ typedef struct { _jl_ucontext_t *ctx; jl_stack_context_t *copy_ctx; }; + void *activefp; void *stkbuf; // malloc'd memory (either copybuf or stack) size_t bufsz; // actual sizeof stkbuf unsigned int copy_stack:31; // sizeof stack for copybuf @@ -226,6 +227,7 @@ typedef jl_value_t jl_function_t; typedef struct _jl_timing_block_t jl_timing_block_t; typedef struct _jl_timing_event_t jl_timing_event_t; typedef struct _jl_excstack_t jl_excstack_t; + typedef struct _jl_handler_t jl_handler_t; typedef struct _jl_task_t { @@ -316,6 +318,18 @@ JL_DLLEXPORT void (jl_cpu_pause)(void); JL_DLLEXPORT void (jl_cpu_suspend)(void); JL_DLLEXPORT void (jl_cpu_wake)(void); +STATIC_INLINE void *jl_get_frame_addr(void) JL_NOTSAFEPOINT +{ +#ifdef __GNUC__ + return __builtin_frame_address(0); +#else + void *dummy = NULL; + // The mask is to suppress the compiler warning about returning + // address of local variable + return (void*)((uintptr_t)&dummy & ~(uintptr_t)15); +#endif +} + #ifdef __clang_gcanalyzer__ // Note that the sigint safepoint can also trigger GC, albeit less likely void jl_gc_safepoint_(jl_ptls_t tls); @@ -343,6 +357,9 @@ STATIC_INLINE int8_t jl_gc_state_set(jl_ptls_t ptls, int8_t state, { assert(old_state != JL_GC_PARALLEL_COLLECTOR_THREAD); assert(old_state != JL_GC_CONCURRENT_COLLECTOR_THREAD); + if (__builtin_constant_p(state) ? state : // required if !old_state && state, otherwise optional + __builtin_constant_p(old_state) ? 
!old_state : 1) + jl_atomic_load_relaxed(&ptls->current_task)->ctx.activefp = (char*)jl_get_frame_addr(); jl_atomic_store_release(&ptls->gc_state, state); if (state == JL_GC_STATE_UNSAFE || old_state == JL_GC_STATE_UNSAFE) jl_gc_safepoint_(ptls); diff --git a/src/llvm-codegen-shared.h b/src/llvm-codegen-shared.h index ff6f5a97299d7..ae83b6ffcb9f4 100644 --- a/src/llvm-codegen-shared.h +++ b/src/llvm-codegen-shared.h @@ -237,9 +237,10 @@ static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Type *T_s emit_signal_fence(builder); } -static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::Value *state, llvm::Value *old_state, bool final) +static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *task, llvm::MDNode *tbaa, llvm::Value *state, llvm::Value *old_state, bool final) { using namespace llvm; + Value *ptls = get_current_ptls_from_task(builder, task, tbaa); Type *T_int8 = state->getType(); unsigned offset = offsetof(jl_tls_states_t, gc_state); Value *gc_state = builder.CreateConstInBoundsGEP1_32(T_int8, ptls, offset, "gc_state"); @@ -247,6 +248,13 @@ static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::T old_state = builder.CreateLoad(T_int8, gc_state); cast(old_state)->setOrdering(AtomicOrdering::Monotonic); } + if (isa(state) ? state == builder.getInt8(0) : + isa(old_state) ? 
old_state == builder.getInt8(0) : true) { + unsigned offset = offsetof(jl_task_t, ctx.activefp); + Value *currentfp = builder.CreateConstInBoundsGEP1_32(T_int8, task, offset, "gc_state"); + Value *fp = builder.CreateIntrinsic(Intrinsic::frameaddress, {builder.getPtrTy()}, {builder.getInt32(0)}); + builder.CreateAlignedStore(fp, currentfp, Align(sizeof(void*))); + } builder.CreateAlignedStore(state, gc_state, Align(sizeof(void*)))->setOrdering(AtomicOrdering::Release); if (auto *C = dyn_cast(old_state)) if (C->isZero()) @@ -261,39 +269,38 @@ static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::T builder.CreateICmpEQ(state, zero8)), passBB, exitBB); builder.SetInsertPoint(passBB); - MDNode *tbaa = get_tbaa_const(builder.getContext()); - emit_gc_safepoint(builder, T_size, ptls, tbaa, final); + emit_gc_safepoint(builder, T_size, ptls, get_tbaa_const(builder.getContext()), final); builder.CreateBr(exitBB); builder.SetInsertPoint(exitBB); return old_state; } -static inline llvm::Value *emit_gc_unsafe_enter(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, bool final) +static inline llvm::Value *emit_gc_unsafe_enter(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *task, llvm::MDNode *tbaa, bool final) { using namespace llvm; Value *state = builder.getInt8(0); - return emit_gc_state_set(builder, T_size, ptls, state, nullptr, final); + return emit_gc_state_set(builder, T_size, task, tbaa, state, nullptr, final); } -static inline llvm::Value *emit_gc_unsafe_leave(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::Value *state, bool final) +static inline llvm::Value *emit_gc_unsafe_leave(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *task, llvm::MDNode *tbaa, llvm::Value *state, bool final) { using namespace llvm; Value *old_state = builder.getInt8(JL_GC_STATE_UNSAFE); - return emit_gc_state_set(builder, T_size, ptls, state, old_state, final); + return emit_gc_state_set(builder, T_size, 
task, tbaa, state, old_state, final); } -static inline llvm::Value *emit_gc_safe_enter(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, bool final) +static inline llvm::Value *emit_gc_safe_enter(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *task, llvm::MDNode *tbaa, bool final) { using namespace llvm; Value *state = builder.getInt8(JL_GC_STATE_SAFE); - return emit_gc_state_set(builder, T_size, ptls, state, nullptr, final); + return emit_gc_state_set(builder, T_size, task, tbaa, state, nullptr, final); } -static inline llvm::Value *emit_gc_safe_leave(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::Value *state, bool final) +static inline llvm::Value *emit_gc_safe_leave(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *task, llvm::MDNode *tbaa, llvm::Value *state, bool final) { using namespace llvm; Value *old_state = builder.getInt8(JL_GC_STATE_SAFE); - return emit_gc_state_set(builder, T_size, ptls, state, old_state, final); + return emit_gc_state_set(builder, T_size, task, tbaa, state, old_state, final); } // Compatibility shims for LLVM attribute APIs that were renamed in LLVM 14. 
diff --git a/src/llvm-ptls.cpp b/src/llvm-ptls.cpp index e36136859517a..5b3a14d6b8e9b 100644 --- a/src/llvm-ptls.cpp +++ b/src/llvm-ptls.cpp @@ -185,7 +185,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, builder.SetInsertPoint(fastTerm->getParent()); fastTerm->removeFromParent(); MDNode *tbaa = tbaa_gcframe; - Value *prior = emit_gc_unsafe_enter(builder, T_size, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, pgcstack), tbaa), true); + Value *prior = emit_gc_unsafe_enter(builder, T_size, get_current_task_from_pgcstack(builder, pgcstack), tbaa, true); builder.Insert(fastTerm); phi->addIncoming(pgcstack, fastTerm->getParent()); // emit pre-return cleanup @@ -197,7 +197,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, for (auto &BB : *pgcstack->getParent()->getParent()) { if (isa(BB.getTerminator())) { builder.SetInsertPoint(BB.getTerminator()); - emit_gc_unsafe_leave(builder, T_size, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, phi), tbaa), last_gc_state, true); + emit_gc_unsafe_leave(builder, T_size, get_current_task_from_pgcstack(builder, phi), tbaa, last_gc_state, true); } } } diff --git a/src/safepoint.c b/src/safepoint.c index 66bea539861f8..1f5edfa569b67 100644 --- a/src/safepoint.c +++ b/src/safepoint.c @@ -240,6 +240,7 @@ void jl_set_gc_and_wait(jl_task_t *ct) // n.b. not used on _OS_DARWIN_ // reading own gc state doesn't need atomic ops since no one else // should store to it. int8_t state = jl_atomic_load_relaxed(&ct->ptls->gc_state); + ct->ctx.activefp = (char*)jl_get_frame_addr(); jl_atomic_store_release(&ct->ptls->gc_state, JL_GC_STATE_WAITING); uv_mutex_lock(&safepoint_lock); uv_cond_broadcast(&safepoint_cond_begin); @@ -279,6 +280,7 @@ void jl_safepoint_wait_thread_resume(jl_task_t *ct) // might have already observed our gc_state. 
// if (!jl_atomic_load_relaxed(&ct->ptls->suspend_count)) return; int8_t state = jl_atomic_load_relaxed(&ct->ptls->gc_state); + ct->ctx.activefp = (char*)jl_get_frame_addr(); jl_atomic_store_release(&ct->ptls->gc_state, JL_GC_STATE_WAITING); uv_mutex_lock(&ct->ptls->sleep_lock); if (jl_atomic_load_relaxed(&ct->ptls->suspend_count)) { diff --git a/src/signals-mach.c b/src/signals-mach.c index 1c4af2cf9d033..17c1af632a5fb 100644 --- a/src/signals-mach.c +++ b/src/signals-mach.c @@ -108,6 +108,17 @@ void jl_mach_gc_end(void) // implement jl_set_gc_and_wait from a different thread static void jl_mach_gc_wait(jl_ptls_t ptls2, mach_port_t thread, int16_t tid) { + unsigned int count = MACH_THREAD_STATE_COUNT; + host_thread_state_t state; + kern_return_t ret = thread_get_state(thread, MACH_THREAD_STATE, (thread_state_t)&state, &count); + ptls2->current_task->ctx.activefp = +#ifdef _CPU_X86_64_ + state->__rsp; +#elif defined(_CPU_AARCH64_) + state->__sp; +#else +#error fp not defined for platform +#endif // relaxed, since we don't mind missing one--we will hit another soon (immediately probably) uv_mutex_lock(&safepoint_lock); // Since this gets set to zero only while the safepoint_lock was held this diff --git a/src/task.c b/src/task.c index 37e7f0e1f5440..b1e4e8709d87f 100644 --- a/src/task.c +++ b/src/task.c @@ -356,49 +356,35 @@ JL_DLLEXPORT void jl_active_task_stack(jl_task_t *task, char **active_start, char **active_end, char **total_start, char **total_end) { - if (!task->ctx.started) { - *total_start = *active_start = 0; - *total_end = *active_end = 0; + *total_start = *active_start = 0; + *total_end = *active_end = 0; + if (!task->ctx.started) return; - } - jl_ptls_t ptls2 = task->ptls; - if (task->ctx.copy_stack && ptls2) { - *total_start = *active_start = (char*)ptls2->stackbase - ptls2->stacksize; - *total_end = *active_end = (char*)ptls2->stackbase; - } - else if (task->ctx.stkbuf) { - *total_start = *active_start = (char*)task->ctx.stkbuf; -#ifndef 
_OS_WINDOWS_ - jl_ptls_t ptls0 = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; - if (ptls0->root_task == task) { - // See jl_init_root_task(). The root task of the main thread - // has its buffer enlarged by an artificial 3000000 bytes, but - // that means that the start of the buffer usually points to - // inaccessible memory. We need to correct for this. - *active_start += ROOT_TASK_STACK_ADJUSTMENT; - *total_start += ROOT_TASK_STACK_ADJUSTMENT; - } -#endif - - *total_end = *active_end = (char*)task->ctx.stkbuf + task->ctx.bufsz; -#ifdef COPY_STACKS - // save_stack stores the stack of an inactive task in stkbuf, and the - // actual number of used bytes in copy_stack. - if (task->ctx.copy_stack > 1) + if (task->ctx.copy_stack) { + if (task->ctx.ctx) { + // currently paused task + // save_stack stores the stack of an inactive task in stkbuf, and the + // actual number of used bytes in copy_stack. + *active_start = *total_start = (char*)task->ctx.stkbuf; *active_end = (char*)task->ctx.stkbuf + task->ctx.copy_stack; -#endif + *total_end = (char*)task->ctx.stkbuf + task->ctx.bufsz; + } + else if (task->ptls) { + jl_ptls_t ptls2 = task->ptls; + // currently running copy task + *active_start = (char*)task->ctx.activefp; + *total_start = (char*)ptls2->stackbase - ptls2->stacksize; + *total_end = *active_end = (char*)ptls2->stackbase; + } } else { - // no stack allocated yet - *total_start = *active_start = 0; - *total_end = *active_end = 0; - return; - } - - if (task == jl_current_task) { - // scan up to current `sp` for current thread and task - *active_start = (char*)jl_get_frame_addr(); + if (task->ctx.stkbuf) { + // currently running task + *active_start = (char*)task->ctx.activefp; + *total_start = (char*)task->ctx.stkbuf; + *active_end = *total_end = (char*)task->ctx.stkbuf + task->ctx.bufsz; + } } } @@ -506,6 +492,7 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt) } // set up global state for new task and clear global state for old task + 
lastt->ctx.activefp = (char*)jl_get_frame_addr(); t->ptls = ptls; jl_atomic_store_relaxed(&ptls->current_task, t); JL_GC_PROMISE_ROOTED(t); diff --git a/test/gcext/gcext-test.jl b/test/gcext/gcext-test.jl index 81637392e3c5d..46b4e86ab5d9d 100644 --- a/test/gcext/gcext-test.jl +++ b/test/gcext/gcext-test.jl @@ -27,11 +27,12 @@ end close(out.in) close(err.in) out_task = @async readlines(out) - err_task = @async readlines(err) + err_task = @async read(err, String) # @test success(p) errlines = fetch(err_task) lines = fetch(out_task) - @test length(errlines) == 0 + print(errlines) + @test isempty(errlines) # @test length(lines) == 6 @test length(lines) == 5 @test checknum(lines[2], r"([0-9]+) full collections", n -> n >= 10) diff --git a/test/gcext/gcext.c b/test/gcext/gcext.c index d5bf91ec8c9ab..f06132bf26194 100644 --- a/test/gcext/gcext.c +++ b/test/gcext/gcext.c @@ -479,8 +479,6 @@ static int stack_grows_down(void) { void task_scanner(jl_task_t *task, int root_task) { - int var_on_frame; - // The task scanner is not necessary for liveness, as the // corresponding task stack is already part of the stack. // Its purpose is simply to test that the task scanner @@ -491,23 +489,36 @@ void task_scanner(jl_task_t *task, int root_task) char *total_end_stack; jl_active_task_stack(task, &start_stack, &end_stack, &total_start_stack, &total_end_stack); + if (end_stack == 0) + return; + + if (!(start_stack < end_stack) || + !(total_start_stack < total_end_stack) || + !(total_start_stack <= start_stack) || + !(end_stack <= total_end_stack)) { + fputs("incorrect stack bounds\n", stderr); + fflush(stderr); + abort(); + } + // this is the live stack of a thread. Is it ours? 
- if (start_stack && task == (jl_task_t*)jl_get_current_task()) { - if (!(lt_ptr(start_stack, &var_on_frame) && lt_ptr(&var_on_frame, end_stack))) { + if (task == (jl_task_t*)jl_get_current_task()) { + void *var_on_frame = task->ctx.activefp + 1; + if (!(lt_ptr(start_stack, var_on_frame) && lt_ptr(var_on_frame, end_stack))) { // error, current stack frame must be on the live stack. - jl_error("stack frame not part of the current task"); + fputs("stack frame not part of the current task\n", stderr); + fflush(stderr); + abort(); } } - if (start_stack) { - void **start = (void **)start_stack; - void **end = (void **)end_stack; - while (start < end) { - void *p = *start++; - void *q = jl_gc_internal_obj_base_ptr(p); - if (q) { - jl_gc_mark_queue_obj(ptls, q); - } + void **start = (void **)start_stack; + void **end = (void **)end_stack; + while (start < end) { + void *p = *start++; + void *q = jl_gc_internal_obj_base_ptr(p); + if (q) { + jl_gc_mark_queue_obj(ptls, q); } } }