-
-
Notifications
You must be signed in to change notification settings - Fork 5.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix jl_active_task_stack to return correct values #56477
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1765,18 +1765,6 @@ JL_DLLEXPORT unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *fi | |
void register_eh_frames(uint8_t *Addr, size_t Size) JL_NOTSAFEPOINT; | ||
void deregister_eh_frames(uint8_t *Addr, size_t Size) JL_NOTSAFEPOINT; | ||
|
||
STATIC_INLINE void *jl_get_frame_addr(void) JL_NOTSAFEPOINT | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Moved to |
||
{ | ||
#ifdef __GNUC__ | ||
return __builtin_frame_address(0); | ||
#else | ||
void *dummy = NULL; | ||
// The mask is to suppress the compiler warning about returning | ||
// address of local variable | ||
return (void*)((uintptr_t)&dummy & ~(uintptr_t)15); | ||
#endif | ||
} | ||
|
||
// Log `msg` to the current logger by calling CoreLogging.logmsg_shim() on the | ||
// julia side. If any of module, group, id, file or line are NULL, these will | ||
// be passed to the julia side as `nothing`. If `kwargs` is NULL an empty set | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -96,6 +96,7 @@ typedef struct { | |
_jl_ucontext_t *ctx; | ||
jl_stack_context_t *copy_ctx; | ||
}; | ||
void *activefp; | ||
void *stkbuf; // malloc'd memory (either copybuf or stack) | ||
size_t bufsz; // actual sizeof stkbuf | ||
unsigned int copy_stack:31; // sizeof stack for copybuf | ||
|
@@ -218,6 +219,76 @@ typedef struct _jl_tls_states_t { | |
#endif | ||
} jl_tls_states_t; | ||
|
||
#define JL_RNG_SIZE 5 // xoshiro 4 + splitmix 1 | ||
|
||
// all values are callable as Functions | ||
typedef jl_value_t jl_function_t; | ||
|
||
typedef struct _jl_timing_block_t jl_timing_block_t; | ||
typedef struct _jl_timing_event_t jl_timing_event_t; | ||
typedef struct _jl_excstack_t jl_excstack_t; | ||
|
||
typedef struct _jl_handler_t jl_handler_t; | ||
|
||
typedef struct _jl_task_t { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A major chunk of this PR seems to be about moving There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Made PR #57117 to take care of this |
||
JL_DATA_TYPE | ||
jl_value_t *next; // invasive linked list for scheduler | ||
jl_value_t *queue; // invasive linked list for scheduler | ||
jl_value_t *tls; | ||
jl_value_t *donenotify; | ||
jl_value_t *result; | ||
jl_value_t *scope; | ||
jl_function_t *start; | ||
_Atomic(uint8_t) _state; | ||
uint8_t sticky; // record whether this Task can be migrated to a new thread | ||
uint16_t priority; | ||
_Atomic(uint8_t) _isexception; // set if `result` is an exception to throw or that we exited with | ||
uint8_t pad0[3]; | ||
// === 64 bytes (cache line) | ||
uint64_t rngState[JL_RNG_SIZE]; | ||
// flag indicating whether or not to record timing metrics for this task | ||
uint8_t metrics_enabled; | ||
uint8_t pad1[3]; | ||
// timestamp this task first entered the run queue | ||
_Atomic(uint64_t) first_enqueued_at; | ||
// timestamp this task was most recently scheduled to run | ||
_Atomic(uint64_t) last_started_running_at; | ||
// time this task has spent running; updated when it yields or finishes. | ||
_Atomic(uint64_t) running_time_ns; | ||
// === 64 bytes (cache line) | ||
// timestamp this task finished (i.e. entered state DONE or FAILED). | ||
_Atomic(uint64_t) finished_at; | ||
|
||
// hidden state: | ||
|
||
// id of owning thread - does not need to be defined until the task runs | ||
_Atomic(int16_t) tid; | ||
// threadpool id | ||
int8_t threadpoolid; | ||
// Reentrancy bits | ||
// Bit 0: 1 if we are currently running inference/codegen | ||
// Bit 1-2: 0-3 counter of how many times we've reentered inference | ||
// Bit 3: 1 if we are writing the image and inference is illegal | ||
uint8_t reentrant_timing; | ||
// 2 bytes of padding on 32-bit, 6 bytes on 64-bit | ||
// uint16_t padding2_32; | ||
// uint48_t padding2_64; | ||
// saved gc stack top for context switches | ||
jl_gcframe_t *gcstack; | ||
size_t world_age; | ||
// quick lookup for current ptls | ||
jl_ptls_t ptls; // == jl_all_tls_states[tid] | ||
#ifdef USE_TRACY | ||
const char *name; | ||
#endif | ||
// saved exception stack | ||
jl_excstack_t *excstack; | ||
// current exception handler | ||
jl_handler_t *eh; | ||
// saved thread state | ||
jl_ucontext_t ctx; // pointer into stkbuf, if suspended | ||
} jl_task_t; | ||
|
||
JL_DLLEXPORT void *jl_get_ptls_states(void); | ||
|
||
// Update codegen version in `ccall.cpp` after changing either `pause` or `wake` | ||
|
@@ -247,6 +318,18 @@ JL_DLLEXPORT void (jl_cpu_pause)(void); | |
JL_DLLEXPORT void (jl_cpu_suspend)(void); | ||
JL_DLLEXPORT void (jl_cpu_wake)(void); | ||
|
||
STATIC_INLINE void *jl_get_frame_addr(void) JL_NOTSAFEPOINT | ||
{ | ||
#ifdef __GNUC__ | ||
return __builtin_frame_address(0); | ||
#else | ||
void *dummy = NULL; | ||
// The mask is to suppress the compiler warning about returning | ||
// address of local variable | ||
return (void*)((uintptr_t)&dummy & ~(uintptr_t)15); | ||
#endif | ||
} | ||
|
||
#ifdef __clang_gcanalyzer__ | ||
// Note that the sigint safepoint can also trigger GC, albeit less likely | ||
void jl_gc_safepoint_(jl_ptls_t tls); | ||
|
@@ -274,6 +357,9 @@ STATIC_INLINE int8_t jl_gc_state_set(jl_ptls_t ptls, int8_t state, | |
{ | ||
assert(old_state != JL_GC_PARALLEL_COLLECTOR_THREAD); | ||
assert(old_state != JL_GC_CONCURRENT_COLLECTOR_THREAD); | ||
if (__builtin_constant_p(state) ? state : // required if !old_state && state, otherwise optional | ||
__builtin_constant_p(old_state) ? !old_state : 1) | ||
jl_atomic_load_relaxed(&ptls->current_task)->ctx.activefp = (char*)jl_get_frame_addr(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have no idea what the semantics for This seems to store the current frame pointer into the task. Presumably that means this function is being called from the task as it is about to switch out? At least that would make sense. My personal dream is that the wonderful people adding MMTk and doing other GC restructuring work, like @gbaraldi @d-netto @udesou), add some more comments describing such things as they figure this stuff out for themselves ;-). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure if we should be updating the Makes more sense to me to update this in the Also, I believe we should fetch the frame pointer from the context the thread was running with before it got trapped in the signal handler -- note that we run the signal handlers with a different stack. This is more or less what I started doing in #55003. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
It ends up being mandatory, since we don't stop all threads when reading their pgcstack, although that does mean it may be often unreliable / unsafe to conservatively scan the stack of the other threads. The signal handler issue is a good point. We would need to pass through the ucontext_t to Even with those changed though, we know now that in v1.11+ the llvm optimizations have reached a point where we know many cases that will necessarily cause the GC to segfault when doing conservative stack scanning, so there isn't any safe way to do this. I am going to go ahead and close out this PR, though someone could revive it under a build flag if they really care about it, and don't care if using conservative GC makes their code segfault sometimes or are willing to run with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, we already do use conservative GC in Julia 1.11 and master... though we do it by catching the "segfaults", though really the only ones we had so far were due to incorrect values for the stack start being reported causing us to read into guard pages. Though perhaps we'd be crashing more if we used tasks extensively? I don't know enough about what the problem is right now / how "wrong" the currently reported values are? I thought they are just "too big" as in the stack is overestimated? But maybe that's not true anymore? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The observation now is that values won't be allocated correctly once they are provably garbage (during optimization), which may lead to the GC trying to access memory that is not valid to access. So you need to either turn off optimizations (-O0) or use a precise stack scan, or just hope to be lucky that it doesn't crash often. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I still don't quite understand what that means. Anyway based on the bits I do understand I hope we won't be affected because we don't scan for general Julia objects, just for those of a specific "foreign" type. Since these are allocated via the C ABI from C code I don't think the LLVM optimizer has any chance of optimizing them away in any way... |
||
jl_atomic_store_release(&ptls->gc_state, state); | ||
if (state == JL_GC_STATE_UNSAFE || old_state == JL_GC_STATE_UNSAFE) | ||
jl_gc_safepoint_(ptls); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -237,16 +237,24 @@ static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Type *T_s | |
emit_signal_fence(builder); | ||
} | ||
|
||
static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::Value *state, llvm::Value *old_state, bool final) | ||
static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *task, llvm::MDNode *tbaa, llvm::Value *state, llvm::Value *old_state, bool final) | ||
{ | ||
using namespace llvm; | ||
Value *ptls = get_current_ptls_from_task(builder, task, tbaa); | ||
Type *T_int8 = state->getType(); | ||
unsigned offset = offsetof(jl_tls_states_t, gc_state); | ||
Value *gc_state = builder.CreateConstInBoundsGEP1_32(T_int8, ptls, offset, "gc_state"); | ||
if (old_state == nullptr) { | ||
old_state = builder.CreateLoad(T_int8, gc_state); | ||
cast<LoadInst>(old_state)->setOrdering(AtomicOrdering::Monotonic); | ||
} | ||
if (isa<Constant>(state) ? state == builder.getInt8(0) : | ||
isa<Constant>(old_state) ? old_state == builder.getInt8(0) : true) { | ||
unsigned offset = offsetof(jl_task_t, ctx.activefp); | ||
Value *currentfp = builder.CreateConstInBoundsGEP1_32(T_int8, task, offset, "gc_state"); | ||
Value *fp = builder.CreateIntrinsic(Intrinsic::frameaddress, {builder.getPtrTy()}, {builder.getInt32(0)}); | ||
builder.CreateAlignedStore(fp, currentfp, Align(sizeof(void*))); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have no idea how this code is supposed to work, so I can't review it other than on a superficial level: My guess would be that this function is a code-emitting counterpart to |
||
builder.CreateAlignedStore(state, gc_state, Align(sizeof(void*)))->setOrdering(AtomicOrdering::Release); | ||
if (auto *C = dyn_cast<ConstantInt>(old_state)) | ||
if (C->isZero()) | ||
|
@@ -261,39 +269,38 @@ static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::T | |
builder.CreateICmpEQ(state, zero8)), | ||
passBB, exitBB); | ||
builder.SetInsertPoint(passBB); | ||
MDNode *tbaa = get_tbaa_const(builder.getContext()); | ||
emit_gc_safepoint(builder, T_size, ptls, tbaa, final); | ||
emit_gc_safepoint(builder, T_size, ptls, get_tbaa_const(builder.getContext()), final); | ||
builder.CreateBr(exitBB); | ||
builder.SetInsertPoint(exitBB); | ||
return old_state; | ||
} | ||
|
||
static inline llvm::Value *emit_gc_unsafe_enter(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, bool final) | ||
static inline llvm::Value *emit_gc_unsafe_enter(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *task, llvm::MDNode *tbaa, bool final) | ||
{ | ||
using namespace llvm; | ||
Value *state = builder.getInt8(0); | ||
return emit_gc_state_set(builder, T_size, ptls, state, nullptr, final); | ||
return emit_gc_state_set(builder, T_size, task, tbaa, state, nullptr, final); | ||
} | ||
|
||
static inline llvm::Value *emit_gc_unsafe_leave(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::Value *state, bool final) | ||
static inline llvm::Value *emit_gc_unsafe_leave(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *task, llvm::MDNode *tbaa, llvm::Value *state, bool final) | ||
{ | ||
using namespace llvm; | ||
Value *old_state = builder.getInt8(JL_GC_STATE_UNSAFE); | ||
return emit_gc_state_set(builder, T_size, ptls, state, old_state, final); | ||
return emit_gc_state_set(builder, T_size, task, tbaa, state, old_state, final); | ||
} | ||
|
||
static inline llvm::Value *emit_gc_safe_enter(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, bool final) | ||
static inline llvm::Value *emit_gc_safe_enter(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *task, llvm::MDNode *tbaa, bool final) | ||
{ | ||
using namespace llvm; | ||
Value *state = builder.getInt8(JL_GC_STATE_SAFE); | ||
return emit_gc_state_set(builder, T_size, ptls, state, nullptr, final); | ||
return emit_gc_state_set(builder, T_size, task, tbaa, state, nullptr, final); | ||
} | ||
|
||
static inline llvm::Value *emit_gc_safe_leave(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::Value *state, bool final) | ||
static inline llvm::Value *emit_gc_safe_leave(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *task, llvm::MDNode *tbaa, llvm::Value *state, bool final) | ||
{ | ||
using namespace llvm; | ||
Value *old_state = builder.getInt8(JL_GC_STATE_SAFE); | ||
return emit_gc_state_set(builder, T_size, ptls, state, old_state, final); | ||
return emit_gc_state_set(builder, T_size, task, tbaa, state, old_state, final); | ||
} | ||
|
||
// Compatibility shims for LLVM attribute APIs that were renamed in LLVM 14. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, that one seems simple enough :-)