Skip to content

Commit

Permalink
Merge pull request #2692 from chenBright/detect_deadlock
Browse files Browse the repository at this point in the history
Support pthread mutex deadlock detection
  • Loading branch information
lorinlee authored Jul 19, 2024
2 parents 3419a3d + df47a63 commit d227d93
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 23 deletions.
72 changes: 64 additions & 8 deletions src/bthread/mutex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@
#include "butil/logging.h"
#include "butil/object_pool.h"
#include "bthread/butex.h" // butex_*
#include "bthread/processor.h" // cpu_relax, barrier
#include "bthread/mutex.h" // bthread_mutex_t
#include "bthread/sys_futex.h"
#include "bthread/log.h"
#include "butil/debug/stack_trace.h"

extern "C" {
extern void* __attribute__((weak)) _dl_sym(void* handle, const char* symbol, void* caller);
Expand Down Expand Up @@ -382,8 +382,10 @@ make_contention_site_invalid(bthread_contention_site_t* cs) {
// technique avoids calling pthread_once each time.
// Signature shared by pthread_mutex_lock/trylock/unlock.
typedef int (*MutexOp)(pthread_mutex_t*);
// Trampolines used as the initial hook values: the first call runs
// pthread_once to resolve the real pthread symbols, then forwards to them.
int first_sys_pthread_mutex_lock(pthread_mutex_t* mutex);
int first_sys_pthread_mutex_trylock(pthread_mutex_t* mutex);
int first_sys_pthread_mutex_unlock(pthread_mutex_t* mutex);
// Pointers to the real pthread mutex operations. They start at the
// trampolines above and are replaced (once) with the dlsym-resolved
// system symbols, so steady-state calls skip pthread_once entirely.
static MutexOp sys_pthread_mutex_lock = first_sys_pthread_mutex_lock;
static MutexOp sys_pthread_mutex_trylock = first_sys_pthread_mutex_trylock;
static MutexOp sys_pthread_mutex_unlock = first_sys_pthread_mutex_unlock;
static pthread_once_t init_sys_mutex_lock_once = PTHREAD_ONCE_INIT;

Expand Down Expand Up @@ -429,9 +431,12 @@ static void init_sys_mutex_lock() {
sys_pthread_mutex_lock = (MutexOp)dlsym(RTLD_NEXT, "pthread_mutex_lock");
sys_pthread_mutex_unlock = (MutexOp)dlsym(RTLD_NEXT, "pthread_mutex_unlock");
}
// In some system, _dl_sym may cause symbol lookup error: undefined symbol: pthread_mutex_trylock.
sys_pthread_mutex_trylock = (MutexOp)dlsym(RTLD_NEXT, "pthread_mutex_trylock");
#elif defined(OS_MACOSX)
// TODO: look workaround for dlsym on mac
sys_pthread_mutex_lock = (MutexOp)dlsym(RTLD_NEXT, "pthread_mutex_lock");
sys_pthread_mutex_trylock = (MutexOp)dlsym(RTLD_NEXT, "pthread_mutex_trylock");
sys_pthread_mutex_unlock = (MutexOp)dlsym(RTLD_NEXT, "pthread_mutex_unlock");
#endif
}
Expand All @@ -443,6 +448,12 @@ int first_sys_pthread_mutex_lock(pthread_mutex_t* mutex) {
pthread_once(&init_sys_mutex_lock_once, init_sys_mutex_lock);
return sys_pthread_mutex_lock(mutex);
}

// Trampoline installed as the initial value of sys_pthread_mutex_trylock:
// resolves the real pthread symbols exactly once, then delegates. After
// init, sys_pthread_mutex_trylock points directly at the system function.
int first_sys_pthread_mutex_trylock(pthread_mutex_t* m) {
    pthread_once(&init_sys_mutex_lock_once, init_sys_mutex_lock);
    const int rc = sys_pthread_mutex_trylock(m);
    return rc;
}

int first_sys_pthread_mutex_unlock(pthread_mutex_t* mutex) {
pthread_once(&init_sys_mutex_lock_once, init_sys_mutex_lock);
return sys_pthread_mutex_unlock(mutex);
Expand All @@ -457,6 +468,27 @@ inline uint64_t hash_mutex_ptr(const Mutex* m) {
// code are never sampled, otherwise deadlock may occur.
static __thread bool tls_inside_lock = false;

// ++tls_pthread_lock_count when pthread locking,
// --tls_pthread_lock_count when pthread unlocking.
// Only when it is equal to 0, it is safe for the bthread to be scheduled.
static __thread int tls_pthread_lock_count = 0;

void CheckBthreadScheSafety() {
if (BAIDU_LIKELY(0 == tls_pthread_lock_count)) {
return;
}

static butil::atomic<bool> b_sched_in_p_lock_logged{false};
if (BAIDU_UNLIKELY(!b_sched_in_p_lock_logged.exchange(
true, butil::memory_order_relaxed))) {
butil::debug::StackTrace trace(true);
// It can only be checked once because the counter is messed up.
LOG(ERROR) << "bthread is suspended while holding"
<< tls_pthread_lock_count << " pthread locks."
<< std::endl << trace.ToString();
}
}

// Speed up with TLS:
// Most pthread_mutex are locked and unlocked in the same thread. Putting
// contention information in TLS avoids collisions that may occur in
Expand Down Expand Up @@ -543,14 +575,20 @@ void submit_contention(const bthread_contention_site_t& csite, int64_t now_ns) {

namespace internal {
// Locks |m| through the dlsym-resolved pthread_mutex_lock and bumps the
// per-thread pthread-lock counter consumed by CheckBthreadScheSafety.
// The counter is incremented before the (possibly blocking) lock call,
// matching the original ordering.
BUTIL_FORCE_INLINE int pthread_mutex_lock_internal(pthread_mutex_t* m) {
    bthread::tls_pthread_lock_count++;
    return sys_pthread_mutex_lock(m);
}

// Tries to lock |mutex| through the dlsym-resolved pthread_mutex_trylock.
// The per-thread lock counter is only incremented when the lock was
// actually acquired (rc == 0); a failed trylock must not distort it.
// (Removed the stale pre-image line `return ::pthread_mutex_trylock(...)`
// left over from the diff, which made the rest of the body unreachable.)
BUTIL_FORCE_INLINE int pthread_mutex_trylock_internal(pthread_mutex_t* mutex) {
    int rc = sys_pthread_mutex_trylock(mutex);
    if (0 == rc) {
        ++tls_pthread_lock_count;
    }
    return rc;
}

// Unlocks |m| through the dlsym-resolved pthread_mutex_unlock, decrementing
// the per-thread pthread-lock counter first (mirrors lock_internal, which
// increments before its lock call).
BUTIL_FORCE_INLINE int pthread_mutex_unlock_internal(pthread_mutex_t* m) {
    tls_pthread_lock_count--;
    return sys_pthread_mutex_unlock(m);
}

Expand Down Expand Up @@ -621,6 +659,11 @@ BUTIL_FORCE_INLINE int pthread_mutex_lock_impl(Mutex* mutex) {
return rc;
}

// trylock has no profiler hook: it forwards straight to the internal
// implementation for any Mutex type.
template <typename Mutex>
BUTIL_FORCE_INLINE int pthread_mutex_trylock_impl(Mutex* mutex) {
    const int rc = pthread_mutex_trylock_internal(mutex);
    return rc;
}

template <typename Mutex>
BUTIL_FORCE_INLINE int pthread_mutex_unlock_impl(Mutex* mutex) {
// Don't change behavior of unlock when profiler is off.
Expand Down Expand Up @@ -670,6 +713,10 @@ BUTIL_FORCE_INLINE int pthread_mutex_lock_impl(pthread_mutex_t* mutex) {
return internal::pthread_mutex_lock_impl(mutex);
}

// Non-template overload for plain pthread_mutex_t; delegates to the
// templated internal implementation.
BUTIL_FORCE_INLINE int pthread_mutex_trylock_impl(pthread_mutex_t* mutex) {
    const int rc = internal::pthread_mutex_trylock_impl(mutex);
    return rc;
}

// Non-template overload for plain pthread_mutex_t; delegates to the
// templated internal implementation.
BUTIL_FORCE_INLINE int pthread_mutex_unlock_impl(pthread_mutex_t* mutex) {
    const int rc = internal::pthread_mutex_unlock_impl(mutex);
    return rc;
}
Expand Down Expand Up @@ -733,24 +780,30 @@ int FastPthreadMutex::lock_contended() {
}

// Acquires the futex-backed mutex: fast-path exchange on the locked byte,
// falling back to lock_contended() when it was already held. Counts as a
// pthread-level lock for the bthread scheduling-safety check.
// (Removed the duplicated pre-image declaration of `split` left over from
// the diff paste.)
void FastPthreadMutex::lock() {
    auto split = (bthread::MutexInternal*)&_futex;
    if (split->locked.exchange(1, butil::memory_order_acquire)) {
        (void)lock_contended();
    }
    ++tls_pthread_lock_count;
}

// Attempts to acquire without blocking. Only on success is the per-thread
// pthread-lock counter incremented, so a failed try_lock leaves the
// scheduling-safety accounting untouched.
// (Removed the duplicated pre-image lines left over from the diff paste.)
bool FastPthreadMutex::try_lock() {
    auto split = (bthread::MutexInternal*)&_futex;
    bool lock = !split->locked.exchange(1, butil::memory_order_acquire);
    if (lock) {
        ++tls_pthread_lock_count;
    }
    return lock;
}

// Releases the mutex and wakes one waiter if the futex word recorded
// contention. Decrements the per-thread pthread-lock counter to balance
// lock()/try_lock().
// (Removed the duplicated pre-image declaration of `whole` left over from
// the diff paste.)
void FastPthreadMutex::unlock() {
    auto whole = (butil::atomic<unsigned>*)&_futex;
    const unsigned prev = whole->exchange(0, butil::memory_order_release);
    // CAUTION: the mutex may be destroyed, check comments before butex_create
    if (prev != BTHREAD_MUTEX_LOCKED) {
        futex_wake_private(whole, 1);
    }
    --tls_pthread_lock_count;
}

} // namespace internal
Expand Down Expand Up @@ -879,10 +932,13 @@ int bthread_mutex_unlock(bthread_mutex_t* m) {
return 0;
}

int pthread_mutex_lock (pthread_mutex_t *__mutex) {
int pthread_mutex_lock(pthread_mutex_t* __mutex) {
return bthread::pthread_mutex_lock_impl(__mutex);
}
int pthread_mutex_unlock (pthread_mutex_t *__mutex) {
// Overrides the global pthread_mutex_trylock symbol, routing through
// bthread's tracked implementation.
int pthread_mutex_trylock(pthread_mutex_t* __mutex) {
    const int rc = bthread::pthread_mutex_trylock_impl(__mutex);
    return rc;
}
// Overrides the global pthread_mutex_unlock symbol, routing through
// bthread's tracked implementation.
int pthread_mutex_unlock(pthread_mutex_t* __mutex) {
    const int rc = bthread::pthread_mutex_unlock_impl(__mutex);
    return rc;
}

Expand Down
3 changes: 3 additions & 0 deletions src/bthread/task_group.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,8 @@ void TaskGroup::sched(TaskGroup** pg) {
sched_to(pg, next_tid);
}

extern void CheckBthreadScheSafety();

void TaskGroup::sched_to(TaskGroup** pg, TaskMeta* next_meta) {
TaskGroup* g = *pg;
#ifndef NDEBUG
Expand Down Expand Up @@ -622,6 +624,7 @@ void TaskGroup::sched_to(TaskGroup** pg, TaskMeta* next_meta) {

if (cur_meta->stack != NULL) {
if (next_meta->stack != cur_meta->stack) {
CheckBthreadScheSafety();
jump_stack(cur_meta->stack, next_meta->stack);
// probably went to another group, need to assign g again.
g = BAIDU_GET_VOLATILE_THREAD_LOCAL(tls_task_group);
Expand Down
5 changes: 3 additions & 2 deletions src/butil/debug/stack_trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ BUTIL_EXPORT bool EnableInProcessStackDumpingForSandbox();
class BUTIL_EXPORT StackTrace {
public:
// Creates a stacktrace from the current location.
StackTrace();
// Exclude constructor frame of StackTrace if |exclude_self| is true.
explicit StackTrace(bool exclude_self = false);

// Creates a stacktrace from an existing array of instruction
// pointers (such as returned by Addresses()). |count| will be
Expand Down Expand Up @@ -82,7 +83,7 @@ class BUTIL_EXPORT StackTrace {
// doesn't give much more information.
static const int kMaxTraces = 62;

void* trace_[kMaxTraces];
void* trace_[kMaxTraces]{};

// The number of valid frames in |trace_|.
size_t count_;
Expand Down
17 changes: 11 additions & 6 deletions src/butil/debug/stack_trace_posix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -746,19 +746,24 @@ bool EnableInProcessStackDumping() {
return success;
}

// Captures the current call stack. When |exclude_self| is true the topmost
// frame (this constructor) is skipped so the trace starts at the caller.
// (Reconstructed the post-image of this diff hunk: the paste interleaved
// the old no-arg constructor and duplicated count_ assignments.)
StackTrace::StackTrace(bool exclude_self) {
  // NOTE: This code MUST be async-signal safe (it's used by in-process
  // stack dumping signal handler). NO malloc or stdio is allowed here.

  if (GetStackTrace) {
    // GetStackTrace supports skipping frames natively.
    count_ = GetStackTrace(trace_, arraysize(trace_), exclude_self ? 1 : 0);
  } else {
#if !defined(__UCLIBC__)
    // Though the backtrace API man page does not list any possible negative
    // return values, we take no chance.
    count_ = butil::saturated_cast<size_t>(backtrace(trace_, arraysize(trace_)));
    if (exclude_self && count_ > 1) {
      // backtrace() cannot skip frames itself; drop the top frame in place.
      memmove(trace_, trace_ + 1, (count_ - 1) * sizeof(void*));
      count_--;
    }
#else
    count_ = 0;
#endif
  }
}
Expand Down
6 changes: 3 additions & 3 deletions src/butil/memory/scope_guard.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,11 @@ operator+(ScopeExitHelper, Callback&& callback) {

#define BRPC_ANONYMOUS_VARIABLE(prefix) BAIDU_CONCAT(prefix, __COUNTER__)

// The code in the braces of BRPC_SCOPE_EXIT always executes at the end of the scope.
// Variables used within BRPC_SCOPE_EXIT are captured by reference.
// Example:
// int fd = open(...);
// BRPC_SCOPE_EXIT {
// close(fd);
// };
// use fd ...
Expand Down
16 changes: 12 additions & 4 deletions test/stack_trace_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,18 @@ TEST_F(StackTraceTest, MAYBE_OutputToStream) {

// The test is used for manual testing, e.g., to see the raw output.
// The test is used for manual testing, e.g., to see the raw output —
// once with the default constructor and once with exclude_self=true to
// eyeball that the constructor frame is dropped.
// (Reconstructed the post-image: the paste interleaved the old single-trace
// body with the new two-scope body.)
TEST_F(StackTraceTest, DebugOutputToStream) {
  {
    StackTrace trace;
    std::ostringstream os;
    trace.OutputToStream(&os);
    VLOG(1) << os.str();
  }
  {
    StackTrace trace(true);
    std::ostringstream os;
    trace.OutputToStream(&os);
    VLOG(1) << os.str();
  }
}

// The test is used for manual testing, e.g., to see the raw output.
Expand Down

0 comments on commit d227d93

Please sign in to comment.