diff options
| author | 2021-04-24 11:27:06 +0800 | |
|---|---|---|
| committer | 2021-05-17 18:19:16 +0000 | |
| commit | 0d290721a8d63d5e0d38a64d3ecf922a91becb10 (patch) | |
| tree | 02daf3bb3ae703da0a9b99a307f8bbfb2c03c748 /runtime/base/mutex.cc | |
| parent | c63d9672264e894d8d409e8d582b4e086b26abca (diff) | |
Dump java backtrace when contending lock too long time.
It's easy to cause problems when time-consuming work is executed inside
a synchronized method/block. Previously, we used "dvm_lock_sample" in
the event log to help us investigate such cases.
But that information always arrives after the fact: "dvm_lock_sample" is
printed only after the owner thread has exited the critical section, so
we cannot tell which statement got stuck or was the most time-consuming.
This implementation provides much more information about what the
contending thread is doing: when a thread contends for a lock for a long
time, we print the backtrace of the thread holding the lock.
If many threads are waiting on the same lock, the backtrace may be
requested and printed multiple times, which is helpful for finding and
locating problems.
This mechanism is only suitable for very long wait times, not lots of
short wait times.
This feature is heavyweight, so it is not enabled by default. It can be
turned on with "-XX:MonitorTimeoutEnable" in AndroidRuntime::startVm.
Bug:140968481
Test: Manually check contention log messages.
Signed-off-by: wangguibo <wangguibo@xiaomi.com>
Change-Id: I29784d11355ae3c581723254848a87cd80e71456
Diffstat (limited to 'runtime/base/mutex.cc')
| -rw-r--r-- | runtime/base/mutex.cc | 82 |
1 files changed, 80 insertions, 2 deletions
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc index 0b8c781858..6574ec0db6 100644 --- a/runtime/base/mutex.cc +++ b/runtime/base/mutex.cc @@ -31,11 +31,18 @@ #include "mutex-inl.h" #include "scoped_thread_state_change-inl.h" #include "thread-inl.h" +#include "thread.h" +#include "thread_list.h" namespace art { using android::base::StringPrintf; +static constexpr uint64_t kIntervalMillis = 50; +static constexpr int kMonitorTimeoutTryMax = 5; + +static const char* kLastDumpStackTime = "LastDumpStackTime"; + struct AllMutexData { // A guard for all_mutexes_ that's not a mutex (Mutexes must CAS to acquire and busy wait). Atomic<const BaseMutex*> all_mutexes_guard; @@ -45,6 +52,13 @@ struct AllMutexData { }; static struct AllMutexData gAllMutexData[kAllMutexDataSize]; +struct DumpStackLastTimeTLSData : public art::TLSData { + explicit DumpStackLastTimeTLSData(uint64_t last_dump_time_ms) { + last_dump_time_ms_ = last_dump_time_ms; + } + uint64_t last_dump_time_ms_; +}; + #if ART_USE_FUTEXES static bool ComputeRelativeTimeSpec(timespec* result_ts, const timespec& lhs, const timespec& rhs) { const int32_t one_sec = 1000 * 1000 * 1000; // one second in nanoseconds. @@ -443,15 +457,28 @@ void Mutex::ExclusiveLock(Thread* self) { if (UNLIKELY(should_respond_to_empty_checkpoint_request_)) { self->CheckEmptyCheckpointFromMutex(); } + + uint64_t wait_start_ms = enable_monitor_timeout_ ? MilliTime() : 0; + uint64_t try_times = 0; do { + timespec timeout_ts; + timeout_ts.tv_sec = 0; + timeout_ts.tv_nsec = Runtime::Current()->GetMonitorTimeoutNs(); if (futex(state_and_contenders_.Address(), FUTEX_WAIT_PRIVATE, cur_state, - nullptr, nullptr, 0) != 0) { + enable_monitor_timeout_ ? &timeout_ts : nullptr , nullptr, 0) != 0) { // We only went to sleep after incrementing and contenders and checking that the // lock is still held by someone else. EAGAIN and EINTR both indicate a spurious // failure, try again from the beginning. 
We don't use TEMP_FAILURE_RETRY so we can // intentionally retry to acquire the lock. if ((errno != EAGAIN) && (errno != EINTR)) { - PLOG(FATAL) << "futex wait failed for " << name_; + if (errno == ETIMEDOUT) { + try_times++; + if (try_times <= kMonitorTimeoutTryMax) { + DumpStack(self, wait_start_ms, try_times); + } + } else { + PLOG(FATAL) << "futex wait failed for " << name_; + } } } SleepIfRuntimeDeleted(self); @@ -481,6 +508,57 @@ void Mutex::ExclusiveLock(Thread* self) { } } +void Mutex::DumpStack(Thread* self, uint64_t wait_start_ms, uint64_t try_times) { + ScopedObjectAccess soa(self); + Locks::thread_list_lock_->ExclusiveLock(self); + std::string owner_stack_dump; + pid_t owner_tid = GetExclusiveOwnerTid(); + Thread *owner = Runtime::Current()->GetThreadList()->FindThreadByTid(owner_tid); + if (owner != nullptr) { + if (IsDumpFrequent(owner, try_times)) { + Locks::thread_list_lock_->ExclusiveUnlock(self); + LOG(WARNING) << "Contention with tid " << owner_tid << ", monitor id " << monitor_id_; + return; + } + struct CollectStackTrace : public Closure { + void Run(art::Thread* thread) override + REQUIRES_SHARED(art::Locks::mutator_lock_) { + if (IsDumpFrequent(thread)) { + return; + } + thread->SetCustomTLS(kLastDumpStackTime, new DumpStackLastTimeTLSData(MilliTime())); + thread->DumpJavaStack(oss); + } + std::ostringstream oss; + }; + CollectStackTrace owner_trace; + owner->RequestSynchronousCheckpoint(&owner_trace); + owner_stack_dump = owner_trace.oss.str(); + uint64_t wait_ms = MilliTime() - wait_start_ms; + LOG(WARNING) << "Monitor contention with tid " << owner_tid << ", wait time: " << wait_ms + << "ms, monitor id: " << monitor_id_ + << "\nPerfMonitor owner thread(" << owner_tid << ") stack is:\n" + << owner_stack_dump; + } else { + Locks::thread_list_lock_->ExclusiveUnlock(self); + } +} + +bool Mutex::IsDumpFrequent(Thread* thread, uint64_t try_times) { + uint64_t last_dump_time_ms = 0; + DumpStackLastTimeTLSData* tls_data = + 
reinterpret_cast<DumpStackLastTimeTLSData*>(thread->GetCustomTLS(kLastDumpStackTime)); + if (tls_data != nullptr) { + last_dump_time_ms = tls_data->last_dump_time_ms_; + } + uint64_t interval = MilliTime() - last_dump_time_ms; + if (interval < kIntervalMillis * try_times) { + return true; + } else { + return false; + } +} + bool Mutex::ExclusiveTryLock(Thread* self) { DCHECK(self == nullptr || self == Thread::Current()); if (kDebugLocking && !recursive_) { |