diff options
| -rw-r--r-- | runtime/base/mutex.cc | 82 | ||||
| -rw-r--r-- | runtime/base/mutex.h | 16 | ||||
| -rw-r--r-- | runtime/monitor.cc | 20 | ||||
| -rw-r--r-- | runtime/monitor.h | 8 | ||||
| -rw-r--r-- | runtime/parsed_options.cc | 7 | ||||
| -rw-r--r-- | runtime/runtime.cc | 18 | ||||
| -rw-r--r-- | runtime/runtime.h | 10 | ||||
| -rw-r--r-- | runtime/runtime_options.def | 2 | ||||
| -rw-r--r-- | runtime/thread_list.cc | 24 | ||||
| -rw-r--r-- | runtime/thread_list.h | 4 |
10 files changed, 173 insertions, 18 deletions
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc index 0b8c781858..6574ec0db6 100644 --- a/runtime/base/mutex.cc +++ b/runtime/base/mutex.cc @@ -31,11 +31,18 @@ #include "mutex-inl.h" #include "scoped_thread_state_change-inl.h" #include "thread-inl.h" +#include "thread.h" +#include "thread_list.h" namespace art { using android::base::StringPrintf; +static constexpr uint64_t kIntervalMillis = 50; +static constexpr int kMonitorTimeoutTryMax = 5; + +static const char* kLastDumpStackTime = "LastDumpStackTime"; + struct AllMutexData { // A guard for all_mutexes_ that's not a mutex (Mutexes must CAS to acquire and busy wait). Atomic<const BaseMutex*> all_mutexes_guard; @@ -45,6 +52,13 @@ struct AllMutexData { }; static struct AllMutexData gAllMutexData[kAllMutexDataSize]; +struct DumpStackLastTimeTLSData : public art::TLSData { + explicit DumpStackLastTimeTLSData(uint64_t last_dump_time_ms) { + last_dump_time_ms_ = last_dump_time_ms; + } + uint64_t last_dump_time_ms_; +}; + #if ART_USE_FUTEXES static bool ComputeRelativeTimeSpec(timespec* result_ts, const timespec& lhs, const timespec& rhs) { const int32_t one_sec = 1000 * 1000 * 1000; // one second in nanoseconds. @@ -443,15 +457,28 @@ void Mutex::ExclusiveLock(Thread* self) { if (UNLIKELY(should_respond_to_empty_checkpoint_request_)) { self->CheckEmptyCheckpointFromMutex(); } + + uint64_t wait_start_ms = enable_monitor_timeout_ ? MilliTime() : 0; + uint64_t try_times = 0; do { + timespec timeout_ts; + timeout_ts.tv_sec = 0; + timeout_ts.tv_nsec = Runtime::Current()->GetMonitorTimeoutNs(); if (futex(state_and_contenders_.Address(), FUTEX_WAIT_PRIVATE, cur_state, - nullptr, nullptr, 0) != 0) { + enable_monitor_timeout_ ? &timeout_ts : nullptr , nullptr, 0) != 0) { // We only went to sleep after incrementing and contenders and checking that the // lock is still held by someone else. EAGAIN and EINTR both indicate a spurious // failure, try again from the beginning. 
We don't use TEMP_FAILURE_RETRY so we can // intentionally retry to acquire the lock. if ((errno != EAGAIN) && (errno != EINTR)) { - PLOG(FATAL) << "futex wait failed for " << name_; + if (errno == ETIMEDOUT) { + try_times++; + if (try_times <= kMonitorTimeoutTryMax) { + DumpStack(self, wait_start_ms, try_times); + } + } else { + PLOG(FATAL) << "futex wait failed for " << name_; + } } } SleepIfRuntimeDeleted(self); @@ -481,6 +508,57 @@ void Mutex::ExclusiveLock(Thread* self) { } } +void Mutex::DumpStack(Thread* self, uint64_t wait_start_ms, uint64_t try_times) { + ScopedObjectAccess soa(self); + Locks::thread_list_lock_->ExclusiveLock(self); + std::string owner_stack_dump; + pid_t owner_tid = GetExclusiveOwnerTid(); + Thread *owner = Runtime::Current()->GetThreadList()->FindThreadByTid(owner_tid); + if (owner != nullptr) { + if (IsDumpFrequent(owner, try_times)) { + Locks::thread_list_lock_->ExclusiveUnlock(self); + LOG(WARNING) << "Contention with tid " << owner_tid << ", monitor id " << monitor_id_; + return; + } + struct CollectStackTrace : public Closure { + void Run(art::Thread* thread) override + REQUIRES_SHARED(art::Locks::mutator_lock_) { + if (IsDumpFrequent(thread)) { + return; + } + thread->SetCustomTLS(kLastDumpStackTime, new DumpStackLastTimeTLSData(MilliTime())); + thread->DumpJavaStack(oss); + } + std::ostringstream oss; + }; + CollectStackTrace owner_trace; + owner->RequestSynchronousCheckpoint(&owner_trace); + owner_stack_dump = owner_trace.oss.str(); + uint64_t wait_ms = MilliTime() - wait_start_ms; + LOG(WARNING) << "Monitor contention with tid " << owner_tid << ", wait time: " << wait_ms + << "ms, monitor id: " << monitor_id_ + << "\nPerfMonitor owner thread(" << owner_tid << ") stack is:\n" + << owner_stack_dump; + } else { + Locks::thread_list_lock_->ExclusiveUnlock(self); + } +} + +bool Mutex::IsDumpFrequent(Thread* thread, uint64_t try_times) { + uint64_t last_dump_time_ms = 0; + DumpStackLastTimeTLSData* tls_data = + 
reinterpret_cast<DumpStackLastTimeTLSData*>(thread->GetCustomTLS(kLastDumpStackTime)); + if (tls_data != nullptr) { + last_dump_time_ms = tls_data->last_dump_time_ms_; + } + uint64_t interval = MilliTime() - last_dump_time_ms; + if (interval < kIntervalMillis * try_times) { + return true; + } else { + return false; + } +} + bool Mutex::ExclusiveTryLock(Thread* self) { DCHECK(self == nullptr || self == Thread::Current()); if (kDebugLocking && !recursive_) { diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h index e4a7e1c9ec..8f2a8eac39 100644 --- a/runtime/base/mutex.h +++ b/runtime/base/mutex.h @@ -221,6 +221,18 @@ class LOCKABLE Mutex : public BaseMutex { void Dump(std::ostream& os) const override; + void DumpStack(Thread *self, uint64_t wait_start_ms, uint64_t try_times = 1); + + static bool IsDumpFrequent(Thread *self, uint64_t try_times = 1); + + void setEnableMonitorTimeout() { + enable_monitor_timeout_ = true; + } + + void setMonitorId(uint32_t monitorId) { + monitor_id_ = monitorId; + } + // For negative capabilities in clang annotations. const Mutex& operator!() const { return *this; } @@ -275,6 +287,10 @@ class LOCKABLE Mutex : public BaseMutex { unsigned int recursion_count_; const bool recursive_; // Can the lock be recursively held? + bool enable_monitor_timeout_ = false; + + uint32_t monitor_id_; + friend class ConditionVariable; DISALLOW_COPY_AND_ASSIGN(Mutex); }; diff --git a/runtime/monitor.cc b/runtime/monitor.cc index 295e76c4c5..2f590227af 100644 --- a/runtime/monitor.cc +++ b/runtime/monitor.cc @@ -42,6 +42,7 @@ #include "thread_list.h" #include "verifier/method_verifier.h" #include "well_known_classes.h" +#include <android-base/properties.h> static_assert(ART_USE_FUTEXES); @@ -116,6 +117,11 @@ Monitor::Monitor(Thread* self, Thread* owner, ObjPtr<mirror::Object> obj, int32_ // with the owner unlocking the thin-lock. 
CHECK(owner == nullptr || owner == self || owner->IsSuspended()); // The identity hash code is set for the life time of the monitor. + + bool monitor_timeout_enabled = Runtime::Current()->IsMonitorTimeoutEnabled(); + if (monitor_timeout_enabled) { + MaybeEnableTimeout(); + } } Monitor::Monitor(Thread* self, @@ -144,6 +150,11 @@ Monitor::Monitor(Thread* self, // with the owner unlocking the thin-lock. CHECK(owner == nullptr || owner == self || owner->IsSuspended()); // The identity hash code is set for the life time of the monitor. + + bool monitor_timeout_enabled = Runtime::Current()->IsMonitorTimeoutEnabled(); + if (monitor_timeout_enabled) { + MaybeEnableTimeout(); + } } int32_t Monitor::GetHashCode() { @@ -1711,4 +1722,13 @@ MonitorInfo::MonitorInfo(ObjPtr<mirror::Object> obj) : owner_(nullptr), entry_co } } +void Monitor::MaybeEnableTimeout() { + std::string current_package = Runtime::Current()->GetProcessPackageName(); + bool enabled_for_app = android::base::GetBoolProperty("debug.art.monitor.app", false); + if (current_package == "android" || enabled_for_app) { + monitor_lock_.setEnableMonitorTimeout(); + monitor_lock_.setMonitorId(monitor_id_); + } +} + } // namespace art diff --git a/runtime/monitor.h b/runtime/monitor.h index c0a0a4fadc..99e071e7ab 100644 --- a/runtime/monitor.h +++ b/runtime/monitor.h @@ -61,6 +61,12 @@ class Monitor { // a lock word. See Runtime::max_spins_before_thin_lock_inflation_. 
constexpr static size_t kDefaultMaxSpinsBeforeThinLockInflation = 50; + static constexpr int kDefaultMonitorTimeoutMs = 500; + + static constexpr int kMonitorTimeoutMinMs = 200; + + static constexpr int kMonitorTimeoutMaxMs = 1000; // 1 second + ~Monitor(); static void Init(uint32_t lock_profiling_threshold, uint32_t stack_dump_lock_profiling_threshold); @@ -413,6 +419,8 @@ class Monitor { void CheckLockOwnerRequest(Thread* self) REQUIRES(monitor_lock_) REQUIRES_SHARED(Locks::mutator_lock_); + void MaybeEnableTimeout() REQUIRES(Locks::mutator_lock_); + // The denser encoded version of this monitor as stored in the lock word. MonitorId monitor_id_; diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc index 1cb8e9f1ea..1bd905dd11 100644 --- a/runtime/parsed_options.cc +++ b/runtime/parsed_options.cc @@ -383,6 +383,13 @@ std::unique_ptr<RuntimeParser> ParsedOptions::MakeParser(bool ignore_unrecognize .Define("-XX:ThreadSuspendTimeout=_") // in ms .WithType<MillisecondsToNanoseconds>() // store as ns .IntoKey(M::ThreadSuspendTimeout) + .Define("-XX:MonitorTimeoutEnable=_") + .WithType<bool>() + .WithValueMap({{"false", false}, {"true", true}}) + .IntoKey(M::MonitorTimeoutEnable) + .Define("-XX:MonitorTimeout=_") // in ms + .WithType<int>() + .IntoKey(M::MonitorTimeout) .Define("-XX:GlobalRefAllocStackTraceLimit=_") // Number of free slots to enable tracing. 
.WithType<unsigned int>() .IntoKey(M::GlobalRefAllocStackTraceLimit) diff --git a/runtime/runtime.cc b/runtime/runtime.cc index 572e071f3b..53431508c7 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -284,6 +284,8 @@ Runtime::Runtime() async_exceptions_thrown_(false), non_standard_exits_enabled_(false), is_java_debuggable_(false), + monitor_timeout_enable_(false), + monitor_timeout_ns_(0), zygote_max_failed_boots_(0), experimental_flags_(ExperimentalFlags::kNone), oat_file_manager_(nullptr), @@ -1047,13 +1049,15 @@ void Runtime::InitNonZygoteOrPostFork( DCHECK(!IsZygote()); - if (is_system_server && profile_system_server) { + if (is_system_server) { // Set the system server package name to "android". // This is used to tell the difference between samples provided by system server // and samples generated by other apps when processing boot image profiles. SetProcessPackageName("android"); - jit_options_->SetWaitForJitNotificationsToSaveProfile(false); - VLOG(profiler) << "Enabling system server profiles"; + if (profile_system_server) { + jit_options_->SetWaitForJitNotificationsToSaveProfile(false); + VLOG(profiler) << "Enabling system server profiles"; + } } // Create the thread pools. 
@@ -1412,6 +1416,14 @@ bool Runtime::Init(RuntimeArgumentMap&& runtime_options_in) { thread_list_ = new ThreadList(runtime_options.GetOrDefault(Opt::ThreadSuspendTimeout)); intern_table_ = new InternTable; + monitor_timeout_enable_ = runtime_options.GetOrDefault(Opt::MonitorTimeoutEnable); + int monitor_timeout_ms = runtime_options.GetOrDefault(Opt::MonitorTimeout); + if (monitor_timeout_ms < Monitor::kMonitorTimeoutMinMs || + monitor_timeout_ms >= Monitor::kMonitorTimeoutMaxMs) { + LOG(ERROR) << "Improper monitor timeout could cause crash!"; + } + monitor_timeout_ns_ = MsToNs(monitor_timeout_ms); + verify_ = runtime_options.GetOrDefault(Opt::Verify); target_sdk_version_ = runtime_options.GetOrDefault(Opt::TargetSdkVersion); diff --git a/runtime/runtime.h b/runtime/runtime.h index 6f11916699..88f7bc0240 100644 --- a/runtime/runtime.h +++ b/runtime/runtime.h @@ -981,6 +981,13 @@ class Runtime { return perfetto_javaheapprof_enabled_; } + bool IsMonitorTimeoutEnabled() const { + return monitor_timeout_enable_; + } + + uint64_t GetMonitorTimeoutNs() const { + return monitor_timeout_ns_; + } // Return true if we should load oat files as executable or not. bool GetOatFilesExecutable() const; @@ -1239,6 +1246,9 @@ class Runtime { // Whether Java code needs to be debuggable. bool is_java_debuggable_; + bool monitor_timeout_enable_; + uint64_t monitor_timeout_ns_; + // Whether or not this application can be profiled by the shell user, // even when running on a device that is running in user mode. 
bool is_profileable_from_shell_ = false; diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def index ef5bed7779..3f0e3cca59 100644 --- a/runtime/runtime_options.def +++ b/runtime/runtime_options.def @@ -66,6 +66,8 @@ RUNTIME_OPTIONS_KEY (MillisecondsToNanoseconds, \ LongGCLogThreshold, gc::Heap::kDefaultLongGCLogThreshold) RUNTIME_OPTIONS_KEY (MillisecondsToNanoseconds, \ ThreadSuspendTimeout, ThreadList::kDefaultThreadSuspendTimeout) +RUNTIME_OPTIONS_KEY (bool, MonitorTimeoutEnable, false) +RUNTIME_OPTIONS_KEY (int, MonitorTimeout, Monitor::kDefaultMonitorTimeoutMs) RUNTIME_OPTIONS_KEY (Unit, DumpGCPerformanceOnShutdown) RUNTIME_OPTIONS_KEY (Unit, DumpRegionInfoBeforeGC) RUNTIME_OPTIONS_KEY (Unit, DumpRegionInfoAfterGC) diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc index fb4f6d304d..e0d62d0e03 100644 --- a/runtime/thread_list.cc +++ b/runtime/thread_list.cc @@ -118,15 +118,6 @@ bool ThreadList::Contains(Thread* thread) { return find(list_.begin(), list_.end(), thread) != list_.end(); } -bool ThreadList::Contains(pid_t tid) { - for (const auto& thread : list_) { - if (thread->GetTid() == tid) { - return true; - } - } - return false; -} - pid_t ThreadList::GetLockOwner() { return Locks::thread_list_lock_->GetExclusiveOwnerTid(); } @@ -179,12 +170,12 @@ void ThreadList::DumpUnattachedThreads(std::ostream& os, bool dump_native_stack) char* end; pid_t tid = strtol(e->d_name, &end, 10); if (!*end) { - bool contains; + Thread* thread; { MutexLock mu(self, *Locks::thread_list_lock_); - contains = Contains(tid); + thread = FindThreadByTid(tid); } - if (!contains) { + if (thread == nullptr) { DumpUnattachedThread(os, tid, dump_native_stack); } } @@ -1110,6 +1101,15 @@ Thread* ThreadList::FindThreadByThreadId(uint32_t thread_id) { return nullptr; } +Thread* ThreadList::FindThreadByTid(int tid) { + for (const auto& thread : list_) { + if (thread->GetTid() == tid) { + return thread; + } + } + return nullptr; +} + void 
ThreadList::WaitForOtherNonDaemonThreadsToExit(bool check_no_birth) { ScopedTrace trace(__PRETTY_FUNCTION__); Thread* self = Thread::Current(); diff --git a/runtime/thread_list.h b/runtime/thread_list.h index 1dcdf6241a..87a4c8dc61 100644 --- a/runtime/thread_list.h +++ b/runtime/thread_list.h @@ -104,6 +104,9 @@ class ThreadList { // Find an existing thread (or self) by its thread id (not tid). Thread* FindThreadByThreadId(uint32_t thread_id) REQUIRES(Locks::thread_list_lock_); + // Find an existing thread (or self) by its tid (not thread id). + Thread* FindThreadByTid(int tid) REQUIRES(Locks::thread_list_lock_); + // Does the thread list still contain the given thread, or one at the same address? // Used by Monitor to provide (mostly accurate) debugging information. bool Contains(Thread* thread) REQUIRES(Locks::thread_list_lock_); @@ -191,7 +194,6 @@ class ThreadList { uint32_t AllocThreadId(Thread* self); void ReleaseThreadId(Thread* self, uint32_t id) REQUIRES(!Locks::allocated_thread_ids_lock_); - bool Contains(pid_t tid) REQUIRES(Locks::thread_list_lock_); size_t RunCheckpoint(Closure* checkpoint_function, bool includeSuspended) REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_); |