summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--runtime/base/mutex.cc82
-rw-r--r--runtime/base/mutex.h16
-rw-r--r--runtime/monitor.cc20
-rw-r--r--runtime/monitor.h8
-rw-r--r--runtime/parsed_options.cc7
-rw-r--r--runtime/runtime.cc18
-rw-r--r--runtime/runtime.h10
-rw-r--r--runtime/runtime_options.def2
-rw-r--r--runtime/thread_list.cc24
-rw-r--r--runtime/thread_list.h4
10 files changed, 173 insertions, 18 deletions
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 0b8c781858..6574ec0db6 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -31,11 +31,18 @@
#include "mutex-inl.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-inl.h"
+#include "thread.h"
+#include "thread_list.h"
namespace art {
using android::base::StringPrintf;
+static constexpr uint64_t kIntervalMillis = 50;
+static constexpr int kMonitorTimeoutTryMax = 5;
+
+static const char* kLastDumpStackTime = "LastDumpStackTime";
+
struct AllMutexData {
// A guard for all_mutexes_ that's not a mutex (Mutexes must CAS to acquire and busy wait).
Atomic<const BaseMutex*> all_mutexes_guard;
@@ -45,6 +52,13 @@ struct AllMutexData {
};
static struct AllMutexData gAllMutexData[kAllMutexDataSize];
+struct DumpStackLastTimeTLSData : public art::TLSData {
+ explicit DumpStackLastTimeTLSData(uint64_t last_dump_time_ms) {
+ last_dump_time_ms_ = last_dump_time_ms;
+ }
+ uint64_t last_dump_time_ms_;
+};
+
#if ART_USE_FUTEXES
static bool ComputeRelativeTimeSpec(timespec* result_ts, const timespec& lhs, const timespec& rhs) {
const int32_t one_sec = 1000 * 1000 * 1000; // one second in nanoseconds.
@@ -443,15 +457,28 @@ void Mutex::ExclusiveLock(Thread* self) {
if (UNLIKELY(should_respond_to_empty_checkpoint_request_)) {
self->CheckEmptyCheckpointFromMutex();
}
+
+ uint64_t wait_start_ms = enable_monitor_timeout_ ? MilliTime() : 0;
+ uint64_t try_times = 0;
do {
+ timespec timeout_ts;
+ timeout_ts.tv_sec = 0;
+ timeout_ts.tv_nsec = Runtime::Current()->GetMonitorTimeoutNs();
if (futex(state_and_contenders_.Address(), FUTEX_WAIT_PRIVATE, cur_state,
- nullptr, nullptr, 0) != 0) {
+ enable_monitor_timeout_ ? &timeout_ts : nullptr , nullptr, 0) != 0) {
// We only went to sleep after incrementing and contenders and checking that the
// lock is still held by someone else. EAGAIN and EINTR both indicate a spurious
// failure, try again from the beginning. We don't use TEMP_FAILURE_RETRY so we can
// intentionally retry to acquire the lock.
if ((errno != EAGAIN) && (errno != EINTR)) {
- PLOG(FATAL) << "futex wait failed for " << name_;
+ if (errno == ETIMEDOUT) {
+ try_times++;
+ if (try_times <= kMonitorTimeoutTryMax) {
+ DumpStack(self, wait_start_ms, try_times);
+ }
+ } else {
+ PLOG(FATAL) << "futex wait failed for " << name_;
+ }
}
}
SleepIfRuntimeDeleted(self);
@@ -481,6 +508,57 @@ void Mutex::ExclusiveLock(Thread* self) {
}
}
+void Mutex::DumpStack(Thread* self, uint64_t wait_start_ms, uint64_t try_times) {
+ ScopedObjectAccess soa(self);
+ Locks::thread_list_lock_->ExclusiveLock(self);
+ std::string owner_stack_dump;
+ pid_t owner_tid = GetExclusiveOwnerTid();
+ Thread *owner = Runtime::Current()->GetThreadList()->FindThreadByTid(owner_tid);
+ if (owner != nullptr) {
+ if (IsDumpFrequent(owner, try_times)) {
+ Locks::thread_list_lock_->ExclusiveUnlock(self);
+ LOG(WARNING) << "Contention with tid " << owner_tid << ", monitor id " << monitor_id_;
+ return;
+ }
+ struct CollectStackTrace : public Closure {
+ void Run(art::Thread* thread) override
+ REQUIRES_SHARED(art::Locks::mutator_lock_) {
+ if (IsDumpFrequent(thread)) {
+ return;
+ }
+ thread->SetCustomTLS(kLastDumpStackTime, new DumpStackLastTimeTLSData(MilliTime()));
+ thread->DumpJavaStack(oss);
+ }
+ std::ostringstream oss;
+ };
+ CollectStackTrace owner_trace;
+ owner->RequestSynchronousCheckpoint(&owner_trace);
+ owner_stack_dump = owner_trace.oss.str();
+ uint64_t wait_ms = MilliTime() - wait_start_ms;
+ LOG(WARNING) << "Monitor contention with tid " << owner_tid << ", wait time: " << wait_ms
+ << "ms, monitor id: " << monitor_id_
+ << "\nPerfMonitor owner thread(" << owner_tid << ") stack is:\n"
+ << owner_stack_dump;
+ } else {
+ Locks::thread_list_lock_->ExclusiveUnlock(self);
+ }
+}
+
+bool Mutex::IsDumpFrequent(Thread* thread, uint64_t try_times) {
+ uint64_t last_dump_time_ms = 0;
+ DumpStackLastTimeTLSData* tls_data =
+ reinterpret_cast<DumpStackLastTimeTLSData*>(thread->GetCustomTLS(kLastDumpStackTime));
+ if (tls_data != nullptr) {
+ last_dump_time_ms = tls_data->last_dump_time_ms_;
+ }
+ uint64_t interval = MilliTime() - last_dump_time_ms;
+ if (interval < kIntervalMillis * try_times) {
+ return true;
+ } else {
+ return false;
+ }
+}
+
bool Mutex::ExclusiveTryLock(Thread* self) {
DCHECK(self == nullptr || self == Thread::Current());
if (kDebugLocking && !recursive_) {
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index e4a7e1c9ec..8f2a8eac39 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -221,6 +221,18 @@ class LOCKABLE Mutex : public BaseMutex {
void Dump(std::ostream& os) const override;
+ void DumpStack(Thread *self, uint64_t wait_start_ms, uint64_t try_times = 1);
+
+ static bool IsDumpFrequent(Thread *self, uint64_t try_times = 1);
+
+ void setEnableMonitorTimeout() {
+ enable_monitor_timeout_ = true;
+ }
+
+ void setMonitorId(uint32_t monitorId) {
+ monitor_id_ = monitorId;
+ }
+
// For negative capabilities in clang annotations.
const Mutex& operator!() const { return *this; }
@@ -275,6 +287,10 @@ class LOCKABLE Mutex : public BaseMutex {
unsigned int recursion_count_;
const bool recursive_; // Can the lock be recursively held?
+ bool enable_monitor_timeout_ = false;
+
+ uint32_t monitor_id_;
+
friend class ConditionVariable;
DISALLOW_COPY_AND_ASSIGN(Mutex);
};
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 295e76c4c5..2f590227af 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -42,6 +42,7 @@
#include "thread_list.h"
#include "verifier/method_verifier.h"
#include "well_known_classes.h"
+#include <android-base/properties.h>
static_assert(ART_USE_FUTEXES);
@@ -116,6 +117,11 @@ Monitor::Monitor(Thread* self, Thread* owner, ObjPtr<mirror::Object> obj, int32_
// with the owner unlocking the thin-lock.
CHECK(owner == nullptr || owner == self || owner->IsSuspended());
// The identity hash code is set for the life time of the monitor.
+
+ bool monitor_timeout_enabled = Runtime::Current()->IsMonitorTimeoutEnabled();
+ if (monitor_timeout_enabled) {
+ MaybeEnableTimeout();
+ }
}
Monitor::Monitor(Thread* self,
@@ -144,6 +150,11 @@ Monitor::Monitor(Thread* self,
// with the owner unlocking the thin-lock.
CHECK(owner == nullptr || owner == self || owner->IsSuspended());
// The identity hash code is set for the life time of the monitor.
+
+ bool monitor_timeout_enabled = Runtime::Current()->IsMonitorTimeoutEnabled();
+ if (monitor_timeout_enabled) {
+ MaybeEnableTimeout();
+ }
}
int32_t Monitor::GetHashCode() {
@@ -1711,4 +1722,13 @@ MonitorInfo::MonitorInfo(ObjPtr<mirror::Object> obj) : owner_(nullptr), entry_co
}
}
+void Monitor::MaybeEnableTimeout() {
+ std::string current_package = Runtime::Current()->GetProcessPackageName();
+ bool enabled_for_app = android::base::GetBoolProperty("debug.art.monitor.app", false);
+ if (current_package == "android" || enabled_for_app) {
+ monitor_lock_.setEnableMonitorTimeout();
+ monitor_lock_.setMonitorId(monitor_id_);
+ }
+}
+
} // namespace art
diff --git a/runtime/monitor.h b/runtime/monitor.h
index c0a0a4fadc..99e071e7ab 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -61,6 +61,12 @@ class Monitor {
// a lock word. See Runtime::max_spins_before_thin_lock_inflation_.
constexpr static size_t kDefaultMaxSpinsBeforeThinLockInflation = 50;
+ static constexpr int kDefaultMonitorTimeoutMs = 500;
+
+ static constexpr int kMonitorTimeoutMinMs = 200;
+
+ static constexpr int kMonitorTimeoutMaxMs = 1000; // 1 second
+
~Monitor();
static void Init(uint32_t lock_profiling_threshold, uint32_t stack_dump_lock_profiling_threshold);
@@ -413,6 +419,8 @@ class Monitor {
void CheckLockOwnerRequest(Thread* self)
REQUIRES(monitor_lock_) REQUIRES_SHARED(Locks::mutator_lock_);
+ void MaybeEnableTimeout() REQUIRES(Locks::mutator_lock_);
+
// The denser encoded version of this monitor as stored in the lock word.
MonitorId monitor_id_;
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 1cb8e9f1ea..1bd905dd11 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -383,6 +383,13 @@ std::unique_ptr<RuntimeParser> ParsedOptions::MakeParser(bool ignore_unrecognize
.Define("-XX:ThreadSuspendTimeout=_") // in ms
.WithType<MillisecondsToNanoseconds>() // store as ns
.IntoKey(M::ThreadSuspendTimeout)
+ .Define("-XX:MonitorTimeoutEnable=_")
+ .WithType<bool>()
+ .WithValueMap({{"false", false}, {"true", true}})
+ .IntoKey(M::MonitorTimeoutEnable)
+ .Define("-XX:MonitorTimeout=_") // in ms
+ .WithType<int>()
+ .IntoKey(M::MonitorTimeout)
.Define("-XX:GlobalRefAllocStackTraceLimit=_") // Number of free slots to enable tracing.
.WithType<unsigned int>()
.IntoKey(M::GlobalRefAllocStackTraceLimit)
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 572e071f3b..53431508c7 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -284,6 +284,8 @@ Runtime::Runtime()
async_exceptions_thrown_(false),
non_standard_exits_enabled_(false),
is_java_debuggable_(false),
+ monitor_timeout_enable_(false),
+ monitor_timeout_ns_(0),
zygote_max_failed_boots_(0),
experimental_flags_(ExperimentalFlags::kNone),
oat_file_manager_(nullptr),
@@ -1047,13 +1049,15 @@ void Runtime::InitNonZygoteOrPostFork(
DCHECK(!IsZygote());
- if (is_system_server && profile_system_server) {
+ if (is_system_server) {
// Set the system server package name to "android".
// This is used to tell the difference between samples provided by system server
// and samples generated by other apps when processing boot image profiles.
SetProcessPackageName("android");
- jit_options_->SetWaitForJitNotificationsToSaveProfile(false);
- VLOG(profiler) << "Enabling system server profiles";
+ if (profile_system_server) {
+ jit_options_->SetWaitForJitNotificationsToSaveProfile(false);
+ VLOG(profiler) << "Enabling system server profiles";
+ }
}
// Create the thread pools.
@@ -1412,6 +1416,14 @@ bool Runtime::Init(RuntimeArgumentMap&& runtime_options_in) {
thread_list_ = new ThreadList(runtime_options.GetOrDefault(Opt::ThreadSuspendTimeout));
intern_table_ = new InternTable;
+ monitor_timeout_enable_ = runtime_options.GetOrDefault(Opt::MonitorTimeoutEnable);
+ int monitor_timeout_ms = runtime_options.GetOrDefault(Opt::MonitorTimeout);
+ if (monitor_timeout_ms < Monitor::kMonitorTimeoutMinMs ||
+ monitor_timeout_ms >= Monitor::kMonitorTimeoutMaxMs) {
+ LOG(ERROR) << "Improper monitor timeout could cause crash!";
+ }
+ monitor_timeout_ns_ = MsToNs(monitor_timeout_ms);
+
verify_ = runtime_options.GetOrDefault(Opt::Verify);
target_sdk_version_ = runtime_options.GetOrDefault(Opt::TargetSdkVersion);
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 6f11916699..88f7bc0240 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -981,6 +981,13 @@ class Runtime {
return perfetto_javaheapprof_enabled_;
}
+ bool IsMonitorTimeoutEnabled() const {
+ return monitor_timeout_enable_;
+ }
+
+ uint64_t GetMonitorTimeoutNs() const {
+ return monitor_timeout_ns_;
+ }
// Return true if we should load oat files as executable or not.
bool GetOatFilesExecutable() const;
@@ -1239,6 +1246,9 @@ class Runtime {
// Whether Java code needs to be debuggable.
bool is_java_debuggable_;
+ bool monitor_timeout_enable_;
+ uint64_t monitor_timeout_ns_;
+
// Whether or not this application can be profiled by the shell user,
// even when running on a device that is running in user mode.
bool is_profileable_from_shell_ = false;
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index ef5bed7779..3f0e3cca59 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -66,6 +66,8 @@ RUNTIME_OPTIONS_KEY (MillisecondsToNanoseconds, \
LongGCLogThreshold, gc::Heap::kDefaultLongGCLogThreshold)
RUNTIME_OPTIONS_KEY (MillisecondsToNanoseconds, \
ThreadSuspendTimeout, ThreadList::kDefaultThreadSuspendTimeout)
+RUNTIME_OPTIONS_KEY (bool, MonitorTimeoutEnable, false)
+RUNTIME_OPTIONS_KEY (int, MonitorTimeout, Monitor::kDefaultMonitorTimeoutMs)
RUNTIME_OPTIONS_KEY (Unit, DumpGCPerformanceOnShutdown)
RUNTIME_OPTIONS_KEY (Unit, DumpRegionInfoBeforeGC)
RUNTIME_OPTIONS_KEY (Unit, DumpRegionInfoAfterGC)
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index fb4f6d304d..e0d62d0e03 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -118,15 +118,6 @@ bool ThreadList::Contains(Thread* thread) {
return find(list_.begin(), list_.end(), thread) != list_.end();
}
-bool ThreadList::Contains(pid_t tid) {
- for (const auto& thread : list_) {
- if (thread->GetTid() == tid) {
- return true;
- }
- }
- return false;
-}
-
pid_t ThreadList::GetLockOwner() {
return Locks::thread_list_lock_->GetExclusiveOwnerTid();
}
@@ -179,12 +170,12 @@ void ThreadList::DumpUnattachedThreads(std::ostream& os, bool dump_native_stack)
char* end;
pid_t tid = strtol(e->d_name, &end, 10);
if (!*end) {
- bool contains;
+ Thread* thread;
{
MutexLock mu(self, *Locks::thread_list_lock_);
- contains = Contains(tid);
+ thread = FindThreadByTid(tid);
}
- if (!contains) {
+ if (thread != nullptr) {
DumpUnattachedThread(os, tid, dump_native_stack);
}
}
@@ -1110,6 +1101,15 @@ Thread* ThreadList::FindThreadByThreadId(uint32_t thread_id) {
return nullptr;
}
+Thread* ThreadList::FindThreadByTid(int tid) {
+ for (const auto& thread : list_) {
+ if (thread->GetTid() == tid) {
+ return thread;
+ }
+ }
+ return nullptr;
+}
+
void ThreadList::WaitForOtherNonDaemonThreadsToExit(bool check_no_birth) {
ScopedTrace trace(__PRETTY_FUNCTION__);
Thread* self = Thread::Current();
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index 1dcdf6241a..87a4c8dc61 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -104,6 +104,9 @@ class ThreadList {
// Find an existing thread (or self) by its thread id (not tid).
Thread* FindThreadByThreadId(uint32_t thread_id) REQUIRES(Locks::thread_list_lock_);
+ // Find an existing thread (or self) by its tid (not thread id).
+ Thread* FindThreadByTid(int tid) REQUIRES(Locks::thread_list_lock_);
+
// Does the thread list still contain the given thread, or one at the same address?
// Used by Monitor to provide (mostly accurate) debugging information.
bool Contains(Thread* thread) REQUIRES(Locks::thread_list_lock_);
@@ -191,7 +194,6 @@ class ThreadList {
uint32_t AllocThreadId(Thread* self);
void ReleaseThreadId(Thread* self, uint32_t id) REQUIRES(!Locks::allocated_thread_ids_lock_);
- bool Contains(pid_t tid) REQUIRES(Locks::thread_list_lock_);
size_t RunCheckpoint(Closure* checkpoint_function, bool includeSuspended)
REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);