Fix race when a thread attaches during GC.

Forgot to mask in the suspend request flag when a thread attaches while a
GC has suspended all threads, so the newly attached thread could become
runnable in the middle of the GC.
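The core of the fix is the attach path in thread_list.cc (full hunk
below). A condensed sketch of the fixed registration logic -- it elides
the Contains() check and surrounding bookkeeping, so read it as an
illustration of the idea rather than the exact patched body:

  void ThreadList::Register(Thread* self) {
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    // Inherit any suspend-all (e.g. a GC's SuspendAll) in progress.
    self->suspend_count_ = suspend_all_count_;
    self->debug_suspend_count_ = debug_suspend_all_count_;
    // Previously missing: without kSuspendRequest set, the flag check
    // in TransitionFromSuspendedToRunnable() saw no pending suspension
    // and let the new thread become runnable mid-GC.
    if (self->suspend_count_ > 0) {
      self->AtomicSetFlag(kSuspendRequest);
    }
    list_.push_back(self);
  }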
Also lots of extra assertions and debugging support: Dump() methods and
operator<< for Mutex and ReaderWriterMutex, an explicit self argument to
ModifySuspendCount(), and DCHECKs that a passed-in self really is
Thread::Current().
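One change worth calling out alongside the assertions:
TransitionFromSuspendedToRunnable() now takes a single 32-bit snapshot of
state_and_flags_ and compare-and-swaps against it, rather than reading the
flags half alone and reconstructing the expected word. A minimal sketch of
the pattern inside the existing do/while retry loop (assuming the
StateAndFlags union from thread.h; done controls the loop):

  union StateAndFlags old_sf = state_and_flags_;  // one atomic 32-bit read
  if ((old_sf.as_struct.flags & kSuspendRequest) == 0) {
    union StateAndFlags new_sf = old_sf;
    new_sf.as_struct.state = kRunnable;
    // CAS on the full word: it fails, and we loop, if a suspend request
    // landed between the flag check and the state change.
    done = android_atomic_cmpxchg(old_sf.as_int, new_sf.as_int,
                                  &state_and_flags_.as_int) == 0;
  }
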
Change-Id: Id4d2ab659284acace51b37b86831a968c1945ae8
diff --git a/src/mutex.cc b/src/mutex.cc
index cf93ef5..a3aec41 100644
--- a/src/mutex.cc
+++ b/src/mutex.cc
@@ -88,6 +88,14 @@
// ...other stuff we don't care about.
};
+static uint64_t SafeGetTid(const Thread* self) {
+ if (self != NULL) {
+ return static_cast<uint64_t>(self->GetTid());
+ } else {
+ return static_cast<uint64_t>(GetTid());
+ }
+}
+
BaseMutex::BaseMutex(const char* name, LockLevel level) : level_(level), name_(name) {}
static void CheckUnattachedThread(LockLevel level) {
@@ -193,6 +201,7 @@
}
void Mutex::ExclusiveLock(Thread* self) {
+ DCHECK(self == NULL || self == Thread::Current());
if (kDebugLocking && !recursive_) {
AssertNotHeld(self);
}
@@ -209,6 +218,7 @@
}
bool Mutex::ExclusiveTryLock(Thread* self) {
+ DCHECK(self == NULL || self == Thread::Current());
if (kDebugLocking && !recursive_) {
AssertNotHeld(self);
}
@@ -233,6 +243,7 @@
}
void Mutex::ExclusiveUnlock(Thread* self) {
+ DCHECK(self == NULL || self == Thread::Current());
AssertHeld(self);
recursion_count_--;
if (!recursive_ || recursion_count_ == 0) {
@@ -246,14 +257,12 @@
}
bool Mutex::IsExclusiveHeld(const Thread* self) const {
- bool result;
- if (self == NULL || level_ == kMonitorLock) { // Handle unattached threads and monitors.
- result = (GetExclusiveOwnerTid() == static_cast<uint64_t>(GetTid()));
- } else {
- result = (self->GetHeldMutex(level_) == this);
- // Sanity debug check that if we think it is locked, so does the pthread.
- if (kDebugLocking) {
- CHECK(result == (GetExclusiveOwnerTid() == static_cast<uint64_t>(GetTid())));
+ DCHECK(self == NULL || self == Thread::Current());
+ bool result = (GetExclusiveOwnerTid() == SafeGetTid(self));
+ if (kDebugLocking) {
+ // Sanity debug check that if we think it is locked we have it in our held mutexes.
+ if (result && self != NULL && level_ != kMonitorLock) {
+ CHECK_EQ(self->GetHeldMutex(level_), this);
}
}
return result;
@@ -280,6 +289,19 @@
#endif
}
+std::string Mutex::Dump() const {
+ return StringPrintf("%s %s level=%d count=%d owner=%llx",
+ recursive_ ? "recursive" : "non-recursive",
+ name_.c_str(),
+ static_cast<int>(level_),
+ recursion_count_,
+ GetExclusiveOwnerTid());
+}
+
+std::ostream& operator<<(std::ostream& os, const Mutex& mu) {
+ return os << mu.Dump();
+}
+
ReaderWriterMutex::ReaderWriterMutex(const char* name, LockLevel level) :
BaseMutex(name, level)
#if ART_USE_FUTEXES
@@ -311,6 +333,7 @@
}
void ReaderWriterMutex::ExclusiveLock(Thread* self) {
+ DCHECK(self == NULL || self == Thread::Current());
AssertNotExclusiveHeld(self);
#if ART_USE_FUTEXES
bool done = false;
@@ -330,7 +353,7 @@
android_atomic_dec(&num_pending_writers_);
}
} while(!done);
- exclusive_owner_ = static_cast<uint64_t>(GetTid());
+ exclusive_owner_ = SafeGetTid(self);
#else
CHECK_MUTEX_CALL(pthread_rwlock_wrlock, (&rwlock_));
#endif
@@ -339,6 +362,7 @@
}
void ReaderWriterMutex::ExclusiveUnlock(Thread* self) {
+ DCHECK(self == NULL || self == Thread::Current());
AssertExclusiveHeld(self);
RegisterAsUnlocked(self);
#if ART_USE_FUTEXES
@@ -367,6 +391,7 @@
#if HAVE_TIMED_RWLOCK
bool ReaderWriterMutex::ExclusiveLockWithTimeout(Thread* self, const timespec& abs_timeout) {
+ DCHECK(self == NULL || self == Thread::Current());
#if ART_USE_FUTEXES
bool done = false;
do {
@@ -388,7 +413,7 @@
android_atomic_dec(&num_pending_writers_);
}
} while(!done);
- exclusive_owner_ = static_cast<uint64_t>(GetTid());
+ exclusive_owner_ = SafeGetTid(self);
#else
int result = pthread_rwlock_timedwrlock(&rwlock_, &abs_timeout);
if (result == ETIMEDOUT) {
@@ -406,6 +431,7 @@
#endif
void ReaderWriterMutex::SharedLock(Thread* self) {
+ DCHECK(self == NULL || self == Thread::Current());
#if ART_USE_FUTEXES
bool done = false;
do {
@@ -432,6 +458,7 @@
}
bool ReaderWriterMutex::SharedTryLock(Thread* self) {
+ DCHECK(self == NULL || self == Thread::Current());
#if ART_USE_FUTEXES
bool done = false;
do {
@@ -460,6 +487,7 @@
}
void ReaderWriterMutex::SharedUnlock(Thread* self) {
+ DCHECK(self == NULL || self == Thread::Current());
AssertSharedHeld(self);
RegisterAsUnlocked(self);
#if ART_USE_FUTEXES
@@ -485,18 +513,22 @@
}
bool ReaderWriterMutex::IsExclusiveHeld(const Thread* self) const {
- bool result = (GetExclusiveOwnerTid() == static_cast<uint64_t>(GetTid()));
+ DCHECK(self == NULL || self == Thread::Current());
+ bool result = (GetExclusiveOwnerTid() == SafeGetTid(self));
if (kDebugLocking) {
// Sanity that if the pthread thinks we own the lock the Thread agrees.
- CHECK((self == NULL) || !result || (self->GetHeldMutex(level_) == this));
+ if (self != NULL && result) {
+ CHECK_EQ(self->GetHeldMutex(level_), this);
+ }
}
return result;
}
bool ReaderWriterMutex::IsSharedHeld(const Thread* self) const {
+ DCHECK(self == NULL || self == Thread::Current());
bool result;
if (UNLIKELY(self == NULL)) { // Handle unattached threads.
- result = IsExclusiveHeld(self); // TODO: a better best effort here.
+ result = IsExclusiveHeld(self); // TODO: a better best effort here.
} else {
result = (self->GetHeldMutex(level_) == this);
}
@@ -527,6 +559,17 @@
#endif
}
+std::string ReaderWriterMutex::Dump() const {
+ return StringPrintf("%s level=%d owner=%llx",
+ name_.c_str(),
+ static_cast<int>(level_),
+ GetExclusiveOwnerTid());
+}
+
+std::ostream& operator<<(std::ostream& os, const ReaderWriterMutex& mu) {
+ return os << mu.Dump();
+}
+
ConditionVariable::ConditionVariable(const std::string& name) : name_(name) {
CHECK_MUTEX_CALL(pthread_cond_init, (&cond_, NULL));
}
diff --git a/src/mutex.h b/src/mutex.h
index af2b352..1738870 100644
--- a/src/mutex.h
+++ b/src/mutex.h
@@ -76,6 +76,7 @@
// Exclusive | Block* | Free
// * Mutex is not reentrant and so an attempt to ExclusiveLock on the same thread will result in
// an error. Being non-reentrant simplifies Waiting on ConditionVariables.
+std::ostream& operator<<(std::ostream& os, const Mutex& mu);
class LOCKABLE Mutex : public BaseMutex {
public:
explicit Mutex(const char* name, LockLevel level = kDefaultMutexLevel, bool recursive = false);
@@ -101,7 +102,7 @@
// Assert that the Mutex is exclusively held by the current thread.
void AssertExclusiveHeld(const Thread* self) {
if (kDebugLocking) {
- CHECK(IsExclusiveHeld(self));
+ CHECK(IsExclusiveHeld(self)) << *this;
}
}
void AssertHeld(const Thread* self) { AssertExclusiveHeld(self); }
@@ -110,7 +111,7 @@
// Assert that the Mutex is not held by the current thread.
void AssertNotHeldExclusive(const Thread* self) {
if (kDebugLocking) {
- CHECK(!IsExclusiveHeld(self));
+ CHECK(!IsExclusiveHeld(self)) << *this;
}
}
void AssertNotHeld(const Thread* self) { AssertNotHeldExclusive(self); }
@@ -123,6 +124,8 @@
return recursion_count_;
}
+ std::string Dump() const;
+
private:
pthread_mutex_t mutex_;
const bool recursive_; // Can the lock be recursively held?
@@ -148,6 +151,7 @@
// Exclusive | Block | Free | Block | error
// Shared(n) | Block | error | SharedLock(n+1)* | Shared(n-1) or Free
// * for large values of n the SharedLock may block.
+std::ostream& operator<<(std::ostream& os, const ReaderWriterMutex& mu);
class LOCKABLE ReaderWriterMutex : public BaseMutex {
public:
explicit ReaderWriterMutex(const char* name, LockLevel level = kDefaultMutexLevel);
@@ -187,7 +191,7 @@
// Assert the current thread has exclusive access to the ReaderWriterMutex.
void AssertExclusiveHeld(const Thread* self) {
if (kDebugLocking) {
- CHECK(IsExclusiveHeld(self));
+ CHECK(IsExclusiveHeld(self)) << *this;
}
}
void AssertWriterHeld(const Thread* self) { AssertExclusiveHeld(self); }
@@ -206,7 +210,7 @@
// Assert the current thread has shared access to the ReaderWriterMutex.
void AssertSharedHeld(const Thread* self) {
if (kDebugLocking) {
- CHECK(IsSharedHeld(self));
+ CHECK(IsSharedHeld(self)) << *this;
}
}
void AssertReaderHeld(const Thread* self) { AssertSharedHeld(self); }
@@ -215,13 +219,15 @@
// mode.
void AssertNotHeld(const Thread* self) {
if (kDebugLocking) {
- CHECK(!IsSharedHeld(self));
+ CHECK(!IsSharedHeld(self)) << *this;
}
}
// Id associated with exclusive owner.
uint64_t GetExclusiveOwnerTid() const;
+ std::string Dump() const;
+
private:
#if ART_USE_FUTEXES
// -1 implies held exclusive, +ve shared held by state_ many owners.
diff --git a/src/signal_catcher.cc b/src/signal_catcher.cc
index 57cae76..c1ac688 100644
--- a/src/signal_catcher.cc
+++ b/src/signal_catcher.cc
@@ -132,7 +132,7 @@
suspend_count = self->GetSuspendCount();
if (suspend_count != 0) {
CHECK_EQ(suspend_count, 1);
- self->ModifySuspendCount(-1, false);
+ self->ModifySuspendCount(self, -1, false);
}
old_state = self->SetStateUnsafe(kRunnable);
}
@@ -159,7 +159,7 @@
MutexLock mu(self, *Locks::thread_suspend_count_lock_);
self->SetState(old_state);
if (suspend_count != 0) {
- self->ModifySuspendCount(+1, false);
+ self->ModifySuspendCount(self, +1, false);
}
}
thread_list->ResumeAll();
diff --git a/src/thread.cc b/src/thread.cc
index bc5b68e..5d1afe1 100644
--- a/src/thread.cc
+++ b/src/thread.cc
@@ -274,6 +274,7 @@
InitRuntimeEntryPoints(&runtime_entry_points_);
#endif
InitCardTable();
+ InitTid();
Runtime* runtime = Runtime::Current();
CHECK(runtime != NULL);
@@ -283,7 +284,6 @@
thin_lock_id_ = runtime->GetThreadList()->AllocThreadId();
pthread_self_ = pthread_self();
- InitTid();
InitStackHwm();
CHECK_PTHREAD_CALL(pthread_setspecific, (Thread::pthread_key_self_, this), "attach self");
@@ -454,22 +454,6 @@
name.assign(*name_);
}
-// Attempt to rectify locks so that we dump thread list with required locks before exiting.
-static void UnsafeLogFatalForSuspendCount(Thread* self) NO_THREAD_SAFETY_ANALYSIS {
- Locks::thread_suspend_count_lock_->Unlock(self);
- Locks::mutator_lock_->SharedTryLock(self);
- if (!Locks::mutator_lock_->IsSharedHeld(self)) {
- LOG(WARNING) << "Dumping thread list without holding mutator_lock_";
- }
- Locks::thread_list_lock_->TryLock(self);
- if (!Locks::thread_list_lock_->IsExclusiveHeld(self)) {
- LOG(WARNING) << "Dumping thread list without holding thread_list_lock_";
- }
- std::ostringstream ss;
- Runtime::Current()->GetThreadList()->DumpLocked(ss);
- LOG(FATAL) << self << " suspend count already zero.\n" << ss.str();
-}
-
void Thread::AtomicSetFlag(ThreadFlag flag) {
android_atomic_or(flag, &state_and_flags_.as_int);
}
@@ -488,23 +472,42 @@
return static_cast<ThreadState>(old_state_and_flags.as_struct.state);
}
-void Thread::ModifySuspendCount(int delta, bool for_debugger) {
+// Attempt to rectify locks so that we dump thread list with required locks before exiting.
+static void UnsafeLogFatalForSuspendCount(Thread* self, Thread* thread) NO_THREAD_SAFETY_ANALYSIS {
+ Locks::thread_suspend_count_lock_->Unlock(self);
+ if (!Locks::mutator_lock_->IsSharedHeld(self)) {
+ Locks::mutator_lock_->SharedTryLock(self);
+ if (!Locks::mutator_lock_->IsSharedHeld(self)) {
+ LOG(WARNING) << "Dumping thread list without holding mutator_lock_";
+ }
+ }
+ if (!Locks::thread_list_lock_->IsExclusiveHeld(self)) {
+ Locks::thread_list_lock_->TryLock(self);
+ if (!Locks::thread_list_lock_->IsExclusiveHeld(self)) {
+ LOG(WARNING) << "Dumping thread list without holding thread_list_lock_";
+ }
+ }
+ std::ostringstream ss;
+ Runtime::Current()->GetThreadList()->DumpLocked(ss);
+ LOG(FATAL) << *thread << " suspend count already zero.\n" << ss.str();
+}
+
+void Thread::ModifySuspendCount(Thread* self, int delta, bool for_debugger) {
DCHECK(delta == -1 || delta == +1 || delta == -debug_suspend_count_)
<< delta << " " << debug_suspend_count_ << " " << this;
DCHECK_GE(suspend_count_, debug_suspend_count_) << this;
- Locks::thread_suspend_count_lock_->AssertHeld();
+ Locks::thread_suspend_count_lock_->AssertHeld(self);
- if (delta == -1 && suspend_count_ <= 0) {
- // This is expected if you attach a thread during a GC.
- if (UNLIKELY(!IsStillStarting())) {
- UnsafeLogFatalForSuspendCount(this);
- }
+ if (UNLIKELY(delta < 0 && suspend_count_ <= 0)) {
+ UnsafeLogFatalForSuspendCount(self, this);
return;
}
+
suspend_count_ += delta;
if (for_debugger) {
debug_suspend_count_ += delta;
}
+
if (suspend_count_ == 0) {
AtomicClearFlag(kSuspendRequest);
} else {
@@ -534,30 +537,35 @@
ThreadState Thread::TransitionFromSuspendedToRunnable() {
bool done = false;
- ThreadState old_state = GetState();
- DCHECK_NE(old_state, kRunnable);
+ union StateAndFlags old_state_and_flags = state_and_flags_;
+ int16_t old_state = old_state_and_flags.as_struct.state;
+ DCHECK_NE(static_cast<ThreadState>(old_state), kRunnable);
do {
Locks::mutator_lock_->AssertNotHeld(this); // Otherwise we starve GC..
- DCHECK_EQ(GetState(), old_state);
- if (ReadFlag(kSuspendRequest)) {
+ old_state_and_flags = state_and_flags_;
+ DCHECK_EQ(old_state_and_flags.as_struct.state, old_state);
+ if ((old_state_and_flags.as_struct.flags & kSuspendRequest) != 0) {
// Wait while our suspend count is non-zero.
MutexLock mu(this, *Locks::thread_suspend_count_lock_);
- DCHECK_EQ(GetState(), old_state);
- while (ReadFlag(kSuspendRequest)) {
+ old_state_and_flags = state_and_flags_;
+ DCHECK_EQ(old_state_and_flags.as_struct.state, old_state);
+ while ((old_state_and_flags.as_struct.flags & kSuspendRequest) != 0) {
// Re-check when Thread::resume_cond_ is notified.
Thread::resume_cond_->Wait(this, *Locks::thread_suspend_count_lock_);
- DCHECK_EQ(GetState(), old_state);
+ old_state_and_flags = state_and_flags_;
+ DCHECK_EQ(old_state_and_flags.as_struct.state, old_state);
}
DCHECK_EQ(GetSuspendCount(), 0);
}
// Re-acquire shared mutator_lock_ access.
Locks::mutator_lock_->SharedLock(this);
// Atomically change from suspended to runnable if no suspend request pending.
- int16_t old_flags = state_and_flags_.as_struct.flags;
- if ((old_flags & kSuspendRequest) == 0) {
- int32_t old_state_and_flags = old_flags | (old_state << 16);
- int32_t new_state_and_flags = old_flags | (kRunnable << 16);
- done = android_atomic_cmpxchg(old_state_and_flags, new_state_and_flags,
+ old_state_and_flags = state_and_flags_;
+ DCHECK_EQ(old_state_and_flags.as_struct.state, old_state);
+ if ((old_state_and_flags.as_struct.flags & kSuspendRequest) == 0) {
+ union StateAndFlags new_state_and_flags = old_state_and_flags;
+ new_state_and_flags.as_struct.state = kRunnable;
+ done = android_atomic_cmpxchg(old_state_and_flags.as_int, new_state_and_flags.as_int,
&state_and_flags_.as_int)
== 0;
}
@@ -566,7 +574,7 @@
Locks::mutator_lock_->SharedUnlock(this);
}
} while (!done);
- return old_state;
+ return static_cast<ThreadState>(old_state);
}
Thread* Thread::SuspendForDebugger(jobject peer, bool request_suspension, bool* timeout) {
@@ -588,7 +596,7 @@
{
MutexLock mu(soa.Self(), *Locks::thread_suspend_count_lock_);
if (request_suspension) {
- thread->ModifySuspendCount(+1, true /* for_debugger */);
+ thread->ModifySuspendCount(soa.Self(), +1, true /* for_debugger */);
request_suspension = false;
did_suspend_request = true;
}
@@ -606,7 +614,7 @@
if (total_delay_us >= kTimeoutUs) {
LOG(ERROR) << "Thread suspension timed out: " << peer;
if (did_suspend_request) {
- thread->ModifySuspendCount(-1, true /* for_debugger */);
+ thread->ModifySuspendCount(soa.Self(), -1, true /* for_debugger */);
}
*timeout = true;
return NULL;
diff --git a/src/thread.h b/src/thread.h
index 257dee4..68172e5 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -169,7 +169,7 @@
return GetState() != kRunnable && ReadFlag(kSuspendRequest);
}
- void ModifySuspendCount(int delta, bool for_debugger)
+ void ModifySuspendCount(Thread* self, int delta, bool for_debugger)
EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_suspend_count_lock_);
// Called when thread detected that the thread_suspend_count_ was non-zero. Gives up share of
@@ -636,9 +636,9 @@
// transitions. Changing to Runnable requires that the suspend_request be part of the atomic
// operation. If a thread is suspended and a suspend_request is present, a thread may not
// change to Runnable as a GC or other operation is in progress.
- uint16_t state;
+ volatile uint16_t state;
} as_struct;
- int32_t as_int;
+ volatile int32_t as_int;
};
union StateAndFlags state_and_flags_;
COMPILE_ASSERT(sizeof(union StateAndFlags) == sizeof(int32_t),
diff --git a/src/thread_list.cc b/src/thread_list.cc
index 082d7af..56912ac 100644
--- a/src/thread_list.cc
+++ b/src/thread_list.cc
@@ -117,12 +117,16 @@
}
}
-void ThreadList::AssertThreadsAreSuspended() {
+void ThreadList::AssertThreadsAreSuspended(Thread* ignore1, Thread* ignore2) {
MutexLock mu(*Locks::thread_list_lock_);
MutexLock mu2(*Locks::thread_suspend_count_lock_);
for (It it = list_.begin(), end = list_.end(); it != end; ++it) {
Thread* thread = *it;
- CHECK_NE(thread->GetState(), kRunnable);
+ if (thread != ignore1 && thread != ignore2) {
+ CHECK(thread->IsSuspended())
+ << "\nUnsuspended thread: <<" << *thread << "\n"
+ << "self: <<" << *Thread::Current();
+ }
}
}
@@ -171,7 +175,7 @@
continue;
}
VLOG(threads) << "requesting thread suspend: " << *thread;
- thread->ModifySuspendCount(+1, false);
+ thread->ModifySuspendCount(self, +1, false);
}
}
}
@@ -190,7 +194,7 @@
#endif
// Debug check that all threads are suspended.
- AssertThreadsAreSuspended();
+ AssertThreadsAreSuspended(self);
VLOG(threads) << *self << " SuspendAll complete";
}
@@ -199,6 +203,10 @@
Thread* self = Thread::Current();
VLOG(threads) << *self << " ResumeAll starting";
+
+ // Debug check that all threads are suspended.
+ AssertThreadsAreSuspended(self);
+
Locks::mutator_lock_->ExclusiveUnlock(self);
{
MutexLock mu(self, *Locks::thread_list_lock_);
@@ -211,7 +219,7 @@
if (thread == self) {
continue;
}
- thread->ModifySuspendCount(-1, false);
+ thread->ModifySuspendCount(self, -1, false);
}
// Broadcast a notification to all suspended threads, some or all of
@@ -236,7 +244,7 @@
if (!Contains(thread)) {
return;
}
- thread->ModifySuspendCount(-1, for_debugger);
+ thread->ModifySuspendCount(self, -1, for_debugger);
}
{
@@ -268,7 +276,7 @@
continue;
}
VLOG(threads) << "requesting thread suspend: " << *thread;
- thread->ModifySuspendCount(+1, true);
+ thread->ModifySuspendCount(self, +1, true);
}
}
}
@@ -289,7 +297,7 @@
Locks::mutator_lock_->ExclusiveLock(self);
Locks::mutator_lock_->ExclusiveUnlock(self);
#endif
- AssertThreadsAreSuspended();
+ AssertThreadsAreSuspended(self, debug_thread);
VLOG(threads) << *self << " SuspendAll complete";
}
@@ -306,7 +314,7 @@
// to ensure that we're the only one fiddling with the suspend count
// though.
MutexLock mu(self, *Locks::thread_suspend_count_lock_);
- self->ModifySuspendCount(+1, true);
+ self->ModifySuspendCount(self, +1, true);
// Suspend ourselves.
CHECK_GT(self->suspend_count_, 0);
@@ -351,7 +359,7 @@
if (thread == self || thread->debug_suspend_count_ == 0) {
continue;
}
- thread->ModifySuspendCount(-thread->debug_suspend_count_, true);
+ thread->ModifySuspendCount(self, -thread->debug_suspend_count_, true);
}
}
@@ -397,7 +405,7 @@
// daemons.
CHECK(thread->IsDaemon());
if (thread != self) {
- thread->ModifySuspendCount(+1, false);
+ thread->ModifySuspendCount(self, +1, false);
}
}
}
@@ -438,6 +446,9 @@
MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
self->suspend_count_ = suspend_all_count_;
self->debug_suspend_count_ = debug_suspend_all_count_;
+ if (self->suspend_count_ > 0) {
+ self->AtomicSetFlag(kSuspendRequest);
+ }
CHECK(!Contains(self));
list_.push_back(self);
}
diff --git a/src/thread_list.h b/src/thread_list.h
index b80c1a5..f1c8a44 100644
--- a/src/thread_list.h
+++ b/src/thread_list.h
@@ -107,7 +107,7 @@
LOCKS_EXCLUDED(Locks::thread_list_lock_,
Locks::thread_suspend_count_lock_);
- void AssertThreadsAreSuspended()
+ void AssertThreadsAreSuspended(Thread* ignore1, Thread* ignore2 = NULL)
LOCKS_EXCLUDED(Locks::thread_list_lock_,
Locks::thread_suspend_count_lock_);