Fix thread hang
- Primary problem was ScopedThreadListLock was releasing heap lock in constructor instead of destructor
- Secondary problem was ScopedThreadListLock should not be used with Mutex::Wait
- Added Thread.getStackTrace case to ThreadStress that reproduces YouTube problem
- Added Mutex::GetDepth and related methods that were useful in diagnoising this issue
Change-Id: I1bdc7245e9b411378b98f4dcf498ad66eb96366d
diff --git a/src/heap.h b/src/heap.h
index 6b056a2..2e9aa35 100644
--- a/src/heap.h
+++ b/src/heap.h
@@ -22,6 +22,7 @@
#include "card_table.h"
#include "globals.h"
#include "heap_bitmap.h"
+#include "mutex.h"
#include "offsets.h"
#define VERIFY_OBJECT_ENABLED 0
@@ -29,7 +30,6 @@
namespace art {
class Class;
-class Mutex;
class Object;
class Space;
class Thread;
@@ -107,6 +107,12 @@
static pid_t GetLockOwner(); // For SignalCatcher.
static void Lock();
static void Unlock();
+ static void AssertLockHeld() {
+ lock_->AssertHeld();
+ }
+ static void AssertLockNotHeld() {
+ lock_->AssertNotHeld();
+ }
static const std::vector<Space*>& GetSpaces() {
return spaces_;
diff --git a/src/mutex.cc b/src/mutex.cc
index fca31b6..a2d9558 100644
--- a/src/mutex.cc
+++ b/src/mutex.cc
@@ -103,6 +103,33 @@
#endif
}
+uint32_t Mutex::GetDepth() {
+ bool held = (GetOwner() == GetTid());
+ if (!held) {
+ return 0;
+ }
+ uint32_t depth;
+#if defined(__BIONIC__)
+ depth = static_cast<uint32_t>((mutex_.value >> 2) & 0x7ff) + 1;
+#elif defined(__GLIBC__)
+ struct __attribute__((__may_alias__)) glibc_pthread_t {
+ int lock;
+ unsigned int count;
+ int owner;
+ // ...other stuff we don't care about.
+ };
+ depth = reinterpret_cast<glibc_pthread_t*>(&mutex_)->count;
+#elif defined(__APPLE__)
+ // We don't know a way to implement this for Mac OS.
+ return 0;
+#else
+ UNIMPLEMENTED(FATAL);
+ return 0;
+#endif
+ CHECK_NE(0U, depth) << "owner=" << GetOwner() << " tid=" << GetTid();
+ return depth;
+}
+
pid_t Mutex::GetTid() {
return ::art::GetTid();
}
diff --git a/src/mutex.h b/src/mutex.h
index 949d221..93f6c3e 100644
--- a/src/mutex.h
+++ b/src/mutex.h
@@ -18,6 +18,7 @@
#define ART_SRC_MUTEX_H_
#include <pthread.h>
+#include <stdint.h>
#include <string>
#include "logging.h"
@@ -58,11 +59,19 @@
pid_t GetOwner();
+ void AssertDepth(uint32_t depth) {
+#if !defined(__APPLE__)
+ DCHECK_EQ(depth, GetDepth());
+#endif
+ }
+
private:
static pid_t GetTid();
void ClearOwner();
+ uint32_t GetDepth();
+
std::string name_;
pthread_mutex_t mutex_;
diff --git a/src/mutex_test.cc b/src/mutex_test.cc
new file mode 100644
index 0000000..bafd789
--- /dev/null
+++ b/src/mutex_test.cc
@@ -0,0 +1,53 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+
+#include "mutex.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+TEST(Mutex, LockUnlock) {
+ Mutex mu("test mutex");
+ mu.AssertDepth(0U);
+ mu.Lock();
+ mu.AssertDepth(1U);
+ mu.Unlock();
+ mu.AssertDepth(0U);
+}
+
+TEST(Mutex, TryLockUnlock) {
+ Mutex mu("test mutex");
+ mu.AssertDepth(0U);
+ mu.TryLock();
+ mu.AssertDepth(1U);
+ mu.Unlock();
+ mu.AssertDepth(0U);
+}
+
+TEST(Mutex, RecursiveLockUnlock) {
+ Mutex mu("test mutex");
+ mu.AssertDepth(0U);
+ mu.Lock();
+ mu.AssertDepth(1U);
+ mu.Lock();
+ mu.AssertDepth(2U);
+ mu.Unlock();
+ mu.AssertDepth(1U);
+ mu.Unlock();
+ mu.AssertDepth(0U);
+}
+
+TEST(Mutex, RecursiveTryLockUnlock) {
+ Mutex mu("test mutex");
+ mu.AssertDepth(0U);
+ mu.TryLock();
+ mu.AssertDepth(1U);
+ mu.TryLock();
+ mu.AssertDepth(2U);
+ mu.Unlock();
+ mu.AssertDepth(1U);
+ mu.Unlock();
+ mu.AssertDepth(0U);
+}
+
+} // namespace art
diff --git a/src/thread_list.cc b/src/thread_list.cc
index 9f7f08e..57a2124 100644
--- a/src/thread_list.cc
+++ b/src/thread_list.cc
@@ -32,7 +32,8 @@
// back into managed code to remove the thread from its ThreadGroup, and that allocates
// an iterator).
// TODO: this makes the distinction between the two locks pretty pointless.
- if (self != NULL) {
+ heap_lock_held_ = (self != NULL);
+ if (heap_lock_held_) {
Heap::Lock();
}
@@ -50,14 +51,13 @@
thread_list->thread_list_lock_.Lock();
}
}
-
- if (self != NULL) {
- Heap::Unlock();
- }
}
ScopedThreadListLock::~ScopedThreadListLock() {
Runtime::Current()->GetThreadList()->thread_list_lock_.Unlock();
+ if (heap_lock_held_) {
+ Heap::Unlock();
+ }
}
ThreadList::ThreadList()
@@ -420,13 +420,16 @@
CHECK(child != self);
{
- ScopedThreadListLock thread_list_lock;
+ // We don't use ScopedThreadListLock here because we don't want to
+ // hold the heap lock while waiting because it can lead to deadlock.
+ thread_list_lock_.Lock();
VLOG(threads) << *self << " waiting for child " << *child << " to be in thread list...";
// We wait for the child to tell us that it's in the thread list.
while (child->GetState() != Thread::kStarting) {
thread_start_cond_.Wait(thread_list_lock_);
}
+ thread_list_lock_.Unlock();
}
// If we switch out of runnable and then back in, we know there's no pending suspend.
@@ -445,7 +448,9 @@
DCHECK(Contains(self));
{
- ScopedThreadListLock thread_list_lock;
+ // We don't use ScopedThreadListLock here because we don't want to
+ // hold the heap lock while waiting because it can lead to deadlock.
+ thread_list_lock_.Lock();
// Tell our parent that we're in the thread list.
VLOG(threads) << *self << " telling parent that we're now in thread list...";
@@ -458,6 +463,7 @@
while (self->GetState() != Thread::kVmWait) {
thread_start_cond_.Wait(thread_list_lock_);
}
+ thread_list_lock_.Unlock();
}
// Enter the runnable state. We know that any pending suspend will affect us now.
diff --git a/src/thread_list.h b/src/thread_list.h
index 5da9877..eb08401 100644
--- a/src/thread_list.h
+++ b/src/thread_list.h
@@ -92,6 +92,7 @@
~ScopedThreadListLock();
private:
+ bool heap_lock_held_;
DISALLOW_COPY_AND_ASSIGN(ScopedThreadListLock);
};