Read thread-local mark-stack inside critical section

Reading the thread-local mark-stack outside mark_stack_lock_'s critical
section (CS) and then adding it to the pooled_mark_stacks_ vector inside
the CS leads to a concurrency issue: a destroying mutator reads its
mark-stack and then waits on mark_stack_lock_ while a concurrently
running GC thread revokes that mutator's mark-stack. When the mutator
eventually resumes, it adds the same mark-stack to the
pooled_mark_stacks_ vector a second time.

Therefore, the thread-local mark-stack must also be read inside
mark_stack_lock_'s CS.
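
Below is a minimal sketch of the race, using std::mutex, std::vector and
a plain pointer as stand-ins for mark_stack_lock_, revoked_mark_stacks_
and the thread-local mark-stack slot (the names and types here are
simplified for illustration and are not the actual ART code):

    #include <mutex>
    #include <vector>

    struct MarkStack {};

    std::mutex mark_stack_lock;              // stands in for mark_stack_lock_
    std::vector<MarkStack*> revoked_stacks;  // stands in for revoked_mark_stacks_
    MarkStack* tl_mark_stack = nullptr;      // stands in for the thread-local slot

    // Buggy pattern: the slot is read before the lock is taken. While this
    // thread is blocked on the mutex, a concurrent revoker can push the same
    // stack and clear the slot; this thread then pushes the stale pointer.
    void RevokeRacy() {
      MarkStack* stack = tl_mark_stack;      // read outside the CS
      if (stack != nullptr) {
        std::lock_guard<std::mutex> lock(mark_stack_lock);
        revoked_stacks.push_back(stack);     // may duplicate an earlier push
        tl_mark_stack = nullptr;
      }
    }

    // Fixed pattern: read, check, push and clear all inside the same CS, so
    // only one thread can observe a non-null slot and push that stack.
    void RevokeSafe() {
      std::lock_guard<std::mutex> lock(mark_stack_lock);
      MarkStack* stack = tl_mark_stack;      // read inside the CS
      if (stack != nullptr) {
        revoked_stacks.push_back(stack);
        tl_mark_stack = nullptr;
      }
    }

In RevokeSafe() the read-check-push-clear sequence is atomic with
respect to other revokers, which is the property the change below
restores for the real mark-stack code.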

Test: art/test/testrunner/testrunner.py
Bug: 140119552
Change-Id: I1daf7131e380699cd6bfcc8bfe2f2db52b661bd6
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index afae3ef..d15fdad 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -995,12 +995,14 @@
     CHECK(thread == self || thread->IsSuspended() || thread->GetState() == kWaitingPerformingGc)
         << thread->GetState() << " thread " << thread << " self " << self;
     // Revoke thread local mark stacks.
-    accounting::AtomicStack<mirror::Object>* tl_mark_stack = thread->GetThreadLocalMarkStack();
-    if (tl_mark_stack != nullptr) {
+    {
       MutexLock mu(self, concurrent_copying_->mark_stack_lock_);
-      concurrent_copying_->revoked_mark_stacks_.push_back(tl_mark_stack);
-      thread->SetThreadLocalMarkStack(nullptr);
-      concurrent_copying_->RemoveThreadMarkStackMapping(thread, tl_mark_stack);
+      accounting::AtomicStack<mirror::Object>* tl_mark_stack = thread->GetThreadLocalMarkStack();
+      if (tl_mark_stack != nullptr) {
+        concurrent_copying_->revoked_mark_stacks_.push_back(tl_mark_stack);
+        thread->SetThreadLocalMarkStack(nullptr);
+        concurrent_copying_->RemoveThreadMarkStackMapping(thread, tl_mark_stack);
+      }
     }
     // Disable weak ref access.
     if (disable_weak_ref_access_) {
@@ -2051,10 +2053,10 @@
 void ConcurrentCopying::RevokeThreadLocalMarkStack(Thread* thread) {
   Thread* self = Thread::Current();
   CHECK_EQ(self, thread);
+  MutexLock mu(self, mark_stack_lock_);
   accounting::AtomicStack<mirror::Object>* tl_mark_stack = thread->GetThreadLocalMarkStack();
   if (tl_mark_stack != nullptr) {
     CHECK(is_marking_);
-    MutexLock mu(self, mark_stack_lock_);
     revoked_mark_stacks_.push_back(tl_mark_stack);
     RemoveThreadMarkStackMapping(thread, tl_mark_stack);
     thread->SetThreadLocalMarkStack(nullptr);
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 0240460..6482ff7 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -360,6 +360,10 @@
   // (see use case in ConcurrentCopying::MarkFromReadBarrier).
   bool rb_mark_bit_stack_full_;
 
+  // Guards access to pooled_mark_stacks_ and revoked_mark_stacks_ vectors.
+  // Also guards destruction and revocations of thread-local mark-stacks.
+  // Clearing thread-local mark-stack (by other threads or during destruction)
+  // should be guarded by it.
   Mutex mark_stack_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   std::vector<accounting::ObjectStack*> revoked_mark_stacks_
       GUARDED_BY(mark_stack_lock_);