-rw-r--r-- | runtime/asm_support.h                      |   2
-rw-r--r-- | runtime/entrypoints_order_test.cc          |   3
-rw-r--r-- | runtime/gc/accounting/atomic_stack.h       |   4
-rw-r--r-- | runtime/gc/allocation_record.cc            |   3
-rw-r--r-- | runtime/gc/collector/concurrent_copying.cc | 573
-rw-r--r-- | runtime/gc/collector/concurrent_copying.h  | 129
-rw-r--r-- | runtime/gc/reference_processor.cc          |  42
-rw-r--r-- | runtime/gc/reference_processor.h           |   1
-rw-r--r-- | runtime/intern_table.cc                    |  10
-rw-r--r-- | runtime/intern_table.h                     |   1
-rw-r--r-- | runtime/java_vm_ext.cc                     |  13
-rw-r--r-- | runtime/java_vm_ext.h                      |   1
-rw-r--r-- | runtime/monitor.cc                         |  10
-rw-r--r-- | runtime/monitor.h                          |   1
-rw-r--r-- | runtime/oat.h                              |   2
-rw-r--r-- | runtime/runtime.cc                         |   7
-rw-r--r-- | runtime/runtime.h                          |   1
-rw-r--r-- | runtime/thread.cc                          |   4
-rw-r--r-- | runtime/thread.h                           |  36
-rw-r--r-- | runtime/thread_list.cc                     |   7
20 files changed, 610 insertions, 240 deletions
diff --git a/runtime/asm_support.h b/runtime/asm_support.h index 20d75f37b3..9142012b73 100644 --- a/runtime/asm_support.h +++ b/runtime/asm_support.h @@ -89,7 +89,7 @@ ADD_TEST_EQ(THREAD_ID_OFFSET, art::Thread::ThinLockIdOffset<__SIZEOF_POINTER__>().Int32Value()) // Offset of field Thread::tlsPtr_.card_table. -#define THREAD_CARD_TABLE_OFFSET 128 +#define THREAD_CARD_TABLE_OFFSET 136 ADD_TEST_EQ(THREAD_CARD_TABLE_OFFSET, art::Thread::CardTableOffset<__SIZEOF_POINTER__>().Int32Value()) diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc index 0a5ebfa81b..77b1e862a0 100644 --- a/runtime/entrypoints_order_test.cc +++ b/runtime/entrypoints_order_test.cc @@ -133,7 +133,8 @@ class EntrypointsOrderTest : public CommonRuntimeTest { sizeof(void*) * kLockLevelCount); EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, nested_signal_state, flip_function, sizeof(void*)); EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, flip_function, method_verifier, sizeof(void*)); - EXPECT_OFFSET_DIFF(Thread, tlsPtr_.method_verifier, Thread, wait_mutex_, sizeof(void*), + EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, method_verifier, thread_local_mark_stack, sizeof(void*)); + EXPECT_OFFSET_DIFF(Thread, tlsPtr_.thread_local_mark_stack, Thread, wait_mutex_, sizeof(void*), thread_tlsptr_end); } diff --git a/runtime/gc/accounting/atomic_stack.h b/runtime/gc/accounting/atomic_stack.h index ac716eaabc..93f32e8d2e 100644 --- a/runtime/gc/accounting/atomic_stack.h +++ b/runtime/gc/accounting/atomic_stack.h @@ -156,6 +156,10 @@ class AtomicStack { return Size() == 0; } + bool IsFull() const { + return Size() == growth_limit_; + } + size_t Size() const { DCHECK_LE(front_index_.LoadRelaxed(), back_index_.LoadRelaxed()); return back_index_.LoadRelaxed() - front_index_.LoadRelaxed(); diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc index 6537ed2bb8..88c475ba06 100644 --- a/runtime/gc/allocation_record.cc +++ b/runtime/gc/allocation_record.cc @@ -265,7 +265,8 @@ void AllocRecordObjectMap::RecordAllocation(Thread* self, mirror::Object* obj, m } // Wait for GC's sweeping to complete and allow new records - while (UNLIKELY(!records->allow_new_record_)) { + while (UNLIKELY((!kUseReadBarrier && !records->allow_new_record_) || + (kUseReadBarrier && !self->GetWeakRefAccessEnabled()))) { records->new_record_condition_.WaitHoldingLocks(self); } diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index 5e69b797e0..ac4fc69cbc 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -17,6 +17,7 @@ #include "concurrent_copying.h" #include "art_field-inl.h" +#include "base/stl_util.h" #include "gc/accounting/heap_bitmap-inl.h" #include "gc/accounting/space_bitmap-inl.h" #include "gc/space/image_space.h" @@ -37,17 +38,22 @@ ConcurrentCopying::ConcurrentCopying(Heap* heap, const std::string& name_prefix) : GarbageCollector(heap, name_prefix + (name_prefix.empty() ? 
"" : " ") + "concurrent copying + mark sweep"), - region_space_(nullptr), gc_barrier_(new Barrier(0)), mark_queue_(2 * MB), + region_space_(nullptr), gc_barrier_(new Barrier(0)), + gc_mark_stack_(accounting::ObjectStack::Create("concurrent copying gc mark stack", + 2 * MB, 2 * MB)), + mark_stack_lock_("concurrent copying mark stack lock", kMarkSweepMarkStackLock), + thread_running_gc_(nullptr), is_marking_(false), is_active_(false), is_asserting_to_space_invariant_(false), - heap_mark_bitmap_(nullptr), live_stack_freeze_size_(0), + heap_mark_bitmap_(nullptr), live_stack_freeze_size_(0), mark_stack_mode_(kMarkStackModeOff), + weak_ref_access_enabled_(true), skipped_blocks_lock_("concurrent copying bytes blocks lock", kMarkSweepMarkStackLock), rb_table_(heap_->GetReadBarrierTable()), force_evacuate_all_(false) { static_assert(space::RegionSpace::kRegionSize == accounting::ReadBarrierTable::kRegionSize, "The region space size and the read barrier table region size must match"); cc_heap_bitmap_.reset(new accounting::HeapBitmap(heap)); + Thread* self = Thread::Current(); { - Thread* self = Thread::Current(); ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_); // Cache this so that we won't have to lock heap_bitmap_lock_ in // Mark() which could cause a nested lock on heap_bitmap_lock_ @@ -55,9 +61,19 @@ ConcurrentCopying::ConcurrentCopying(Heap* heap, const std::string& name_prefix) // (class_linker_lock_ and heap_bitmap_lock_). heap_mark_bitmap_ = heap->GetMarkBitmap(); } + { + MutexLock mu(self, mark_stack_lock_); + for (size_t i = 0; i < kMarkStackPoolSize; ++i) { + accounting::AtomicStack<mirror::Object>* mark_stack = + accounting::AtomicStack<mirror::Object>::Create( + "thread local mark stack", kMarkStackSize, kMarkStackSize); + pooled_mark_stacks_.push_back(mark_stack); + } + } } ConcurrentCopying::~ConcurrentCopying() { + STLDeleteElements(&pooled_mark_stacks_); } void ConcurrentCopying::RunPhases() { @@ -65,6 +81,7 @@ void ConcurrentCopying::RunPhases() { CHECK(!is_active_); is_active_ = true; Thread* self = Thread::Current(); + thread_running_gc_ = self; Locks::mutator_lock_->AssertNotHeld(self); { ReaderMutexLock mu(self, *Locks::mutator_lock_); @@ -79,7 +96,7 @@ void ConcurrentCopying::RunPhases() { if (kEnableNoFromSpaceRefsVerification || kIsDebugBuild) { TimingLogger::ScopedTiming split("(Paused)VerifyNoFromSpaceReferences", GetTimings()); ScopedPause pause(this); - CheckEmptyMarkQueue(); + CheckEmptyMarkStack(); if (kVerboseMode) { LOG(INFO) << "Verifying no from-space refs"; } @@ -87,7 +104,7 @@ void ConcurrentCopying::RunPhases() { if (kVerboseMode) { LOG(INFO) << "Done verifying no from-space refs"; } - CheckEmptyMarkQueue(); + CheckEmptyMarkStack(); } { ReaderMutexLock mu(self, *Locks::mutator_lock_); @@ -96,6 +113,7 @@ void ConcurrentCopying::RunPhases() { FinishPhase(); CHECK(is_active_); is_active_ = false; + thread_running_gc_ = nullptr; } void ConcurrentCopying::BindBitmaps() { @@ -132,7 +150,7 @@ void ConcurrentCopying::InitializePhase() { LOG(INFO) << "Region-space : " << reinterpret_cast<void*>(region_space_->Begin()) << "-" << reinterpret_cast<void*>(region_space_->Limit()); } - CHECK(mark_queue_.IsEmpty()); + CheckEmptyMarkStack(); immune_region_.Reset(); bytes_moved_.StoreRelaxed(0); objects_moved_.StoreRelaxed(0); @@ -209,6 +227,7 @@ class FlipCallback : public Closure { cc->from_space_num_bytes_at_first_pause_ = cc->region_space_->GetBytesAllocated(); } cc->is_marking_ = true; + cc->mark_stack_mode_.StoreRelaxed(ConcurrentCopying::kMarkStackModeThreadLocal); if 
(UNLIKELY(Runtime::Current()->IsActiveTransaction())) { CHECK(Runtime::Current()->IsAotCompiler()); TimingLogger::ScopedTiming split2("(Paused)VisitTransactionRoots", cc->GetTimings()); @@ -283,7 +302,7 @@ class ConcurrentCopyingImmuneSpaceObjVisitor { } else { // Newly marked. Set the gray bit and push it onto the mark stack. CHECK(!kUseBakerReadBarrier || obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr()); - collector_->PushOntoMarkStack<true>(obj); + collector_->PushOntoMarkStack(obj); } } @@ -319,6 +338,7 @@ void ConcurrentCopying::MarkingPhase() { if (kVerboseMode) { LOG(INFO) << "GC MarkingPhase"; } + CHECK(weak_ref_access_enabled_); { // Mark the image root. The WB-based collectors do not need to // scan the image objects from roots by relying on the card table, @@ -370,37 +390,47 @@ void ConcurrentCopying::MarkingPhase() { Thread* self = Thread::Current(); { TimingLogger::ScopedTiming split6("ProcessMarkStack", GetTimings()); - // Process the mark stack and issue an empty check point. If the - // mark stack is still empty after the check point, we're - // done. Otherwise, repeat. + // We transition through three mark stack modes (thread-local, shared, GC-exclusive). The + // primary reasons are the fact that we need to use a checkpoint to process thread-local mark + // stacks, but after we disable weak refs accesses, we can't use a checkpoint due to a deadlock + // issue because running threads potentially blocking at WaitHoldingLocks, and that once we + // reach the point where we process weak references, we can avoid using a lock when accessing + // the GC mark stack, which makes mark stack processing more efficient. + + // Process the mark stack once in the thread local stack mode. This marks most of the live + // objects, aside from weak ref accesses with read barriers (Reference::GetReferent() and system + // weaks) that may happen concurrently while we processing the mark stack and newly mark/gray + // objects and push refs on the mark stack. ProcessMarkStack(); - size_t count = 0; - while (!ProcessMarkStack()) { - ++count; - if (kVerboseMode) { - LOG(INFO) << "Issue an empty check point. " << count; - } - IssueEmptyCheckpoint(); - } - // Need to ensure the mark stack is empty before reference - // processing to get rid of non-reference gray objects. - CheckEmptyMarkQueue(); - // Enable the GetReference slow path and disallow access to the system weaks. - GetHeap()->GetReferenceProcessor()->EnableSlowPath(); - Runtime::Current()->DisallowNewSystemWeaks(); - QuasiAtomic::ThreadFenceForConstructor(); - // Lock-unlock the system weak locks so that there's no thread in - // the middle of accessing system weaks. - Runtime::Current()->EnsureNewSystemWeaksDisallowed(); - // Note: Do not issue a checkpoint from here to the - // SweepSystemWeaks call or else a deadlock due to - // WaitHoldingLocks() would occur. + // Switch to the shared mark stack mode. That is, revoke and process thread-local mark stacks + // for the last time before transitioning to the shared mark stack mode, which would process new + // refs that may have been concurrently pushed onto the mark stack during the ProcessMarkStack() + // call above. At the same time, disable weak ref accesses using a per-thread flag. 
It's + // important to do these together in a single checkpoint so that we can ensure that mutators + // won't newly gray objects and push new refs onto the mark stack due to weak ref accesses and + // mutators safely transition to the shared mark stack mode (without leaving unprocessed refs on + // the thread-local mark stacks), without a race. This is why we use a thread-local weak ref + // access flag Thread::tls32_.weak_ref_access_enabled_ instead of the global ones. + SwitchToSharedMarkStackMode(); + CHECK(!self->GetWeakRefAccessEnabled()); + // Now that weak refs accesses are disabled, once we exhaust the shared mark stack again here + // (which may be non-empty if there were refs found on thread-local mark stacks during the above + // SwitchToSharedMarkStackMode() call), we won't have new refs to process, that is, mutators + // (via read barriers) have no way to produce any more refs to process. Marking converges once + // before we process weak refs below. + ProcessMarkStack(); + CheckEmptyMarkStack(); + // Switch to the GC exclusive mark stack mode so that we can process the mark stack without a + // lock from this point on. + SwitchToGcExclusiveMarkStackMode(); + CheckEmptyMarkStack(); if (kVerboseMode) { - LOG(INFO) << "Enabled the ref proc slow path & disabled access to system weaks."; LOG(INFO) << "ProcessReferences"; } - ProcessReferences(self, true); - CheckEmptyMarkQueue(); + // Process weak references. This may produce new refs to process and have them processed via + // ProcessMarkStackCallback (in the GC exclusive mark stack mode). + ProcessReferences(self); + CheckEmptyMarkStack(); if (kVerboseMode) { LOG(INFO) << "SweepSystemWeaks"; } @@ -408,33 +438,52 @@ void ConcurrentCopying::MarkingPhase() { if (kVerboseMode) { LOG(INFO) << "SweepSystemWeaks done"; } - // Because hash_set::Erase() can call the hash function for - // arbitrary elements in the weak intern table in - // InternTable::Table::SweepWeaks(), the above SweepSystemWeaks() - // call may have marked some objects (strings) alive. So process - // the mark stack here once again. + // Process the mark stack here one last time because the above SweepSystemWeaks() call may have + // marked some objects (strings alive) as hash_set::Erase() can call the hash function for + // arbitrary elements in the weak intern table in InternTable::Table::SweepWeaks(). ProcessMarkStack(); - CheckEmptyMarkQueue(); - if (kVerboseMode) { - LOG(INFO) << "AllowNewSystemWeaks"; - } - Runtime::Current()->AllowNewSystemWeaks(); + CheckEmptyMarkStack(); + // Re-enable weak ref accesses. + ReenableWeakRefAccess(self); + // Issue an empty checkpoint to ensure no threads are still in the middle of a read barrier + // which may have a from-space ref cached in a local variable. IssueEmptyCheckpoint(); - // Disable marking. + // Marking is done. Disable marking. if (kUseTableLookupReadBarrier) { heap_->rb_table_->ClearAll(); DCHECK(heap_->rb_table_->IsAllCleared()); } - is_mark_queue_push_disallowed_.StoreSequentiallyConsistent(1); - is_marking_ = false; - CheckEmptyMarkQueue(); + is_mark_stack_push_disallowed_.StoreSequentiallyConsistent(1); + is_marking_ = false; // This disables the read barrier/marking of weak roots. 
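For readers following the new ordering in MarkingPhase(), here is a standalone sketch of the mode lifecycle the patch introduces. It is illustrative only, not ART code: types are simplified stand-ins, and the per-thread weak-ref-access flags that the real checkpoint sets are collapsed into a single atomic.

    // Standalone sketch (not ART code) of the mark stack mode lifecycle driven by MarkingPhase():
    // thread-local -> shared -> GC-exclusive -> off, with weak ref access disabled at the first
    // transition and re-enabled just before marking is switched off.
    #include <atomic>

    enum MarkStackMode { kMarkStackModeOff, kMarkStackModeThreadLocal,
                         kMarkStackModeShared, kMarkStackModeGcExclusive };

    struct MarkingPhaseSketch {
      std::atomic<MarkStackMode> mark_stack_mode{kMarkStackModeOff};
      std::atomic<bool> weak_ref_access_enabled{true};  // Per-thread flags in the real patch.

      void Run() {
        mark_stack_mode.store(kMarkStackModeThreadLocal);  // Set during the FlipCallback pause.
        // ... ProcessMarkStack(): mutators push onto thread-local stacks, the GC drains them via checkpoints ...
        mark_stack_mode.store(kMarkStackModeShared);       // SwitchToSharedMarkStackMode(): revoke
        weak_ref_access_enabled.store(false);              // thread-local stacks, disable weak ref access.
        // ... ProcessMarkStack(): drain what the switch revoked; no new refs can appear now ...
        mark_stack_mode.store(kMarkStackModeGcExclusive);  // SwitchToGcExclusiveMarkStackMode().
        // ... ProcessReferences(), SweepSystemWeaks(), one final ProcessMarkStack(), all lock-free ...
        weak_ref_access_enabled.store(true);               // ReenableWeakRefAccess() + broadcasts.
        mark_stack_mode.store(kMarkStackModeOff);          // Marking done.
      }
    };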
+ mark_stack_mode_.StoreSequentiallyConsistent(kMarkStackModeOff); + CheckEmptyMarkStack(); } + CHECK(weak_ref_access_enabled_); if (kVerboseMode) { LOG(INFO) << "GC end of MarkingPhase"; } } +void ConcurrentCopying::ReenableWeakRefAccess(Thread* self) { + if (kVerboseMode) { + LOG(INFO) << "ReenableWeakRefAccess"; + } + weak_ref_access_enabled_.StoreRelaxed(true); // This is for new threads. + QuasiAtomic::ThreadFenceForConstructor(); + // Iterate all threads (don't need to or can't use a checkpoint) and re-enable weak ref access. + { + MutexLock mu(self, *Locks::thread_list_lock_); + std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList(); + for (Thread* thread : thread_list) { + thread->SetWeakRefAccessEnabled(true); + } + } + // Unblock blocking threads. + GetHeap()->GetReferenceProcessor()->BroadcastForSlowPath(self); + Runtime::Current()->BroadcastForNewSystemWeaks(); +} + void ConcurrentCopying::IssueEmptyCheckpoint() { Thread* self = Thread::Current(); EmptyCheckpoint check_point(this); @@ -455,18 +504,61 @@ void ConcurrentCopying::IssueEmptyCheckpoint() { Locks::mutator_lock_->SharedLock(self); } -mirror::Object* ConcurrentCopying::PopOffMarkStack() { - return mark_queue_.Dequeue(); -} - -template<bool kThreadSafe> void ConcurrentCopying::PushOntoMarkStack(mirror::Object* to_ref) { - CHECK_EQ(is_mark_queue_push_disallowed_.LoadRelaxed(), 0) + CHECK_EQ(is_mark_stack_push_disallowed_.LoadRelaxed(), 0) << " " << to_ref << " " << PrettyTypeOf(to_ref); - if (kThreadSafe) { - CHECK(mark_queue_.Enqueue(to_ref)) << "Mark queue overflow"; + Thread* self = Thread::Current(); // TODO: pass self as an argument from call sites? + CHECK(thread_running_gc_ != nullptr); + MarkStackMode mark_stack_mode = mark_stack_mode_.LoadRelaxed(); + if (mark_stack_mode == kMarkStackModeThreadLocal) { + if (self == thread_running_gc_) { + // If GC-running thread, use the GC mark stack instead of a thread-local mark stack. + CHECK(self->GetThreadLocalMarkStack() == nullptr); + CHECK(!gc_mark_stack_->IsFull()); + gc_mark_stack_->PushBack(to_ref); + } else { + // Otherwise, use a thread-local mark stack. + accounting::AtomicStack<mirror::Object>* tl_mark_stack = self->GetThreadLocalMarkStack(); + if (UNLIKELY(tl_mark_stack == nullptr || tl_mark_stack->IsFull())) { + MutexLock mu(self, mark_stack_lock_); + // Get a new thread local mark stack. + accounting::AtomicStack<mirror::Object>* new_tl_mark_stack; + if (!pooled_mark_stacks_.empty()) { + // Use a pooled mark stack. + new_tl_mark_stack = pooled_mark_stacks_.back(); + pooled_mark_stacks_.pop_back(); + } else { + // None pooled. Create a new one. + new_tl_mark_stack = + accounting::AtomicStack<mirror::Object>::Create( + "thread local mark stack", 4 * KB, 4 * KB); + } + DCHECK(new_tl_mark_stack != nullptr); + DCHECK(new_tl_mark_stack->IsEmpty()); + new_tl_mark_stack->PushBack(to_ref); + self->SetThreadLocalMarkStack(new_tl_mark_stack); + if (tl_mark_stack != nullptr) { + // Store the old full stack into a vector. + revoked_mark_stacks_.push_back(tl_mark_stack); + } + } else { + tl_mark_stack->PushBack(to_ref); + } + } + } else if (mark_stack_mode == kMarkStackModeShared) { + // Access the shared GC mark stack with a lock. 
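Since PushOntoMarkStack() is the hot entry point for this machinery, a condensed standalone sketch of its dispatch may help. The types below are stand-ins (std::vector instead of AtomicStack, std::mutex instead of the ART Mutex), and the pooling and revocation of full thread-local stacks is elided.

    // Standalone sketch (stand-in types, not ART code) of how a marked reference is routed
    // depending on the current mark stack mode, mirroring ConcurrentCopying::PushOntoMarkStack().
    #include <atomic>
    #include <mutex>
    #include <vector>

    enum MarkStackMode { kMarkStackModeOff, kMarkStackModeThreadLocal,
                         kMarkStackModeShared, kMarkStackModeGcExclusive };

    struct MarkStacksSketch {
      std::atomic<MarkStackMode> mode{kMarkStackModeThreadLocal};
      std::mutex mark_stack_lock;              // Guards gc_mark_stack in shared mode (and the pool, elided).
      std::vector<const void*> gc_mark_stack;  // Stand-in for the GC-owned ObjectStack.

      void Push(const void* ref, bool is_gc_thread, std::vector<const void*>& tl_mark_stack) {
        MarkStackMode m = mode.load(std::memory_order_relaxed);
        if (m == kMarkStackModeThreadLocal) {
          if (is_gc_thread) {
            gc_mark_stack.push_back(ref);      // The GC-running thread uses the GC stack directly.
          } else {
            tl_mark_stack.push_back(ref);      // Mutators push lock-free onto their own stack; full
          }                                    // stacks are swapped for pooled ones in the real code.
        } else if (m == kMarkStackModeShared) {
          std::lock_guard<std::mutex> lock(mark_stack_lock);
          gc_mark_stack.push_back(ref);        // Everyone shares the GC stack, protected by the lock.
        } else if (m == kMarkStackModeGcExclusive) {
          gc_mark_stack.push_back(ref);        // Only the GC thread runs here, so no lock is needed.
        }
        // kMarkStackModeOff: marking is over; the patch's CHECKs reject pushes in this state.
      }
    };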
+ MutexLock mu(self, mark_stack_lock_); + CHECK(!gc_mark_stack_->IsFull()); + gc_mark_stack_->PushBack(to_ref); } else { - CHECK(mark_queue_.EnqueueThreadUnsafe(to_ref)) << "Mark queue overflow"; + CHECK_EQ(static_cast<uint32_t>(mark_stack_mode), + static_cast<uint32_t>(kMarkStackModeGcExclusive)); + CHECK(self == thread_running_gc_) + << "Only GC-running thread should access the mark stack " + << "in the GC exclusive mark stack mode"; + // Access the GC mark stack without a lock. + CHECK(!gc_mark_stack_->IsFull()); + gc_mark_stack_->PushBack(to_ref); } } @@ -695,83 +787,300 @@ class ConcurrentCopyingAssertToSpaceInvariantObjectVisitor { ConcurrentCopying* collector_; }; -bool ConcurrentCopying::ProcessMarkStack() { +class RevokeThreadLocalMarkStackCheckpoint : public Closure { + public: + explicit RevokeThreadLocalMarkStackCheckpoint(ConcurrentCopying* concurrent_copying, + bool disable_weak_ref_access) + : concurrent_copying_(concurrent_copying), + disable_weak_ref_access_(disable_weak_ref_access) { + } + + virtual void Run(Thread* thread) OVERRIDE NO_THREAD_SAFETY_ANALYSIS { + // Note: self is not necessarily equal to thread since thread may be suspended. + Thread* self = Thread::Current(); + CHECK(thread == self || thread->IsSuspended() || thread->GetState() == kWaitingPerformingGc) + << thread->GetState() << " thread " << thread << " self " << self; + // Revoke thread local mark stacks. + accounting::AtomicStack<mirror::Object>* tl_mark_stack = thread->GetThreadLocalMarkStack(); + if (tl_mark_stack != nullptr) { + MutexLock mu(self, concurrent_copying_->mark_stack_lock_); + concurrent_copying_->revoked_mark_stacks_.push_back(tl_mark_stack); + thread->SetThreadLocalMarkStack(nullptr); + } + // Disable weak ref access. + if (disable_weak_ref_access_) { + thread->SetWeakRefAccessEnabled(false); + } + // If thread is a running mutator, then act on behalf of the garbage collector. + // See the code in ThreadList::RunCheckpoint. + if (thread->GetState() == kRunnable) { + concurrent_copying_->GetBarrier().Pass(self); + } + } + + private: + ConcurrentCopying* const concurrent_copying_; + const bool disable_weak_ref_access_; +}; + +void ConcurrentCopying::RevokeThreadLocalMarkStacks(bool disable_weak_ref_access) { + Thread* self = Thread::Current(); + RevokeThreadLocalMarkStackCheckpoint check_point(this, disable_weak_ref_access); + ThreadList* thread_list = Runtime::Current()->GetThreadList(); + gc_barrier_->Init(self, 0); + size_t barrier_count = thread_list->RunCheckpoint(&check_point); + // If there are no threads to wait which implys that all the checkpoint functions are finished, + // then no need to release the mutator lock. + if (barrier_count == 0) { + return; + } + Locks::mutator_lock_->SharedUnlock(self); + { + ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun); + gc_barrier_->Increment(self, barrier_count); + } + Locks::mutator_lock_->SharedLock(self); +} + +void ConcurrentCopying::RevokeThreadLocalMarkStack(Thread* thread) { + Thread* self = Thread::Current(); + CHECK_EQ(self, thread); + accounting::AtomicStack<mirror::Object>* tl_mark_stack = thread->GetThreadLocalMarkStack(); + if (tl_mark_stack != nullptr) { + CHECK(is_marking_); + MutexLock mu(self, mark_stack_lock_); + revoked_mark_stacks_.push_back(tl_mark_stack); + thread->SetThreadLocalMarkStack(nullptr); + } +} + +void ConcurrentCopying::ProcessMarkStack() { if (kVerboseMode) { LOG(INFO) << "ProcessMarkStack. 
"; } + bool empty_prev = false; + while (true) { + bool empty = ProcessMarkStackOnce(); + if (empty_prev && empty) { + // Saw empty mark stack for a second time, done. + break; + } + empty_prev = empty; + } +} + +bool ConcurrentCopying::ProcessMarkStackOnce() { + Thread* self = Thread::Current(); + CHECK(thread_running_gc_ != nullptr); + CHECK(self == thread_running_gc_); + CHECK(self->GetThreadLocalMarkStack() == nullptr); size_t count = 0; - mirror::Object* to_ref; - while ((to_ref = PopOffMarkStack()) != nullptr) { - ++count; - DCHECK(!region_space_->IsInFromSpace(to_ref)); - if (kUseBakerReadBarrier) { - DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) - << " " << to_ref << " " << to_ref->GetReadBarrierPointer() - << " is_marked=" << IsMarked(to_ref); + MarkStackMode mark_stack_mode = mark_stack_mode_.LoadRelaxed(); + if (mark_stack_mode == kMarkStackModeThreadLocal) { + // Process the thread-local mark stacks and the GC mark stack. + count += ProcessThreadLocalMarkStacks(false); + while (!gc_mark_stack_->IsEmpty()) { + mirror::Object* to_ref = gc_mark_stack_->PopBack(); + ProcessMarkStackRef(to_ref); + ++count; } - // Scan ref fields. - Scan(to_ref); - // Mark the gray ref as white or black. - if (kUseBakerReadBarrier) { - DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) - << " " << to_ref << " " << to_ref->GetReadBarrierPointer() - << " is_marked=" << IsMarked(to_ref); - } - if (to_ref->GetClass<kVerifyNone, kWithoutReadBarrier>()->IsTypeOfReferenceClass() && - to_ref->AsReference()->GetReferent<kWithoutReadBarrier>() != nullptr && - !IsInToSpace(to_ref->AsReference()->GetReferent<kWithoutReadBarrier>())) { - // Leave References gray so that GetReferent() will trigger RB. - CHECK(to_ref->AsReference()->IsEnqueued()) << "Left unenqueued ref gray " << to_ref; - } else { -#ifdef USE_BAKER_OR_BROOKS_READ_BARRIER - if (kUseBakerReadBarrier) { - if (region_space_->IsInToSpace(to_ref)) { - // If to-space, change from gray to white. - bool success = to_ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), - ReadBarrier::WhitePtr()); - CHECK(success) << "Must succeed as we won the race."; - CHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr()); - } else { - // If non-moving space/unevac from space, change from gray - // to black. We can't change gray to white because it's not - // safe to use CAS if two threads change values in opposite - // directions (A->B and B->A). So, we change it to black to - // indicate non-moving objects that have been marked - // through. Note we'd need to change from black to white - // later (concurrently). - bool success = to_ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), - ReadBarrier::BlackPtr()); - CHECK(success) << "Must succeed as we won the race."; - CHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr()); + gc_mark_stack_->Reset(); + } else if (mark_stack_mode == kMarkStackModeShared) { + // Process the shared GC mark stack with a lock. + { + MutexLock mu(self, mark_stack_lock_); + CHECK(revoked_mark_stacks_.empty()); + } + while (true) { + std::vector<mirror::Object*> refs; + { + // Copy refs with lock. Note the number of refs should be small. 
+ MutexLock mu(self, mark_stack_lock_); + if (gc_mark_stack_->IsEmpty()) { + break; } + for (StackReference<mirror::Object>* p = gc_mark_stack_->Begin(); + p != gc_mark_stack_->End(); ++p) { + refs.push_back(p->AsMirrorPtr()); + } + gc_mark_stack_->Reset(); + } + for (mirror::Object* ref : refs) { + ProcessMarkStackRef(ref); + ++count; } -#else - DCHECK(!kUseBakerReadBarrier); -#endif } - if (ReadBarrier::kEnableToSpaceInvariantChecks || kIsDebugBuild) { - ConcurrentCopyingAssertToSpaceInvariantObjectVisitor visitor(this); - visitor(to_ref); + } else { + CHECK_EQ(static_cast<uint32_t>(mark_stack_mode), + static_cast<uint32_t>(kMarkStackModeGcExclusive)); + { + MutexLock mu(self, mark_stack_lock_); + CHECK(revoked_mark_stacks_.empty()); + } + // Process the GC mark stack in the exclusive mode. No need to take the lock. + while (!gc_mark_stack_->IsEmpty()) { + mirror::Object* to_ref = gc_mark_stack_->PopBack(); + ProcessMarkStackRef(to_ref); + ++count; } + gc_mark_stack_->Reset(); } + // Return true if the stack was empty. return count == 0; } -void ConcurrentCopying::CheckEmptyMarkQueue() { - if (!mark_queue_.IsEmpty()) { - while (!mark_queue_.IsEmpty()) { - mirror::Object* obj = mark_queue_.Dequeue(); - if (kUseBakerReadBarrier) { - mirror::Object* rb_ptr = obj->GetReadBarrierPointer(); - LOG(INFO) << "On mark queue : " << obj << " " << PrettyTypeOf(obj) << " rb_ptr=" << rb_ptr - << " is_marked=" << IsMarked(obj); +size_t ConcurrentCopying::ProcessThreadLocalMarkStacks(bool disable_weak_ref_access) { + // Run a checkpoint to collect all thread local mark stacks and iterate over them all. + RevokeThreadLocalMarkStacks(disable_weak_ref_access); + size_t count = 0; + std::vector<accounting::AtomicStack<mirror::Object>*> mark_stacks; + { + MutexLock mu(Thread::Current(), mark_stack_lock_); + // Make a copy of the mark stack vector. + mark_stacks = revoked_mark_stacks_; + revoked_mark_stacks_.clear(); + } + for (accounting::AtomicStack<mirror::Object>* mark_stack : mark_stacks) { + for (StackReference<mirror::Object>* p = mark_stack->Begin(); p != mark_stack->End(); ++p) { + mirror::Object* to_ref = p->AsMirrorPtr(); + ProcessMarkStackRef(to_ref); + ++count; + } + { + MutexLock mu(Thread::Current(), mark_stack_lock_); + if (pooled_mark_stacks_.size() >= kMarkStackPoolSize) { + // The pool has enough. Delete it. + delete mark_stack; + } else { + // Otherwise, put it into the pool for later reuse. + mark_stack->Reset(); + pooled_mark_stacks_.push_back(mark_stack); + } + } + } + return count; +} + +void ConcurrentCopying::ProcessMarkStackRef(mirror::Object* to_ref) { + DCHECK(!region_space_->IsInFromSpace(to_ref)); + if (kUseBakerReadBarrier) { + DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) + << " " << to_ref << " " << to_ref->GetReadBarrierPointer() + << " is_marked=" << IsMarked(to_ref); + } + // Scan ref fields. + Scan(to_ref); + // Mark the gray ref as white or black. + if (kUseBakerReadBarrier) { + DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) + << " " << to_ref << " " << to_ref->GetReadBarrierPointer() + << " is_marked=" << IsMarked(to_ref); + } + if (to_ref->GetClass<kVerifyNone, kWithoutReadBarrier>()->IsTypeOfReferenceClass() && + to_ref->AsReference()->GetReferent<kWithoutReadBarrier>() != nullptr && + !IsInToSpace(to_ref->AsReference()->GetReferent<kWithoutReadBarrier>())) { + // Leave References gray so that GetReferent() will trigger RB. 
+ CHECK(to_ref->AsReference()->IsEnqueued()) << "Left unenqueued ref gray " << to_ref; + } else { +#ifdef USE_BAKER_OR_BROOKS_READ_BARRIER + if (kUseBakerReadBarrier) { + if (region_space_->IsInToSpace(to_ref)) { + // If to-space, change from gray to white. + bool success = to_ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), + ReadBarrier::WhitePtr()); + CHECK(success) << "Must succeed as we won the race."; + CHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr()); } else { - LOG(INFO) << "On mark queue : " << obj << " " << PrettyTypeOf(obj) - << " is_marked=" << IsMarked(obj); + // If non-moving space/unevac from space, change from gray + // to black. We can't change gray to white because it's not + // safe to use CAS if two threads change values in opposite + // directions (A->B and B->A). So, we change it to black to + // indicate non-moving objects that have been marked + // through. Note we'd need to change from black to white + // later (concurrently). + bool success = to_ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), + ReadBarrier::BlackPtr()); + CHECK(success) << "Must succeed as we won the race."; + CHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr()); } } - LOG(FATAL) << "mark queue is not empty"; +#else + DCHECK(!kUseBakerReadBarrier); +#endif + } + if (ReadBarrier::kEnableToSpaceInvariantChecks || kIsDebugBuild) { + ConcurrentCopyingAssertToSpaceInvariantObjectVisitor visitor(this); + visitor(to_ref); + } +} + +void ConcurrentCopying::SwitchToSharedMarkStackMode() { + Thread* self = Thread::Current(); + CHECK(thread_running_gc_ != nullptr); + CHECK_EQ(self, thread_running_gc_); + CHECK(self->GetThreadLocalMarkStack() == nullptr); + MarkStackMode before_mark_stack_mode = mark_stack_mode_.LoadRelaxed(); + CHECK_EQ(static_cast<uint32_t>(before_mark_stack_mode), + static_cast<uint32_t>(kMarkStackModeThreadLocal)); + mark_stack_mode_.StoreRelaxed(kMarkStackModeShared); + CHECK(weak_ref_access_enabled_.LoadRelaxed()); + weak_ref_access_enabled_.StoreRelaxed(false); + QuasiAtomic::ThreadFenceForConstructor(); + // Process the thread local mark stacks one last time after switching to the shared mark stack + // mode and disable weak ref accesses. + ProcessThreadLocalMarkStacks(true); + if (kVerboseMode) { + LOG(INFO) << "Switched to shared mark stack mode and disabled weak ref access"; + } +} + +void ConcurrentCopying::SwitchToGcExclusiveMarkStackMode() { + Thread* self = Thread::Current(); + CHECK(thread_running_gc_ != nullptr); + CHECK_EQ(self, thread_running_gc_); + CHECK(self->GetThreadLocalMarkStack() == nullptr); + MarkStackMode before_mark_stack_mode = mark_stack_mode_.LoadRelaxed(); + CHECK_EQ(static_cast<uint32_t>(before_mark_stack_mode), + static_cast<uint32_t>(kMarkStackModeShared)); + mark_stack_mode_.StoreRelaxed(kMarkStackModeGcExclusive); + QuasiAtomic::ThreadFenceForConstructor(); + if (kVerboseMode) { + LOG(INFO) << "Switched to GC exclusive mark stack mode"; + } +} + +void ConcurrentCopying::CheckEmptyMarkStack() { + Thread* self = Thread::Current(); + CHECK(thread_running_gc_ != nullptr); + CHECK_EQ(self, thread_running_gc_); + CHECK(self->GetThreadLocalMarkStack() == nullptr); + MarkStackMode mark_stack_mode = mark_stack_mode_.LoadRelaxed(); + if (mark_stack_mode == kMarkStackModeThreadLocal) { + // Thread-local mark stack mode. 
+ RevokeThreadLocalMarkStacks(false); + MutexLock mu(Thread::Current(), mark_stack_lock_); + if (!revoked_mark_stacks_.empty()) { + for (accounting::AtomicStack<mirror::Object>* mark_stack : revoked_mark_stacks_) { + while (!mark_stack->IsEmpty()) { + mirror::Object* obj = mark_stack->PopBack(); + if (kUseBakerReadBarrier) { + mirror::Object* rb_ptr = obj->GetReadBarrierPointer(); + LOG(INFO) << "On mark queue : " << obj << " " << PrettyTypeOf(obj) << " rb_ptr=" << rb_ptr + << " is_marked=" << IsMarked(obj); + } else { + LOG(INFO) << "On mark queue : " << obj << " " << PrettyTypeOf(obj) + << " is_marked=" << IsMarked(obj); + } + } + } + LOG(FATAL) << "mark stack is not empty"; + } + } else { + // Shared, GC-exclusive, or off. + MutexLock mu(Thread::Current(), mark_stack_lock_); + CHECK(gc_mark_stack_->IsEmpty()); + CHECK(revoked_mark_stacks_.empty()); } } @@ -791,7 +1100,7 @@ void ConcurrentCopying::Sweep(bool swap_bitmaps) { heap_->MarkAllocStackAsLive(live_stack); live_stack->Reset(); } - CHECK(mark_queue_.IsEmpty()); + CheckEmptyMarkStack(); TimingLogger::ScopedTiming split("Sweep", GetTimings()); for (const auto& space : GetHeap()->GetContinuousSpaces()) { if (space->IsContinuousMemMapAllocSpace()) { @@ -887,8 +1196,8 @@ void ConcurrentCopying::ReclaimPhase() { } IssueEmptyCheckpoint(); // Disable the check. - is_mark_queue_push_disallowed_.StoreSequentiallyConsistent(0); - CheckEmptyMarkQueue(); + is_mark_stack_push_disallowed_.StoreSequentiallyConsistent(0); + CheckEmptyMarkStack(); } { @@ -955,6 +1264,8 @@ void ConcurrentCopying::ReclaimPhase() { region_space_bitmap_ = nullptr; } + CheckEmptyMarkStack(); + if (kVerboseMode) { LOG(INFO) << "GC end of ReclaimPhase"; } @@ -1478,7 +1789,7 @@ mirror::Object* ConcurrentCopying::Copy(mirror::Object* from_ref) { } DCHECK(GetFwdPtr(from_ref) == to_ref); CHECK_NE(to_ref->GetLockWord(false).GetState(), LockWord::kForwardingAddress); - PushOntoMarkStack<true>(to_ref); + PushOntoMarkStack(to_ref); return to_ref; } else { // The CAS failed. It may have lost the race or may have failed @@ -1611,7 +1922,7 @@ mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref) { if (kUseBakerReadBarrier) { DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()); } - PushOntoMarkStack<true>(to_ref); + PushOntoMarkStack(to_ref); } } else { // from_ref is in a non-moving space. @@ -1638,7 +1949,7 @@ mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref) { if (kUseBakerReadBarrier) { DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()); } - PushOntoMarkStack<true>(to_ref); + PushOntoMarkStack(to_ref); } } else { // Use the mark bitmap. 
@@ -1694,7 +2005,7 @@ mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref) { if (kUseBakerReadBarrier) { DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()); } - PushOntoMarkStack<true>(to_ref); + PushOntoMarkStack(to_ref); } } } @@ -1704,9 +2015,11 @@ mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref) { } void ConcurrentCopying::FinishPhase() { + { + MutexLock mu(Thread::Current(), mark_stack_lock_); + CHECK_EQ(pooled_mark_stacks_.size(), kMarkStackPoolSize); + } region_space_ = nullptr; - CHECK(mark_queue_.IsEmpty()); - mark_queue_.Clear(); { MutexLock mu(Thread::Current(), skipped_blocks_lock_); skipped_blocks_map_.clear(); @@ -1739,7 +2052,8 @@ mirror::Object* ConcurrentCopying::MarkCallback(mirror::Object* from_ref, void* } void ConcurrentCopying::ProcessMarkStackCallback(void* arg) { - reinterpret_cast<ConcurrentCopying*>(arg)->ProcessMarkStack(); + ConcurrentCopying* concurrent_copying = reinterpret_cast<ConcurrentCopying*>(arg); + concurrent_copying->ProcessMarkStack(); } void ConcurrentCopying::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference) { @@ -1747,11 +2061,12 @@ void ConcurrentCopying::DelayReferenceReferent(mirror::Class* klass, mirror::Ref klass, reference, &IsHeapReferenceMarkedCallback, this); } -void ConcurrentCopying::ProcessReferences(Thread* self, bool concurrent) { +void ConcurrentCopying::ProcessReferences(Thread* self) { TimingLogger::ScopedTiming split("ProcessReferences", GetTimings()); + // We don't really need to lock the heap bitmap lock as we use CAS to mark in bitmaps. WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); GetHeap()->GetReferenceProcessor()->ProcessReferences( - concurrent, GetTimings(), GetCurrentIteration()->GetClearSoftReferences(), + true /*concurrent*/, GetTimings(), GetCurrentIteration()->GetClearSoftReferences(), &IsHeapReferenceMarkedCallback, &MarkCallback, &ProcessMarkStackCallback, this); } diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h index b1897b82f4..1fb4703971 100644 --- a/runtime/gc/collector/concurrent_copying.h +++ b/runtime/gc/collector/concurrent_copying.h @@ -49,89 +49,6 @@ namespace space { namespace collector { -// Concurrent queue. Used as the mark stack. TODO: use a concurrent -// stack for locality. -class MarkQueue { - public: - explicit MarkQueue(size_t size) : size_(size) { - CHECK(IsPowerOfTwo(size_)); - buf_.reset(new Atomic<mirror::Object*>[size_]); - CHECK(buf_.get() != nullptr); - Clear(); - } - - ALWAYS_INLINE Atomic<mirror::Object*>* GetSlotAddr(size_t index) { - return &(buf_.get()[index & (size_ - 1)]); - } - - // Multiple-proceducer enqueue. - bool Enqueue(mirror::Object* to_ref) { - size_t t; - do { - t = tail_.LoadRelaxed(); - size_t h = head_.LoadSequentiallyConsistent(); - if (t + size_ == h) { - // It's full. - return false; - } - } while (!tail_.CompareExchangeWeakSequentiallyConsistent(t, t + 1)); - // We got a slot but its content has not been filled yet at this point. - GetSlotAddr(t)->StoreSequentiallyConsistent(to_ref); - return true; - } - - // Thread-unsafe. - bool EnqueueThreadUnsafe(mirror::Object* to_ref) { - size_t t = tail_.LoadRelaxed(); - size_t h = head_.LoadRelaxed(); - if (t + size_ == h) { - // It's full. - return false; - } - GetSlotAddr(t)->StoreRelaxed(to_ref); - tail_.StoreRelaxed(t + 1); - return true; - } - - // Single-consumer dequeue. 
- mirror::Object* Dequeue() { - size_t h = head_.LoadRelaxed(); - size_t t = tail_.LoadSequentiallyConsistent(); - if (h == t) { - // it's empty. - return nullptr; - } - Atomic<mirror::Object*>* slot = GetSlotAddr(h); - mirror::Object* ref = slot->LoadSequentiallyConsistent(); - while (ref == nullptr) { - // Wait until the slot content becomes visible. - ref = slot->LoadSequentiallyConsistent(); - } - slot->StoreRelaxed(nullptr); - head_.StoreSequentiallyConsistent(h + 1); - return ref; - } - - bool IsEmpty() { - size_t h = head_.LoadSequentiallyConsistent(); - size_t t = tail_.LoadSequentiallyConsistent(); - return h == t; - } - - void Clear() { - head_.StoreRelaxed(0); - tail_.StoreRelaxed(0); - memset(buf_.get(), 0, size_ * sizeof(Atomic<mirror::Object*>)); - } - - private: - Atomic<size_t> head_; - Atomic<size_t> tail_; - - size_t size_; - std::unique_ptr<Atomic<mirror::Object*>[]> buf_; -}; - class ConcurrentCopying : public GarbageCollector { public: // TODO: disable thse flags for production use. @@ -185,10 +102,12 @@ class ConcurrentCopying : public GarbageCollector { Barrier& GetBarrier() { return *gc_barrier_; } + bool IsWeakRefAccessEnabled() { + return weak_ref_access_enabled_.LoadRelaxed(); + } + void RevokeThreadLocalMarkStack(Thread* thread) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); private: - mirror::Object* PopOffMarkStack(); - template<bool kThreadSafe> void PushOntoMarkStack(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); mirror::Object* Copy(mirror::Object* from_ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void Scan(mirror::Object* to_ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -202,11 +121,18 @@ class ConcurrentCopying : public GarbageCollector { void VerifyNoFromSpaceReferences() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_); accounting::ObjectStack* GetAllocationStack(); accounting::ObjectStack* GetLiveStack(); - bool ProcessMarkStack() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - void DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference) + void ProcessMarkStack() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + bool ProcessMarkStackOnce() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void ProcessMarkStackRef(mirror::Object* to_ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + size_t ProcessThreadLocalMarkStacks(bool disable_weak_ref_access) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - void ProcessReferences(Thread* self, bool concurrent) + void RevokeThreadLocalMarkStacks(bool disable_weak_ref_access) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void SwitchToSharedMarkStackMode() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void SwitchToGcExclusiveMarkStackMode() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void ProcessReferences(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); mirror::Object* IsMarked(mirror::Object* from_ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); static mirror::Object* MarkCallback(mirror::Object* from_ref, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -229,7 +155,7 @@ class ConcurrentCopying : public GarbageCollector { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); mirror::Object* AllocateInSkippedBlock(size_t alloc_size) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - void CheckEmptyMarkQueue() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void CheckEmptyMarkStack() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void 
IssueEmptyCheckpoint() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); bool IsOnAllocStack(mirror::Object* ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); mirror::Object* GetFwdPtr(mirror::Object* from_ref) @@ -242,10 +168,19 @@ class ConcurrentCopying : public GarbageCollector { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void AssertToSpaceInvariantInNonMovingSpace(mirror::Object* obj, mirror::Object* ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void ReenableWeakRefAccess(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); space::RegionSpace* region_space_; // The underlying region space. std::unique_ptr<Barrier> gc_barrier_; - MarkQueue mark_queue_; + std::unique_ptr<accounting::ObjectStack> gc_mark_stack_; + Mutex mark_stack_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; + std::vector<accounting::ObjectStack*> revoked_mark_stacks_ + GUARDED_BY(mark_stack_lock_); + static constexpr size_t kMarkStackSize = kPageSize; + static constexpr size_t kMarkStackPoolSize = 256; + std::vector<accounting::ObjectStack*> pooled_mark_stacks_ + GUARDED_BY(mark_stack_lock_); + Thread* thread_running_gc_; bool is_marking_; // True while marking is ongoing. bool is_active_; // True while the collection is ongoing. bool is_asserting_to_space_invariant_; // True while asserting the to-space invariant. @@ -258,7 +193,18 @@ class ConcurrentCopying : public GarbageCollector { size_t live_stack_freeze_size_; size_t from_space_num_objects_at_first_pause_; size_t from_space_num_bytes_at_first_pause_; - Atomic<int> is_mark_queue_push_disallowed_; + Atomic<int> is_mark_stack_push_disallowed_; + enum MarkStackMode { + kMarkStackModeOff = 0, // Mark stack is off. + kMarkStackModeThreadLocal, // All threads except for the GC-running thread push refs onto + // thread-local mark stacks. The GC-running thread pushes onto and + // pops off the GC mark stack without a lock. + kMarkStackModeShared, // All threads share the GC mark stack with a lock. + kMarkStackModeGcExclusive // The GC-running thread pushes onto and pops from the GC mark stack + // without a lock. Other threads won't access the mark stack. + }; + Atomic<MarkStackMode> mark_stack_mode_; + Atomic<bool> weak_ref_access_enabled_; // How many objects and bytes we moved. Used for accounting. Atomic<size_t> bytes_moved_; @@ -284,6 +230,7 @@ class ConcurrentCopying : public GarbageCollector { friend class ThreadFlipVisitor; friend class FlipCallback; friend class ConcurrentCopyingComputeUnevacFromSpaceLiveRatioVisitor; + friend class RevokeThreadLocalMarkStackCheckpoint; DISALLOW_IMPLICIT_CONSTRUCTORS(ConcurrentCopying); }; diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc index 4d51d387c8..c08ed0e349 100644 --- a/runtime/gc/reference_processor.cc +++ b/runtime/gc/reference_processor.cc @@ -53,15 +53,27 @@ void ReferenceProcessor::DisableSlowPath(Thread* self) { condition_.Broadcast(self); } +void ReferenceProcessor::BroadcastForSlowPath(Thread* self) { + CHECK(kUseReadBarrier); + MutexLock mu(self, *Locks::reference_processor_lock_); + condition_.Broadcast(self); +} + mirror::Object* ReferenceProcessor::GetReferent(Thread* self, mirror::Reference* reference) { - mirror::Object* const referent = reference->GetReferent(); - // If the referent is null then it is already cleared, we can just return null since there is no - // scenario where it becomes non-null during the reference processing phase. 
- if (UNLIKELY(!SlowPathEnabled()) || referent == nullptr) { - return referent; + if (!kUseReadBarrier || self->GetWeakRefAccessEnabled()) { + // Under read barrier / concurrent copying collector, it's not safe to call GetReferent() when + // weak ref access is disabled as the call includes a read barrier which may push a ref onto the + // mark stack and interfere with termination of marking. + mirror::Object* const referent = reference->GetReferent(); + // If the referent is null then it is already cleared, we can just return null since there is no + // scenario where it becomes non-null during the reference processing phase. + if (UNLIKELY(!SlowPathEnabled()) || referent == nullptr) { + return referent; + } } MutexLock mu(self, *Locks::reference_processor_lock_); - while (SlowPathEnabled()) { + while ((!kUseReadBarrier && SlowPathEnabled()) || + (kUseReadBarrier && !self->GetWeakRefAccessEnabled())) { mirror::HeapReference<mirror::Object>* const referent_addr = reference->GetReferentReferenceAddr(); // If the referent became cleared, return it. Don't need barrier since thread roots can't get @@ -128,7 +140,12 @@ void ReferenceProcessor::ProcessReferences(bool concurrent, TimingLogger* timing process_references_args_.is_marked_callback_ = is_marked_callback; process_references_args_.mark_callback_ = mark_object_callback; process_references_args_.arg_ = arg; - CHECK_EQ(SlowPathEnabled(), concurrent) << "Slow path must be enabled iff concurrent"; + if (!kUseReadBarrier) { + CHECK_EQ(SlowPathEnabled(), concurrent) << "Slow path must be enabled iff concurrent"; + } else { + // Weak ref access is enabled at Zygote compaction by SemiSpace (concurrent == false). + CHECK_EQ(!self->GetWeakRefAccessEnabled(), concurrent); + } } // Unless required to clear soft references with white references, preserve some white referents. if (!clear_soft_references) { @@ -178,9 +195,11 @@ void ReferenceProcessor::ProcessReferences(bool concurrent, TimingLogger* timing // starts since there is a small window of time where slow_path_enabled_ is enabled but the // callback isn't yet set. process_references_args_.is_marked_callback_ = nullptr; - if (concurrent) { - // Done processing, disable the slow path and broadcast to the waiters. - DisableSlowPath(self); + if (!kUseReadBarrier) { + if (concurrent) { + // Done processing, disable the slow path and broadcast to the waiters. + DisableSlowPath(self); + } } } } @@ -264,7 +283,8 @@ bool ReferenceProcessor::MakeCircularListIfUnenqueued(mirror::FinalizerReference Thread* self = Thread::Current(); MutexLock mu(self, *Locks::reference_processor_lock_); // Wait untul we are done processing reference. - while (SlowPathEnabled()) { + while ((!kUseReadBarrier && SlowPathEnabled()) || + (kUseReadBarrier && !self->GetWeakRefAccessEnabled())) { condition_.WaitHoldingLocks(self); } // At this point, since the sentinel of the reference is live, it is guaranteed to not be diff --git a/runtime/gc/reference_processor.h b/runtime/gc/reference_processor.h index a44319ba5a..284d13cd4b 100644 --- a/runtime/gc/reference_processor.h +++ b/runtime/gc/reference_processor.h @@ -54,6 +54,7 @@ class ReferenceProcessor { // Only allow setting this with mutators suspended so that we can avoid using a lock in the // GetReferent fast path as an optimization. void EnableSlowPath() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void BroadcastForSlowPath(Thread* self); // Decode the referent, may block if references are being processed. 
mirror::Object* GetReferent(Thread* self, mirror::Reference* reference) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(Locks::reference_processor_lock_); diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc index 2a962784ca..2a06ab3e5d 100644 --- a/runtime/intern_table.cc +++ b/runtime/intern_table.cc @@ -231,13 +231,21 @@ void InternTable::EnsureNewInternsDisallowed() { CHECK(!allow_new_interns_); } +void InternTable::BroadcastForNewInterns() { + CHECK(kUseReadBarrier); + Thread* self = Thread::Current(); + MutexLock mu(self, *Locks::intern_table_lock_); + new_intern_condition_.Broadcast(self); +} + mirror::String* InternTable::Insert(mirror::String* s, bool is_strong) { if (s == nullptr) { return nullptr; } Thread* self = Thread::Current(); MutexLock mu(self, *Locks::intern_table_lock_); - while (UNLIKELY(!allow_new_interns_)) { + while (UNLIKELY((!kUseReadBarrier && !allow_new_interns_) || + (kUseReadBarrier && !self->GetWeakRefAccessEnabled()))) { new_intern_condition_.WaitHoldingLocks(self); } // Check the strong table for a match. diff --git a/runtime/intern_table.h b/runtime/intern_table.h index 97ce73c52e..53f6f75ae0 100644 --- a/runtime/intern_table.h +++ b/runtime/intern_table.h @@ -88,6 +88,7 @@ class InternTable { void DisallowNewInterns() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void AllowNewInterns() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void EnsureNewInternsDisallowed() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void BroadcastForNewInterns() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); // Adds all of the resolved image strings from the image space into the intern table. The // advantage of doing this is preventing expensive DexFile::FindStringId calls. diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc index f1deacf39a..36adbeaa5e 100644 --- a/runtime/java_vm_ext.cc +++ b/runtime/java_vm_ext.cc @@ -473,7 +473,8 @@ jweak JavaVMExt::AddWeakGlobalRef(Thread* self, mirror::Object* obj) { return nullptr; } MutexLock mu(self, weak_globals_lock_); - while (UNLIKELY(!allow_new_weak_globals_)) { + while (UNLIKELY((!kUseReadBarrier && !allow_new_weak_globals_) || + (kUseReadBarrier && !self->GetWeakRefAccessEnabled()))) { weak_globals_add_condition_.WaitHoldingLocks(self); } IndirectRef ref = weak_globals_.Add(IRT_FIRST_SEGMENT, obj); @@ -559,6 +560,13 @@ void JavaVMExt::EnsureNewWeakGlobalsDisallowed() { CHECK(!allow_new_weak_globals_); } +void JavaVMExt::BroadcastForNewWeakGlobals() { + CHECK(kUseReadBarrier); + Thread* self = Thread::Current(); + MutexLock mu(self, weak_globals_lock_); + weak_globals_add_condition_.Broadcast(self); +} + mirror::Object* JavaVMExt::DecodeGlobal(Thread* self, IndirectRef ref) { return globals_.SynchronizedGet(self, &globals_lock_, ref); } @@ -570,7 +578,8 @@ void JavaVMExt::UpdateGlobal(Thread* self, IndirectRef ref, mirror::Object* resu mirror::Object* JavaVMExt::DecodeWeakGlobal(Thread* self, IndirectRef ref) { MutexLock mu(self, weak_globals_lock_); - while (UNLIKELY(!allow_new_weak_globals_)) { + while (UNLIKELY((!kUseReadBarrier && !allow_new_weak_globals_) || + (kUseReadBarrier && !self->GetWeakRefAccessEnabled()))) { weak_globals_add_condition_.WaitHoldingLocks(self); } return weak_globals_.Get(ref); diff --git a/runtime/java_vm_ext.h b/runtime/java_vm_ext.h index 4fdf45a03e..694a54570b 100644 --- a/runtime/java_vm_ext.h +++ b/runtime/java_vm_ext.h @@ -108,6 +108,7 @@ class JavaVMExt : public JavaVM { void DisallowNewWeakGlobals() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void 
AllowNewWeakGlobals() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void EnsureNewWeakGlobalsDisallowed() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void BroadcastForNewWeakGlobals() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); jobject AddGlobalRef(Thread* self, mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); diff --git a/runtime/monitor.cc b/runtime/monitor.cc index 4be25d6946..bc898909cc 100644 --- a/runtime/monitor.cc +++ b/runtime/monitor.cc @@ -1146,10 +1146,18 @@ void MonitorList::EnsureNewMonitorsDisallowed() { CHECK(!allow_new_monitors_); } +void MonitorList::BroadcastForNewMonitors() { + CHECK(kUseReadBarrier); + Thread* self = Thread::Current(); + MutexLock mu(self, monitor_list_lock_); + monitor_add_condition_.Broadcast(self); +} + void MonitorList::Add(Monitor* m) { Thread* self = Thread::Current(); MutexLock mu(self, monitor_list_lock_); - while (UNLIKELY(!allow_new_monitors_)) { + while (UNLIKELY((!kUseReadBarrier && !allow_new_monitors_) || + (kUseReadBarrier && !self->GetWeakRefAccessEnabled()))) { monitor_add_condition_.WaitHoldingLocks(self); } list_.push_front(m); diff --git a/runtime/monitor.h b/runtime/monitor.h index 8f3a91d7f6..8f6fb75c29 100644 --- a/runtime/monitor.h +++ b/runtime/monitor.h @@ -292,6 +292,7 @@ class MonitorList { void DisallowNewMonitors() LOCKS_EXCLUDED(monitor_list_lock_); void AllowNewMonitors() LOCKS_EXCLUDED(monitor_list_lock_); void EnsureNewMonitorsDisallowed() LOCKS_EXCLUDED(monitor_list_lock_); + void BroadcastForNewMonitors() LOCKS_EXCLUDED(monitor_list_lock_); // Returns how many monitors were deflated. size_t DeflateMonitors() LOCKS_EXCLUDED(monitor_list_lock_) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_); diff --git a/runtime/oat.h b/runtime/oat.h index 5706c4e680..3451d0f7b4 100644 --- a/runtime/oat.h +++ b/runtime/oat.h @@ -32,7 +32,7 @@ class InstructionSetFeatures; class PACKED(4) OatHeader { public: static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' }; - static constexpr uint8_t kOatVersion[] = { '0', '6', '5', '\0' }; + static constexpr uint8_t kOatVersion[] = { '0', '6', '6', '\0' }; static constexpr const char* kImageLocationKey = "image-location"; static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline"; diff --git a/runtime/runtime.cc b/runtime/runtime.cc index 5067b0d60c..a2a745e60c 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -1512,6 +1512,13 @@ void Runtime::EnsureNewSystemWeaksDisallowed() { java_vm_->EnsureNewWeakGlobalsDisallowed(); } +void Runtime::BroadcastForNewSystemWeaks() { + CHECK(kUseReadBarrier); + monitor_list_->BroadcastForNewMonitors(); + intern_table_->BroadcastForNewInterns(); + java_vm_->BroadcastForNewWeakGlobals(); +} + void Runtime::SetInstructionSet(InstructionSet instruction_set) { instruction_set_ = instruction_set; if ((instruction_set_ == kThumb2) || (instruction_set_ == kArm)) { diff --git a/runtime/runtime.h b/runtime/runtime.h index bcc7118db0..13cccc0537 100644 --- a/runtime/runtime.h +++ b/runtime/runtime.h @@ -299,6 +299,7 @@ class Runtime { void DisallowNewSystemWeaks() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void AllowNewSystemWeaks() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void EnsureNewSystemWeaksDisallowed() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void BroadcastForNewSystemWeaks() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); // Visit all the roots. If only_dirty is true then non-dirty roots won't be visited. If // clean_dirty is true then dirty roots will be marked as non-dirty after visiting. 
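Every system-weak container touched by this change (intern table, weak globals, monitor list, allocation records, reference processor) gates its accessors on the same condition. The standalone sketch below uses stand-in types, and models kUseReadBarrier and the per-thread flag as plain booleans, to show the shared wait/broadcast shape: mutators block while weak ref access is disabled for them, and the new BroadcastForNew*() paths wake them once ReenableWeakRefAccess() runs.

    // Standalone sketch (not ART code) of the wait/broadcast gate used by the weak-ref containers.
    #include <condition_variable>
    #include <mutex>

    struct WeakAccessGateSketch {
      std::mutex lock;
      std::condition_variable cond;
      bool allow_new_weaks = true;          // Non-CC collectors toggle this (Disallow/AllowNewSystemWeaks).
      bool weak_ref_access_enabled = true;  // CC models this per thread (Thread::tls32_.weak_ref_access_enabled).

      void WaitUntilAccessAllowed(bool use_read_barrier) {
        std::unique_lock<std::mutex> guard(lock);
        cond.wait(guard, [&] {
          return use_read_barrier ? weak_ref_access_enabled  // CC: wait on the per-thread flag.
                                  : allow_new_weaks;         // Other collectors: wait on the global flag.
        });
      }

      void Broadcast() {                    // Mirrors the new BroadcastForNew*() methods.
        std::lock_guard<std::mutex> guard(lock);
        cond.notify_all();
      }
    };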
diff --git a/runtime/thread.cc b/runtime/thread.cc index 37a86f1218..7ee0ff1af4 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -1289,6 +1289,7 @@ Thread::Thread(bool daemon) : tls32_(daemon), wait_monitor_(nullptr), interrupte tlsPtr_.checkpoint_functions[i] = nullptr; } tlsPtr_.flip_function = nullptr; + tlsPtr_.thread_local_mark_stack = nullptr; tls32_.suspended_at_suspend_check = false; } @@ -1407,6 +1408,9 @@ void Thread::Destroy() { { ScopedObjectAccess soa(self); Runtime::Current()->GetHeap()->RevokeThreadLocalBuffers(this); + if (kUseReadBarrier) { + Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->RevokeThreadLocalMarkStack(this); + } } } diff --git a/runtime/thread.h b/runtime/thread.h index 0e71c08b07..ee308a8cf9 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -46,6 +46,9 @@ namespace art { namespace gc { +namespace accounting { + template<class T> class AtomicStack; +} // namespace accounting namespace collector { class SemiSpace; } // namespace collector @@ -232,6 +235,15 @@ class Thread { void SetFlipFunction(Closure* function); Closure* GetFlipFunction(); + gc::accounting::AtomicStack<mirror::Object>* GetThreadLocalMarkStack() { + CHECK(kUseReadBarrier); + return tlsPtr_.thread_local_mark_stack; + } + void SetThreadLocalMarkStack(gc::accounting::AtomicStack<mirror::Object>* stack) { + CHECK(kUseReadBarrier); + tlsPtr_.thread_local_mark_stack = stack; + } + // Called when thread detected that the thread_suspend_count_ was non-zero. Gives up share of // mutator_lock_ and waits until it is resumed and thread_suspend_count_ is zero. void FullSuspendCheck() @@ -772,6 +784,16 @@ class Thread { tls32_.debug_method_entry_ = false; } + bool GetWeakRefAccessEnabled() const { + CHECK(kUseReadBarrier); + return tls32_.weak_ref_access_enabled; + } + + void SetWeakRefAccessEnabled(bool enabled) { + CHECK(kUseReadBarrier); + tls32_.weak_ref_access_enabled = enabled; + } + // Activates single step control for debugging. The thread takes the // ownership of the given SingleStepControl*. It is deleted by a call // to DeactivateSingleStepControl or upon thread destruction. @@ -1060,7 +1082,7 @@ class Thread { daemon(is_daemon), throwing_OutOfMemoryError(false), no_thread_suspension(0), thread_exit_check_count(0), handling_signal_(false), deoptimization_return_value_is_reference(false), suspended_at_suspend_check(false), - ready_for_debug_invoke(false), debug_method_entry_(false) { + ready_for_debug_invoke(false), debug_method_entry_(false), weak_ref_access_enabled(true) { } union StateAndFlags state_and_flags; @@ -1117,6 +1139,15 @@ class Thread { // True if the thread enters a method. This is used to detect method entry // event for the debugger. bool32_t debug_method_entry_; + + // True if the thread is allowed to access a weak ref (Reference::GetReferent() and system + // weaks) and to potentially mark an object alive/gray. This is used for concurrent reference + // processing of the CC collector only. This is thread local so that we can enable/disable weak + // ref access by using a checkpoint and avoid a race around the time weak ref access gets + // disabled and concurrent reference processing begins (if weak ref access is disabled during a + // pause, this is not an issue.) Other collectors use Runtime::DisallowNewSystemWeaks() and + // ReferenceProcessor::EnableSlowPath(). + bool32_t weak_ref_access_enabled; } tls32_; struct PACKED(8) tls_64bit_sized_values { @@ -1268,6 +1299,9 @@ class Thread { // Current method verifier, used for root marking. 
verifier::MethodVerifier* method_verifier; + + // Thread-local mark stack for the concurrent copying collector. + gc::accounting::AtomicStack<mirror::Object>* thread_local_mark_stack; } tlsPtr_; // Guards the 'interrupted_' and 'wait_monitor_' members. diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc index b697b43a77..47e5b529dd 100644 --- a/runtime/thread_list.cc +++ b/runtime/thread_list.cc @@ -33,6 +33,7 @@ #include "base/time_utils.h" #include "base/timing_logger.h" #include "debugger.h" +#include "gc/collector/concurrent_copying.h" #include "jni_internal.h" #include "lock_word.h" #include "monitor.h" @@ -1102,6 +1103,12 @@ void ThreadList::Register(Thread* self) { } CHECK(!Contains(self)); list_.push_back(self); + if (kUseReadBarrier) { + // Initialize this according to the state of the CC collector. + bool weak_ref_access_enabled = + Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->IsWeakRefAccessEnabled(); + self->SetWeakRefAccessEnabled(weak_ref_access_enabled); + } } void ThreadList::Unregister(Thread* self) { |
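Finally, the thread-local stacks have to be handed back to the collector. The checkpoint installed by RevokeThreadLocalMarkStacks() effectively does the following per thread; this is a standalone sketch with stand-in types, and the barrier handshake with ThreadList::RunCheckpoint() and the return of stacks to the bounded pool are elided.

    // Standalone sketch (not ART code) of the per-thread work in RevokeThreadLocalMarkStackCheckpoint.
    #include <mutex>
    #include <vector>

    struct ThreadSketch {
      std::vector<const void*>* tl_mark_stack = nullptr;  // Stand-in for tlsPtr_.thread_local_mark_stack.
      bool weak_ref_access_enabled = true;                // Stand-in for tls32_.weak_ref_access_enabled.
    };

    struct RevokeSketch {
      std::mutex mark_stack_lock;
      std::vector<std::vector<const void*>*> revoked_mark_stacks;

      void RevokeFrom(ThreadSketch& thread, bool disable_weak_ref_access) {
        if (thread.tl_mark_stack != nullptr) {
          std::lock_guard<std::mutex> guard(mark_stack_lock);
          revoked_mark_stacks.push_back(thread.tl_mark_stack);  // Hand the stack over to the GC.
          thread.tl_mark_stack = nullptr;                       // The thread lazily grabs a new one.
        }
        if (disable_weak_ref_access) {
          thread.weak_ref_access_enabled = false;  // Done in the same checkpoint so no refs can slip
        }                                          // in between revocation and disabling weak access.
      }
    };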