Thread-local mark stacks for the CC collector.
Thread-local mark stacks are assigned to mutators, which push
references onto them in read barriers, to reduce the CAS
synchronization cost of a single global mark stack/queue.
We step through three mark stack modes (thread-local, shared,
GC-exclusive) and, only for the CC collector, use per-thread flags to
disable/enable system weak accesses instead of the existing global
one, to perform the marking phase safely. The reasons are 1)
thread-local mark stacks must be revoked from mutators using a
checkpoint to avoid races (incorrectly leaving a reference on a mark
stack) when terminating marking, and 2) a checkpoint can't be used
while system weak accesses are disabled (or a deadlock would occur).
More details are in the code comments.
Performance improvements in Ritzperf EAAC on N5: ~2.8%
(13290->12918) in run time and ~23% (51.6s->39.8s) in total GC time.
Bug: 12687968
Change-Id: I5d234d7e48bf115cd773d38bdb62ad24ce9116c7
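
For orientation before the diff, here is a minimal, standalone sketch
of the mode-dependent push path described above. This is not the ART
code (the real logic is ConcurrentCopying::PushOntoMarkStack in the
diff); the Collector struct, gc_thread field, and MarkStackMode values
are illustrative names only, and the sketch omits stack revocation,
the pooled stacks, and overflow handling.

  #include <atomic>
  #include <mutex>
  #include <thread>
  #include <vector>

  enum class MarkStackMode { kOff, kThreadLocal, kShared, kGcExclusive };

  struct Collector {
    std::atomic<MarkStackMode> mode{MarkStackMode::kOff};
    std::mutex mark_stack_lock;        // Guards gc_mark_stack in the shared mode.
    std::vector<void*> gc_mark_stack;  // The single GC-owned mark stack.
    std::thread::id gc_thread;         // The thread running the GC.

    // Each mutator keeps its own stack; a real collector revokes these via a checkpoint.
    static thread_local std::vector<void*> tl_mark_stack;

    void PushOntoMarkStack(void* ref) {
      switch (mode.load(std::memory_order_relaxed)) {
        case MarkStackMode::kThreadLocal:
          if (std::this_thread::get_id() == gc_thread) {
            gc_mark_stack.push_back(ref);  // GC thread uses the GC stack directly, no lock.
          } else {
            tl_mark_stack.push_back(ref);  // Mutators push locally: no CAS, no lock.
          }
          break;
        case MarkStackMode::kShared: {
          // Thread-local stacks have been revoked; everyone shares the GC stack under a lock.
          std::lock_guard<std::mutex> lock(mark_stack_lock);
          gc_mark_stack.push_back(ref);
          break;
        }
        case MarkStackMode::kGcExclusive:
          gc_mark_stack.push_back(ref);  // Only the GC-running thread pushes/pops; no lock needed.
          break;
        case MarkStackMode::kOff:
          break;  // Marking is not running.
      }
    }
  };

  thread_local std::vector<void*> Collector::tl_mark_stack;

  int main() {
    Collector cc;
    cc.gc_thread = std::this_thread::get_id();
    cc.mode.store(MarkStackMode::kThreadLocal);
    int dummy = 0;
    cc.PushOntoMarkStack(&dummy);  // Lands on gc_mark_stack: this thread plays the GC thread here.
    cc.mode.store(MarkStackMode::kGcExclusive);
    cc.PushOntoMarkStack(&dummy);  // Still the GC stack, still no lock.
    return 0;
  }

The point of the thread-local mode is that the hot read-barrier path
pushes with no CAS and no lock at all; the shared mode covers only the
short window in which thread-local stacks are revoked under a
checkpoint while weak ref accesses are disabled, after which the GC
thread owns the mark stack exclusively.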
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 20d75f3..9142012 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -89,7 +89,7 @@
art::Thread::ThinLockIdOffset<__SIZEOF_POINTER__>().Int32Value())
// Offset of field Thread::tlsPtr_.card_table.
-#define THREAD_CARD_TABLE_OFFSET 128
+#define THREAD_CARD_TABLE_OFFSET 136
ADD_TEST_EQ(THREAD_CARD_TABLE_OFFSET,
art::Thread::CardTableOffset<__SIZEOF_POINTER__>().Int32Value())
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 0a5ebfa..77b1e86 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -133,7 +133,8 @@
sizeof(void*) * kLockLevelCount);
EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, nested_signal_state, flip_function, sizeof(void*));
EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, flip_function, method_verifier, sizeof(void*));
- EXPECT_OFFSET_DIFF(Thread, tlsPtr_.method_verifier, Thread, wait_mutex_, sizeof(void*),
+ EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, method_verifier, thread_local_mark_stack, sizeof(void*));
+ EXPECT_OFFSET_DIFF(Thread, tlsPtr_.thread_local_mark_stack, Thread, wait_mutex_, sizeof(void*),
thread_tlsptr_end);
}
diff --git a/runtime/gc/accounting/atomic_stack.h b/runtime/gc/accounting/atomic_stack.h
index ac716ea..93f32e8 100644
--- a/runtime/gc/accounting/atomic_stack.h
+++ b/runtime/gc/accounting/atomic_stack.h
@@ -156,6 +156,10 @@
return Size() == 0;
}
+ bool IsFull() const {
+ return Size() == growth_limit_;
+ }
+
size_t Size() const {
DCHECK_LE(front_index_.LoadRelaxed(), back_index_.LoadRelaxed());
return back_index_.LoadRelaxed() - front_index_.LoadRelaxed();
diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc
index 6537ed2..88c475b 100644
--- a/runtime/gc/allocation_record.cc
+++ b/runtime/gc/allocation_record.cc
@@ -265,7 +265,8 @@
}
// Wait for GC's sweeping to complete and allow new records
- while (UNLIKELY(!records->allow_new_record_)) {
+ while (UNLIKELY((!kUseReadBarrier && !records->allow_new_record_) ||
+ (kUseReadBarrier && !self->GetWeakRefAccessEnabled()))) {
records->new_record_condition_.WaitHoldingLocks(self);
}
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 5e69b79..ac4fc69 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -17,6 +17,7 @@
#include "concurrent_copying.h"
#include "art_field-inl.h"
+#include "base/stl_util.h"
#include "gc/accounting/heap_bitmap-inl.h"
#include "gc/accounting/space_bitmap-inl.h"
#include "gc/space/image_space.h"
@@ -37,17 +38,22 @@
: GarbageCollector(heap,
name_prefix + (name_prefix.empty() ? "" : " ") +
"concurrent copying + mark sweep"),
- region_space_(nullptr), gc_barrier_(new Barrier(0)), mark_queue_(2 * MB),
+ region_space_(nullptr), gc_barrier_(new Barrier(0)),
+ gc_mark_stack_(accounting::ObjectStack::Create("concurrent copying gc mark stack",
+ 2 * MB, 2 * MB)),
+ mark_stack_lock_("concurrent copying mark stack lock", kMarkSweepMarkStackLock),
+ thread_running_gc_(nullptr),
is_marking_(false), is_active_(false), is_asserting_to_space_invariant_(false),
- heap_mark_bitmap_(nullptr), live_stack_freeze_size_(0),
+ heap_mark_bitmap_(nullptr), live_stack_freeze_size_(0), mark_stack_mode_(kMarkStackModeOff),
+ weak_ref_access_enabled_(true),
skipped_blocks_lock_("concurrent copying bytes blocks lock", kMarkSweepMarkStackLock),
rb_table_(heap_->GetReadBarrierTable()),
force_evacuate_all_(false) {
static_assert(space::RegionSpace::kRegionSize == accounting::ReadBarrierTable::kRegionSize,
"The region space size and the read barrier table region size must match");
cc_heap_bitmap_.reset(new accounting::HeapBitmap(heap));
+ Thread* self = Thread::Current();
{
- Thread* self = Thread::Current();
ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
// Cache this so that we won't have to lock heap_bitmap_lock_ in
// Mark() which could cause a nested lock on heap_bitmap_lock_
@@ -55,9 +61,19 @@
// (class_linker_lock_ and heap_bitmap_lock_).
heap_mark_bitmap_ = heap->GetMarkBitmap();
}
+ {
+ MutexLock mu(self, mark_stack_lock_);
+ for (size_t i = 0; i < kMarkStackPoolSize; ++i) {
+ accounting::AtomicStack<mirror::Object>* mark_stack =
+ accounting::AtomicStack<mirror::Object>::Create(
+ "thread local mark stack", kMarkStackSize, kMarkStackSize);
+ pooled_mark_stacks_.push_back(mark_stack);
+ }
+ }
}
ConcurrentCopying::~ConcurrentCopying() {
+ STLDeleteElements(&pooled_mark_stacks_);
}
void ConcurrentCopying::RunPhases() {
@@ -65,6 +81,7 @@
CHECK(!is_active_);
is_active_ = true;
Thread* self = Thread::Current();
+ thread_running_gc_ = self;
Locks::mutator_lock_->AssertNotHeld(self);
{
ReaderMutexLock mu(self, *Locks::mutator_lock_);
@@ -79,7 +96,7 @@
if (kEnableNoFromSpaceRefsVerification || kIsDebugBuild) {
TimingLogger::ScopedTiming split("(Paused)VerifyNoFromSpaceReferences", GetTimings());
ScopedPause pause(this);
- CheckEmptyMarkQueue();
+ CheckEmptyMarkStack();
if (kVerboseMode) {
LOG(INFO) << "Verifying no from-space refs";
}
@@ -87,7 +104,7 @@
if (kVerboseMode) {
LOG(INFO) << "Done verifying no from-space refs";
}
- CheckEmptyMarkQueue();
+ CheckEmptyMarkStack();
}
{
ReaderMutexLock mu(self, *Locks::mutator_lock_);
@@ -96,6 +113,7 @@
FinishPhase();
CHECK(is_active_);
is_active_ = false;
+ thread_running_gc_ = nullptr;
}
void ConcurrentCopying::BindBitmaps() {
@@ -132,7 +150,7 @@
LOG(INFO) << "Region-space : " << reinterpret_cast<void*>(region_space_->Begin()) << "-"
<< reinterpret_cast<void*>(region_space_->Limit());
}
- CHECK(mark_queue_.IsEmpty());
+ CheckEmptyMarkStack();
immune_region_.Reset();
bytes_moved_.StoreRelaxed(0);
objects_moved_.StoreRelaxed(0);
@@ -209,6 +227,7 @@
cc->from_space_num_bytes_at_first_pause_ = cc->region_space_->GetBytesAllocated();
}
cc->is_marking_ = true;
+ cc->mark_stack_mode_.StoreRelaxed(ConcurrentCopying::kMarkStackModeThreadLocal);
if (UNLIKELY(Runtime::Current()->IsActiveTransaction())) {
CHECK(Runtime::Current()->IsAotCompiler());
TimingLogger::ScopedTiming split2("(Paused)VisitTransactionRoots", cc->GetTimings());
@@ -283,7 +302,7 @@
} else {
// Newly marked. Set the gray bit and push it onto the mark stack.
CHECK(!kUseBakerReadBarrier || obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
- collector_->PushOntoMarkStack<true>(obj);
+ collector_->PushOntoMarkStack(obj);
}
}
@@ -319,6 +338,7 @@
if (kVerboseMode) {
LOG(INFO) << "GC MarkingPhase";
}
+ CHECK(weak_ref_access_enabled_);
{
// Mark the image root. The WB-based collectors do not need to
// scan the image objects from roots by relying on the card table,
@@ -370,37 +390,47 @@
Thread* self = Thread::Current();
{
TimingLogger::ScopedTiming split6("ProcessMarkStack", GetTimings());
- // Process the mark stack and issue an empty check point. If the
- // mark stack is still empty after the check point, we're
- // done. Otherwise, repeat.
+ // We transition through three mark stack modes (thread-local, shared, GC-exclusive). The
+ // primary reasons are that we need a checkpoint to process thread-local mark stacks, but once
+ // weak ref accesses are disabled we can no longer use a checkpoint because running threads may
+ // be blocked in WaitHoldingLocks (a deadlock), and that once we reach the point where we
+ // process weak references, we can avoid taking a lock when accessing the GC mark stack, which
+ // makes mark stack processing more efficient.
+
+ // Process the mark stack once in the thread-local stack mode. This marks most of the live
+ // objects, except for those reached via weak ref accesses with read barriers
+ // (Reference::GetReferent() and system weaks), which may happen concurrently while we are
+ // processing the mark stack and may newly mark/gray objects and push refs onto the mark stack.
ProcessMarkStack();
- size_t count = 0;
- while (!ProcessMarkStack()) {
- ++count;
- if (kVerboseMode) {
- LOG(INFO) << "Issue an empty check point. " << count;
- }
- IssueEmptyCheckpoint();
- }
- // Need to ensure the mark stack is empty before reference
- // processing to get rid of non-reference gray objects.
- CheckEmptyMarkQueue();
- // Enable the GetReference slow path and disallow access to the system weaks.
- GetHeap()->GetReferenceProcessor()->EnableSlowPath();
- Runtime::Current()->DisallowNewSystemWeaks();
- QuasiAtomic::ThreadFenceForConstructor();
- // Lock-unlock the system weak locks so that there's no thread in
- // the middle of accessing system weaks.
- Runtime::Current()->EnsureNewSystemWeaksDisallowed();
- // Note: Do not issue a checkpoint from here to the
- // SweepSystemWeaks call or else a deadlock due to
- // WaitHoldingLocks() would occur.
+ // Switch to the shared mark stack mode. That is, revoke and process the thread-local mark
+ // stacks one last time as part of the transition, which picks up any refs that mutators
+ // concurrently pushed onto their thread-local mark stacks during the ProcessMarkStack() call
+ // above. At the same time, disable weak ref accesses using a per-thread flag. It's important
+ // to do both in a single checkpoint so that we can ensure that mutators won't newly gray
+ // objects and push new refs onto the mark stack due to weak ref accesses, and that they safely
+ // transition to the shared mark stack mode (without leaving unprocessed refs on their
+ // thread-local mark stacks), without a race. This is why we use the thread-local flag
+ // Thread::tls32_.weak_ref_access_enabled_ instead of the global ones.
+ SwitchToSharedMarkStackMode();
+ CHECK(!self->GetWeakRefAccessEnabled());
+ // Now that weak ref accesses are disabled, once we exhaust the shared mark stack again here
+ // (it may be non-empty if refs were found on thread-local mark stacks during the above
+ // SwitchToSharedMarkStackMode() call), we won't have new refs to process; that is, mutators
+ // (via read barriers) have no way to produce any more refs to process. Marking therefore
+ // converges before we process weak refs below.
+ ProcessMarkStack();
+ CheckEmptyMarkStack();
+ // Switch to the GC exclusive mark stack mode so that we can process the mark stack without a
+ // lock from this point on.
+ SwitchToGcExclusiveMarkStackMode();
+ CheckEmptyMarkStack();
if (kVerboseMode) {
- LOG(INFO) << "Enabled the ref proc slow path & disabled access to system weaks.";
LOG(INFO) << "ProcessReferences";
}
- ProcessReferences(self, true);
- CheckEmptyMarkQueue();
+ // Process weak references. This may produce new refs to process and have them processed via
+ // ProcessMarkStackCallback (in the GC exclusive mark stack mode).
+ ProcessReferences(self);
+ CheckEmptyMarkStack();
if (kVerboseMode) {
LOG(INFO) << "SweepSystemWeaks";
}
@@ -408,33 +438,52 @@
if (kVerboseMode) {
LOG(INFO) << "SweepSystemWeaks done";
}
- // Because hash_set::Erase() can call the hash function for
- // arbitrary elements in the weak intern table in
- // InternTable::Table::SweepWeaks(), the above SweepSystemWeaks()
- // call may have marked some objects (strings) alive. So process
- // the mark stack here once again.
+ // Process the mark stack one last time here because the above SweepSystemWeaks() call may have
+ // marked some objects (strings) alive, since hash_set::Erase() can call the hash function for
+ // arbitrary elements in the weak intern table in InternTable::Table::SweepWeaks().
ProcessMarkStack();
- CheckEmptyMarkQueue();
- if (kVerboseMode) {
- LOG(INFO) << "AllowNewSystemWeaks";
- }
- Runtime::Current()->AllowNewSystemWeaks();
+ CheckEmptyMarkStack();
+ // Re-enable weak ref accesses.
+ ReenableWeakRefAccess(self);
+ // Issue an empty checkpoint to ensure no threads are still in the middle of a read barrier
+ // which may have a from-space ref cached in a local variable.
IssueEmptyCheckpoint();
- // Disable marking.
+ // Marking is done. Disable marking.
if (kUseTableLookupReadBarrier) {
heap_->rb_table_->ClearAll();
DCHECK(heap_->rb_table_->IsAllCleared());
}
- is_mark_queue_push_disallowed_.StoreSequentiallyConsistent(1);
- is_marking_ = false;
- CheckEmptyMarkQueue();
+ is_mark_stack_push_disallowed_.StoreSequentiallyConsistent(1);
+ is_marking_ = false; // This disables the read barrier/marking of weak roots.
+ mark_stack_mode_.StoreSequentiallyConsistent(kMarkStackModeOff);
+ CheckEmptyMarkStack();
}
+ CHECK(weak_ref_access_enabled_);
if (kVerboseMode) {
LOG(INFO) << "GC end of MarkingPhase";
}
}
+void ConcurrentCopying::ReenableWeakRefAccess(Thread* self) {
+ if (kVerboseMode) {
+ LOG(INFO) << "ReenableWeakRefAccess";
+ }
+ weak_ref_access_enabled_.StoreRelaxed(true); // This is for new threads.
+ QuasiAtomic::ThreadFenceForConstructor();
+ // Iterate over all threads and re-enable weak ref access (a checkpoint is neither needed nor
+ // usable here).
+ {
+ MutexLock mu(self, *Locks::thread_list_lock_);
+ std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
+ for (Thread* thread : thread_list) {
+ thread->SetWeakRefAccessEnabled(true);
+ }
+ }
+ // Unblock blocking threads.
+ GetHeap()->GetReferenceProcessor()->BroadcastForSlowPath(self);
+ Runtime::Current()->BroadcastForNewSystemWeaks();
+}
+
void ConcurrentCopying::IssueEmptyCheckpoint() {
Thread* self = Thread::Current();
EmptyCheckpoint check_point(this);
@@ -455,18 +504,61 @@
Locks::mutator_lock_->SharedLock(self);
}
-mirror::Object* ConcurrentCopying::PopOffMarkStack() {
- return mark_queue_.Dequeue();
-}
-
-template<bool kThreadSafe>
void ConcurrentCopying::PushOntoMarkStack(mirror::Object* to_ref) {
- CHECK_EQ(is_mark_queue_push_disallowed_.LoadRelaxed(), 0)
+ CHECK_EQ(is_mark_stack_push_disallowed_.LoadRelaxed(), 0)
<< " " << to_ref << " " << PrettyTypeOf(to_ref);
- if (kThreadSafe) {
- CHECK(mark_queue_.Enqueue(to_ref)) << "Mark queue overflow";
+ Thread* self = Thread::Current(); // TODO: pass self as an argument from call sites?
+ CHECK(thread_running_gc_ != nullptr);
+ MarkStackMode mark_stack_mode = mark_stack_mode_.LoadRelaxed();
+ if (mark_stack_mode == kMarkStackModeThreadLocal) {
+ if (self == thread_running_gc_) {
+ // If GC-running thread, use the GC mark stack instead of a thread-local mark stack.
+ CHECK(self->GetThreadLocalMarkStack() == nullptr);
+ CHECK(!gc_mark_stack_->IsFull());
+ gc_mark_stack_->PushBack(to_ref);
+ } else {
+ // Otherwise, use a thread-local mark stack.
+ accounting::AtomicStack<mirror::Object>* tl_mark_stack = self->GetThreadLocalMarkStack();
+ if (UNLIKELY(tl_mark_stack == nullptr || tl_mark_stack->IsFull())) {
+ MutexLock mu(self, mark_stack_lock_);
+ // Get a new thread local mark stack.
+ accounting::AtomicStack<mirror::Object>* new_tl_mark_stack;
+ if (!pooled_mark_stacks_.empty()) {
+ // Use a pooled mark stack.
+ new_tl_mark_stack = pooled_mark_stacks_.back();
+ pooled_mark_stacks_.pop_back();
+ } else {
+ // None pooled. Create a new one.
+ new_tl_mark_stack =
+ accounting::AtomicStack<mirror::Object>::Create(
+ "thread local mark stack", 4 * KB, 4 * KB);
+ }
+ DCHECK(new_tl_mark_stack != nullptr);
+ DCHECK(new_tl_mark_stack->IsEmpty());
+ new_tl_mark_stack->PushBack(to_ref);
+ self->SetThreadLocalMarkStack(new_tl_mark_stack);
+ if (tl_mark_stack != nullptr) {
+ // Store the old full stack into a vector.
+ revoked_mark_stacks_.push_back(tl_mark_stack);
+ }
+ } else {
+ tl_mark_stack->PushBack(to_ref);
+ }
+ }
+ } else if (mark_stack_mode == kMarkStackModeShared) {
+ // Access the shared GC mark stack with a lock.
+ MutexLock mu(self, mark_stack_lock_);
+ CHECK(!gc_mark_stack_->IsFull());
+ gc_mark_stack_->PushBack(to_ref);
} else {
- CHECK(mark_queue_.EnqueueThreadUnsafe(to_ref)) << "Mark queue overflow";
+ CHECK_EQ(static_cast<uint32_t>(mark_stack_mode),
+ static_cast<uint32_t>(kMarkStackModeGcExclusive));
+ CHECK(self == thread_running_gc_)
+ << "Only GC-running thread should access the mark stack "
+ << "in the GC exclusive mark stack mode";
+ // Access the GC mark stack without a lock.
+ CHECK(!gc_mark_stack_->IsFull());
+ gc_mark_stack_->PushBack(to_ref);
}
}
@@ -695,83 +787,300 @@
ConcurrentCopying* collector_;
};
-bool ConcurrentCopying::ProcessMarkStack() {
+class RevokeThreadLocalMarkStackCheckpoint : public Closure {
+ public:
+ explicit RevokeThreadLocalMarkStackCheckpoint(ConcurrentCopying* concurrent_copying,
+ bool disable_weak_ref_access)
+ : concurrent_copying_(concurrent_copying),
+ disable_weak_ref_access_(disable_weak_ref_access) {
+ }
+
+ virtual void Run(Thread* thread) OVERRIDE NO_THREAD_SAFETY_ANALYSIS {
+ // Note: self is not necessarily equal to thread since thread may be suspended.
+ Thread* self = Thread::Current();
+ CHECK(thread == self || thread->IsSuspended() || thread->GetState() == kWaitingPerformingGc)
+ << thread->GetState() << " thread " << thread << " self " << self;
+ // Revoke thread local mark stacks.
+ accounting::AtomicStack<mirror::Object>* tl_mark_stack = thread->GetThreadLocalMarkStack();
+ if (tl_mark_stack != nullptr) {
+ MutexLock mu(self, concurrent_copying_->mark_stack_lock_);
+ concurrent_copying_->revoked_mark_stacks_.push_back(tl_mark_stack);
+ thread->SetThreadLocalMarkStack(nullptr);
+ }
+ // Disable weak ref access.
+ if (disable_weak_ref_access_) {
+ thread->SetWeakRefAccessEnabled(false);
+ }
+ // If thread is a running mutator, then act on behalf of the garbage collector.
+ // See the code in ThreadList::RunCheckpoint.
+ if (thread->GetState() == kRunnable) {
+ concurrent_copying_->GetBarrier().Pass(self);
+ }
+ }
+
+ private:
+ ConcurrentCopying* const concurrent_copying_;
+ const bool disable_weak_ref_access_;
+};
+
+void ConcurrentCopying::RevokeThreadLocalMarkStacks(bool disable_weak_ref_access) {
+ Thread* self = Thread::Current();
+ RevokeThreadLocalMarkStackCheckpoint check_point(this, disable_weak_ref_access);
+ ThreadList* thread_list = Runtime::Current()->GetThreadList();
+ gc_barrier_->Init(self, 0);
+ size_t barrier_count = thread_list->RunCheckpoint(&check_point);
+ // If there are no threads to wait for, which implies that all the checkpoint functions have
+ // finished, then there is no need to release the mutator lock.
+ if (barrier_count == 0) {
+ return;
+ }
+ Locks::mutator_lock_->SharedUnlock(self);
+ {
+ ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
+ gc_barrier_->Increment(self, barrier_count);
+ }
+ Locks::mutator_lock_->SharedLock(self);
+}
+
+void ConcurrentCopying::RevokeThreadLocalMarkStack(Thread* thread) {
+ Thread* self = Thread::Current();
+ CHECK_EQ(self, thread);
+ accounting::AtomicStack<mirror::Object>* tl_mark_stack = thread->GetThreadLocalMarkStack();
+ if (tl_mark_stack != nullptr) {
+ CHECK(is_marking_);
+ MutexLock mu(self, mark_stack_lock_);
+ revoked_mark_stacks_.push_back(tl_mark_stack);
+ thread->SetThreadLocalMarkStack(nullptr);
+ }
+}
+
+void ConcurrentCopying::ProcessMarkStack() {
if (kVerboseMode) {
LOG(INFO) << "ProcessMarkStack. ";
}
- size_t count = 0;
- mirror::Object* to_ref;
- while ((to_ref = PopOffMarkStack()) != nullptr) {
- ++count;
- DCHECK(!region_space_->IsInFromSpace(to_ref));
- if (kUseBakerReadBarrier) {
- DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr())
- << " " << to_ref << " " << to_ref->GetReadBarrierPointer()
- << " is_marked=" << IsMarked(to_ref);
+ bool empty_prev = false;
+ while (true) {
+ bool empty = ProcessMarkStackOnce();
+ if (empty_prev && empty) {
+ // Saw empty mark stack for a second time, done.
+ break;
}
- // Scan ref fields.
- Scan(to_ref);
- // Mark the gray ref as white or black.
- if (kUseBakerReadBarrier) {
- DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr())
- << " " << to_ref << " " << to_ref->GetReadBarrierPointer()
- << " is_marked=" << IsMarked(to_ref);
- }
- if (to_ref->GetClass<kVerifyNone, kWithoutReadBarrier>()->IsTypeOfReferenceClass() &&
- to_ref->AsReference()->GetReferent<kWithoutReadBarrier>() != nullptr &&
- !IsInToSpace(to_ref->AsReference()->GetReferent<kWithoutReadBarrier>())) {
- // Leave References gray so that GetReferent() will trigger RB.
- CHECK(to_ref->AsReference()->IsEnqueued()) << "Left unenqueued ref gray " << to_ref;
- } else {
-#ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
- if (kUseBakerReadBarrier) {
- if (region_space_->IsInToSpace(to_ref)) {
- // If to-space, change from gray to white.
- bool success = to_ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
- ReadBarrier::WhitePtr());
- CHECK(success) << "Must succeed as we won the race.";
- CHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
- } else {
- // If non-moving space/unevac from space, change from gray
- // to black. We can't change gray to white because it's not
- // safe to use CAS if two threads change values in opposite
- // directions (A->B and B->A). So, we change it to black to
- // indicate non-moving objects that have been marked
- // through. Note we'd need to change from black to white
- // later (concurrently).
- bool success = to_ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
- ReadBarrier::BlackPtr());
- CHECK(success) << "Must succeed as we won the race.";
- CHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
- }
- }
-#else
- DCHECK(!kUseBakerReadBarrier);
-#endif
- }
- if (ReadBarrier::kEnableToSpaceInvariantChecks || kIsDebugBuild) {
- ConcurrentCopyingAssertToSpaceInvariantObjectVisitor visitor(this);
- visitor(to_ref);
- }
+ empty_prev = empty;
}
+}
+
+bool ConcurrentCopying::ProcessMarkStackOnce() {
+ Thread* self = Thread::Current();
+ CHECK(thread_running_gc_ != nullptr);
+ CHECK(self == thread_running_gc_);
+ CHECK(self->GetThreadLocalMarkStack() == nullptr);
+ size_t count = 0;
+ MarkStackMode mark_stack_mode = mark_stack_mode_.LoadRelaxed();
+ if (mark_stack_mode == kMarkStackModeThreadLocal) {
+ // Process the thread-local mark stacks and the GC mark stack.
+ count += ProcessThreadLocalMarkStacks(false);
+ while (!gc_mark_stack_->IsEmpty()) {
+ mirror::Object* to_ref = gc_mark_stack_->PopBack();
+ ProcessMarkStackRef(to_ref);
+ ++count;
+ }
+ gc_mark_stack_->Reset();
+ } else if (mark_stack_mode == kMarkStackModeShared) {
+ // Process the shared GC mark stack with a lock.
+ {
+ MutexLock mu(self, mark_stack_lock_);
+ CHECK(revoked_mark_stacks_.empty());
+ }
+ while (true) {
+ std::vector<mirror::Object*> refs;
+ {
+ // Copy refs with lock. Note the number of refs should be small.
+ MutexLock mu(self, mark_stack_lock_);
+ if (gc_mark_stack_->IsEmpty()) {
+ break;
+ }
+ for (StackReference<mirror::Object>* p = gc_mark_stack_->Begin();
+ p != gc_mark_stack_->End(); ++p) {
+ refs.push_back(p->AsMirrorPtr());
+ }
+ gc_mark_stack_->Reset();
+ }
+ for (mirror::Object* ref : refs) {
+ ProcessMarkStackRef(ref);
+ ++count;
+ }
+ }
+ } else {
+ CHECK_EQ(static_cast<uint32_t>(mark_stack_mode),
+ static_cast<uint32_t>(kMarkStackModeGcExclusive));
+ {
+ MutexLock mu(self, mark_stack_lock_);
+ CHECK(revoked_mark_stacks_.empty());
+ }
+ // Process the GC mark stack in the exclusive mode. No need to take the lock.
+ while (!gc_mark_stack_->IsEmpty()) {
+ mirror::Object* to_ref = gc_mark_stack_->PopBack();
+ ProcessMarkStackRef(to_ref);
+ ++count;
+ }
+ gc_mark_stack_->Reset();
+ }
+
// Return true if the stack was empty.
return count == 0;
}
-void ConcurrentCopying::CheckEmptyMarkQueue() {
- if (!mark_queue_.IsEmpty()) {
- while (!mark_queue_.IsEmpty()) {
- mirror::Object* obj = mark_queue_.Dequeue();
- if (kUseBakerReadBarrier) {
- mirror::Object* rb_ptr = obj->GetReadBarrierPointer();
- LOG(INFO) << "On mark queue : " << obj << " " << PrettyTypeOf(obj) << " rb_ptr=" << rb_ptr
- << " is_marked=" << IsMarked(obj);
+size_t ConcurrentCopying::ProcessThreadLocalMarkStacks(bool disable_weak_ref_access) {
+ // Run a checkpoint to collect all thread local mark stacks and iterate over them all.
+ RevokeThreadLocalMarkStacks(disable_weak_ref_access);
+ size_t count = 0;
+ std::vector<accounting::AtomicStack<mirror::Object>*> mark_stacks;
+ {
+ MutexLock mu(Thread::Current(), mark_stack_lock_);
+ // Make a copy of the mark stack vector.
+ mark_stacks = revoked_mark_stacks_;
+ revoked_mark_stacks_.clear();
+ }
+ for (accounting::AtomicStack<mirror::Object>* mark_stack : mark_stacks) {
+ for (StackReference<mirror::Object>* p = mark_stack->Begin(); p != mark_stack->End(); ++p) {
+ mirror::Object* to_ref = p->AsMirrorPtr();
+ ProcessMarkStackRef(to_ref);
+ ++count;
+ }
+ {
+ MutexLock mu(Thread::Current(), mark_stack_lock_);
+ if (pooled_mark_stacks_.size() >= kMarkStackPoolSize) {
+ // The pool already has enough mark stacks. Delete this one.
+ delete mark_stack;
} else {
- LOG(INFO) << "On mark queue : " << obj << " " << PrettyTypeOf(obj)
- << " is_marked=" << IsMarked(obj);
+ // Otherwise, put it into the pool for later reuse.
+ mark_stack->Reset();
+ pooled_mark_stacks_.push_back(mark_stack);
}
}
- LOG(FATAL) << "mark queue is not empty";
+ }
+ return count;
+}
+
+void ConcurrentCopying::ProcessMarkStackRef(mirror::Object* to_ref) {
+ DCHECK(!region_space_->IsInFromSpace(to_ref));
+ if (kUseBakerReadBarrier) {
+ DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr())
+ << " " << to_ref << " " << to_ref->GetReadBarrierPointer()
+ << " is_marked=" << IsMarked(to_ref);
+ }
+ // Scan ref fields.
+ Scan(to_ref);
+ // Mark the gray ref as white or black.
+ if (kUseBakerReadBarrier) {
+ DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr())
+ << " " << to_ref << " " << to_ref->GetReadBarrierPointer()
+ << " is_marked=" << IsMarked(to_ref);
+ }
+ if (to_ref->GetClass<kVerifyNone, kWithoutReadBarrier>()->IsTypeOfReferenceClass() &&
+ to_ref->AsReference()->GetReferent<kWithoutReadBarrier>() != nullptr &&
+ !IsInToSpace(to_ref->AsReference()->GetReferent<kWithoutReadBarrier>())) {
+ // Leave References gray so that GetReferent() will trigger RB.
+ CHECK(to_ref->AsReference()->IsEnqueued()) << "Left unenqueued ref gray " << to_ref;
+ } else {
+#ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
+ if (kUseBakerReadBarrier) {
+ if (region_space_->IsInToSpace(to_ref)) {
+ // If to-space, change from gray to white.
+ bool success = to_ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
+ ReadBarrier::WhitePtr());
+ CHECK(success) << "Must succeed as we won the race.";
+ CHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
+ } else {
+ // If non-moving space/unevac from space, change from gray
+ // to black. We can't change gray to white because it's not
+ // safe to use CAS if two threads change values in opposite
+ // directions (A->B and B->A). So, we change it to black to
+ // indicate non-moving objects that have been marked
+ // through. Note we'd need to change from black to white
+ // later (concurrently).
+ bool success = to_ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
+ ReadBarrier::BlackPtr());
+ CHECK(success) << "Must succeed as we won the race.";
+ CHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
+ }
+ }
+#else
+ DCHECK(!kUseBakerReadBarrier);
+#endif
+ }
+ if (ReadBarrier::kEnableToSpaceInvariantChecks || kIsDebugBuild) {
+ ConcurrentCopyingAssertToSpaceInvariantObjectVisitor visitor(this);
+ visitor(to_ref);
+ }
+}
+
+void ConcurrentCopying::SwitchToSharedMarkStackMode() {
+ Thread* self = Thread::Current();
+ CHECK(thread_running_gc_ != nullptr);
+ CHECK_EQ(self, thread_running_gc_);
+ CHECK(self->GetThreadLocalMarkStack() == nullptr);
+ MarkStackMode before_mark_stack_mode = mark_stack_mode_.LoadRelaxed();
+ CHECK_EQ(static_cast<uint32_t>(before_mark_stack_mode),
+ static_cast<uint32_t>(kMarkStackModeThreadLocal));
+ mark_stack_mode_.StoreRelaxed(kMarkStackModeShared);
+ CHECK(weak_ref_access_enabled_.LoadRelaxed());
+ weak_ref_access_enabled_.StoreRelaxed(false);
+ QuasiAtomic::ThreadFenceForConstructor();
+ // Process the thread-local mark stacks one last time after switching to the shared mark stack
+ // mode and disabling weak ref accesses.
+ ProcessThreadLocalMarkStacks(true);
+ if (kVerboseMode) {
+ LOG(INFO) << "Switched to shared mark stack mode and disabled weak ref access";
+ }
+}
+
+void ConcurrentCopying::SwitchToGcExclusiveMarkStackMode() {
+ Thread* self = Thread::Current();
+ CHECK(thread_running_gc_ != nullptr);
+ CHECK_EQ(self, thread_running_gc_);
+ CHECK(self->GetThreadLocalMarkStack() == nullptr);
+ MarkStackMode before_mark_stack_mode = mark_stack_mode_.LoadRelaxed();
+ CHECK_EQ(static_cast<uint32_t>(before_mark_stack_mode),
+ static_cast<uint32_t>(kMarkStackModeShared));
+ mark_stack_mode_.StoreRelaxed(kMarkStackModeGcExclusive);
+ QuasiAtomic::ThreadFenceForConstructor();
+ if (kVerboseMode) {
+ LOG(INFO) << "Switched to GC exclusive mark stack mode";
+ }
+}
+
+void ConcurrentCopying::CheckEmptyMarkStack() {
+ Thread* self = Thread::Current();
+ CHECK(thread_running_gc_ != nullptr);
+ CHECK_EQ(self, thread_running_gc_);
+ CHECK(self->GetThreadLocalMarkStack() == nullptr);
+ MarkStackMode mark_stack_mode = mark_stack_mode_.LoadRelaxed();
+ if (mark_stack_mode == kMarkStackModeThreadLocal) {
+ // Thread-local mark stack mode.
+ RevokeThreadLocalMarkStacks(false);
+ MutexLock mu(Thread::Current(), mark_stack_lock_);
+ if (!revoked_mark_stacks_.empty()) {
+ for (accounting::AtomicStack<mirror::Object>* mark_stack : revoked_mark_stacks_) {
+ while (!mark_stack->IsEmpty()) {
+ mirror::Object* obj = mark_stack->PopBack();
+ if (kUseBakerReadBarrier) {
+ mirror::Object* rb_ptr = obj->GetReadBarrierPointer();
+ LOG(INFO) << "On mark queue : " << obj << " " << PrettyTypeOf(obj) << " rb_ptr=" << rb_ptr
+ << " is_marked=" << IsMarked(obj);
+ } else {
+ LOG(INFO) << "On mark queue : " << obj << " " << PrettyTypeOf(obj)
+ << " is_marked=" << IsMarked(obj);
+ }
+ }
+ }
+ LOG(FATAL) << "mark stack is not empty";
+ }
+ } else {
+ // Shared, GC-exclusive, or off.
+ MutexLock mu(Thread::Current(), mark_stack_lock_);
+ CHECK(gc_mark_stack_->IsEmpty());
+ CHECK(revoked_mark_stacks_.empty());
}
}
@@ -791,7 +1100,7 @@
heap_->MarkAllocStackAsLive(live_stack);
live_stack->Reset();
}
- CHECK(mark_queue_.IsEmpty());
+ CheckEmptyMarkStack();
TimingLogger::ScopedTiming split("Sweep", GetTimings());
for (const auto& space : GetHeap()->GetContinuousSpaces()) {
if (space->IsContinuousMemMapAllocSpace()) {
@@ -887,8 +1196,8 @@
}
IssueEmptyCheckpoint();
// Disable the check.
- is_mark_queue_push_disallowed_.StoreSequentiallyConsistent(0);
- CheckEmptyMarkQueue();
+ is_mark_stack_push_disallowed_.StoreSequentiallyConsistent(0);
+ CheckEmptyMarkStack();
}
{
@@ -955,6 +1264,8 @@
region_space_bitmap_ = nullptr;
}
+ CheckEmptyMarkStack();
+
if (kVerboseMode) {
LOG(INFO) << "GC end of ReclaimPhase";
}
@@ -1478,7 +1789,7 @@
}
DCHECK(GetFwdPtr(from_ref) == to_ref);
CHECK_NE(to_ref->GetLockWord(false).GetState(), LockWord::kForwardingAddress);
- PushOntoMarkStack<true>(to_ref);
+ PushOntoMarkStack(to_ref);
return to_ref;
} else {
// The CAS failed. It may have lost the race or may have failed
@@ -1611,7 +1922,7 @@
if (kUseBakerReadBarrier) {
DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
}
- PushOntoMarkStack<true>(to_ref);
+ PushOntoMarkStack(to_ref);
}
} else {
// from_ref is in a non-moving space.
@@ -1638,7 +1949,7 @@
if (kUseBakerReadBarrier) {
DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
}
- PushOntoMarkStack<true>(to_ref);
+ PushOntoMarkStack(to_ref);
}
} else {
// Use the mark bitmap.
@@ -1694,7 +2005,7 @@
if (kUseBakerReadBarrier) {
DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
}
- PushOntoMarkStack<true>(to_ref);
+ PushOntoMarkStack(to_ref);
}
}
}
@@ -1704,9 +2015,11 @@
}
void ConcurrentCopying::FinishPhase() {
+ {
+ MutexLock mu(Thread::Current(), mark_stack_lock_);
+ CHECK_EQ(pooled_mark_stacks_.size(), kMarkStackPoolSize);
+ }
region_space_ = nullptr;
- CHECK(mark_queue_.IsEmpty());
- mark_queue_.Clear();
{
MutexLock mu(Thread::Current(), skipped_blocks_lock_);
skipped_blocks_map_.clear();
@@ -1739,7 +2052,8 @@
}
void ConcurrentCopying::ProcessMarkStackCallback(void* arg) {
- reinterpret_cast<ConcurrentCopying*>(arg)->ProcessMarkStack();
+ ConcurrentCopying* concurrent_copying = reinterpret_cast<ConcurrentCopying*>(arg);
+ concurrent_copying->ProcessMarkStack();
}
void ConcurrentCopying::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference) {
@@ -1747,11 +2061,12 @@
klass, reference, &IsHeapReferenceMarkedCallback, this);
}
-void ConcurrentCopying::ProcessReferences(Thread* self, bool concurrent) {
+void ConcurrentCopying::ProcessReferences(Thread* self) {
TimingLogger::ScopedTiming split("ProcessReferences", GetTimings());
+ // We don't really need to hold the heap bitmap lock as we use CAS to mark in bitmaps.
WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
GetHeap()->GetReferenceProcessor()->ProcessReferences(
- concurrent, GetTimings(), GetCurrentIteration()->GetClearSoftReferences(),
+ true /*concurrent*/, GetTimings(), GetCurrentIteration()->GetClearSoftReferences(),
&IsHeapReferenceMarkedCallback, &MarkCallback, &ProcessMarkStackCallback, this);
}
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index b1897b8..1fb4703 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -49,89 +49,6 @@
namespace collector {
-// Concurrent queue. Used as the mark stack. TODO: use a concurrent
-// stack for locality.
-class MarkQueue {
- public:
- explicit MarkQueue(size_t size) : size_(size) {
- CHECK(IsPowerOfTwo(size_));
- buf_.reset(new Atomic<mirror::Object*>[size_]);
- CHECK(buf_.get() != nullptr);
- Clear();
- }
-
- ALWAYS_INLINE Atomic<mirror::Object*>* GetSlotAddr(size_t index) {
- return &(buf_.get()[index & (size_ - 1)]);
- }
-
- // Multiple-proceducer enqueue.
- bool Enqueue(mirror::Object* to_ref) {
- size_t t;
- do {
- t = tail_.LoadRelaxed();
- size_t h = head_.LoadSequentiallyConsistent();
- if (t + size_ == h) {
- // It's full.
- return false;
- }
- } while (!tail_.CompareExchangeWeakSequentiallyConsistent(t, t + 1));
- // We got a slot but its content has not been filled yet at this point.
- GetSlotAddr(t)->StoreSequentiallyConsistent(to_ref);
- return true;
- }
-
- // Thread-unsafe.
- bool EnqueueThreadUnsafe(mirror::Object* to_ref) {
- size_t t = tail_.LoadRelaxed();
- size_t h = head_.LoadRelaxed();
- if (t + size_ == h) {
- // It's full.
- return false;
- }
- GetSlotAddr(t)->StoreRelaxed(to_ref);
- tail_.StoreRelaxed(t + 1);
- return true;
- }
-
- // Single-consumer dequeue.
- mirror::Object* Dequeue() {
- size_t h = head_.LoadRelaxed();
- size_t t = tail_.LoadSequentiallyConsistent();
- if (h == t) {
- // it's empty.
- return nullptr;
- }
- Atomic<mirror::Object*>* slot = GetSlotAddr(h);
- mirror::Object* ref = slot->LoadSequentiallyConsistent();
- while (ref == nullptr) {
- // Wait until the slot content becomes visible.
- ref = slot->LoadSequentiallyConsistent();
- }
- slot->StoreRelaxed(nullptr);
- head_.StoreSequentiallyConsistent(h + 1);
- return ref;
- }
-
- bool IsEmpty() {
- size_t h = head_.LoadSequentiallyConsistent();
- size_t t = tail_.LoadSequentiallyConsistent();
- return h == t;
- }
-
- void Clear() {
- head_.StoreRelaxed(0);
- tail_.StoreRelaxed(0);
- memset(buf_.get(), 0, size_ * sizeof(Atomic<mirror::Object*>));
- }
-
- private:
- Atomic<size_t> head_;
- Atomic<size_t> tail_;
-
- size_t size_;
- std::unique_ptr<Atomic<mirror::Object*>[]> buf_;
-};
-
class ConcurrentCopying : public GarbageCollector {
public:
// TODO: disable these flags for production use.
@@ -185,10 +102,12 @@
Barrier& GetBarrier() {
return *gc_barrier_;
}
+ bool IsWeakRefAccessEnabled() {
+ return weak_ref_access_enabled_.LoadRelaxed();
+ }
+ void RevokeThreadLocalMarkStack(Thread* thread) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
private:
- mirror::Object* PopOffMarkStack();
- template<bool kThreadSafe>
void PushOntoMarkStack(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
mirror::Object* Copy(mirror::Object* from_ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
void Scan(mirror::Object* to_ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -202,11 +121,18 @@
void VerifyNoFromSpaceReferences() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
accounting::ObjectStack* GetAllocationStack();
accounting::ObjectStack* GetLiveStack();
- bool ProcessMarkStack() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ void ProcessMarkStack() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ bool ProcessMarkStackOnce() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ void ProcessMarkStackRef(mirror::Object* to_ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ size_t ProcessThreadLocalMarkStacks(bool disable_weak_ref_access)
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ void RevokeThreadLocalMarkStacks(bool disable_weak_ref_access)
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ void SwitchToSharedMarkStackMode() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ void SwitchToGcExclusiveMarkStackMode() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
void DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
- void ProcessReferences(Thread* self, bool concurrent)
- SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ void ProcessReferences(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
mirror::Object* IsMarked(mirror::Object* from_ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
static mirror::Object* MarkCallback(mirror::Object* from_ref, void* arg)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -229,7 +155,7 @@
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
mirror::Object* AllocateInSkippedBlock(size_t alloc_size)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
- void CheckEmptyMarkQueue() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ void CheckEmptyMarkStack() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
void IssueEmptyCheckpoint() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
bool IsOnAllocStack(mirror::Object* ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
mirror::Object* GetFwdPtr(mirror::Object* from_ref)
@@ -242,10 +168,19 @@
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
void AssertToSpaceInvariantInNonMovingSpace(mirror::Object* obj, mirror::Object* ref)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ void ReenableWeakRefAccess(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
space::RegionSpace* region_space_; // The underlying region space.
std::unique_ptr<Barrier> gc_barrier_;
- MarkQueue mark_queue_;
+ std::unique_ptr<accounting::ObjectStack> gc_mark_stack_;
+ Mutex mark_stack_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+ std::vector<accounting::ObjectStack*> revoked_mark_stacks_
+ GUARDED_BY(mark_stack_lock_);
+ static constexpr size_t kMarkStackSize = kPageSize;
+ static constexpr size_t kMarkStackPoolSize = 256;
+ std::vector<accounting::ObjectStack*> pooled_mark_stacks_
+ GUARDED_BY(mark_stack_lock_);
+ Thread* thread_running_gc_;
bool is_marking_; // True while marking is ongoing.
bool is_active_; // True while the collection is ongoing.
bool is_asserting_to_space_invariant_; // True while asserting the to-space invariant.
@@ -258,7 +193,18 @@
size_t live_stack_freeze_size_;
size_t from_space_num_objects_at_first_pause_;
size_t from_space_num_bytes_at_first_pause_;
- Atomic<int> is_mark_queue_push_disallowed_;
+ Atomic<int> is_mark_stack_push_disallowed_;
+ enum MarkStackMode {
+ kMarkStackModeOff = 0, // Mark stack is off.
+ kMarkStackModeThreadLocal, // All threads except for the GC-running thread push refs onto
+ // thread-local mark stacks. The GC-running thread pushes onto and
+ // pops off the GC mark stack without a lock.
+ kMarkStackModeShared, // All threads share the GC mark stack with a lock.
+ kMarkStackModeGcExclusive // The GC-running thread pushes onto and pops from the GC mark stack
+ // without a lock. Other threads won't access the mark stack.
+ };
+ Atomic<MarkStackMode> mark_stack_mode_;
+ Atomic<bool> weak_ref_access_enabled_;
// How many objects and bytes we moved. Used for accounting.
Atomic<size_t> bytes_moved_;
@@ -284,6 +230,7 @@
friend class ThreadFlipVisitor;
friend class FlipCallback;
friend class ConcurrentCopyingComputeUnevacFromSpaceLiveRatioVisitor;
+ friend class RevokeThreadLocalMarkStackCheckpoint;
DISALLOW_IMPLICIT_CONSTRUCTORS(ConcurrentCopying);
};
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index 4d51d38..c08ed0e 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -53,15 +53,27 @@
condition_.Broadcast(self);
}
+void ReferenceProcessor::BroadcastForSlowPath(Thread* self) {
+ CHECK(kUseReadBarrier);
+ MutexLock mu(self, *Locks::reference_processor_lock_);
+ condition_.Broadcast(self);
+}
+
mirror::Object* ReferenceProcessor::GetReferent(Thread* self, mirror::Reference* reference) {
- mirror::Object* const referent = reference->GetReferent();
- // If the referent is null then it is already cleared, we can just return null since there is no
- // scenario where it becomes non-null during the reference processing phase.
- if (UNLIKELY(!SlowPathEnabled()) || referent == nullptr) {
- return referent;
+ if (!kUseReadBarrier || self->GetWeakRefAccessEnabled()) {
+ // Under read barrier / concurrent copying collector, it's not safe to call GetReferent() when
+ // weak ref access is disabled as the call includes a read barrier which may push a ref onto the
+ // mark stack and interfere with termination of marking.
+ mirror::Object* const referent = reference->GetReferent();
+ // If the referent is null then it is already cleared, we can just return null since there is no
+ // scenario where it becomes non-null during the reference processing phase.
+ if (UNLIKELY(!SlowPathEnabled()) || referent == nullptr) {
+ return referent;
+ }
}
MutexLock mu(self, *Locks::reference_processor_lock_);
- while (SlowPathEnabled()) {
+ while ((!kUseReadBarrier && SlowPathEnabled()) ||
+ (kUseReadBarrier && !self->GetWeakRefAccessEnabled())) {
mirror::HeapReference<mirror::Object>* const referent_addr =
reference->GetReferentReferenceAddr();
// If the referent became cleared, return it. Don't need barrier since thread roots can't get
@@ -128,7 +140,12 @@
process_references_args_.is_marked_callback_ = is_marked_callback;
process_references_args_.mark_callback_ = mark_object_callback;
process_references_args_.arg_ = arg;
- CHECK_EQ(SlowPathEnabled(), concurrent) << "Slow path must be enabled iff concurrent";
+ if (!kUseReadBarrier) {
+ CHECK_EQ(SlowPathEnabled(), concurrent) << "Slow path must be enabled iff concurrent";
+ } else {
+ // Weak ref access is enabled at Zygote compaction by SemiSpace (concurrent == false).
+ CHECK_EQ(!self->GetWeakRefAccessEnabled(), concurrent);
+ }
}
// Unless required to clear soft references with white references, preserve some white referents.
if (!clear_soft_references) {
@@ -178,9 +195,11 @@
// starts since there is a small window of time where slow_path_enabled_ is enabled but the
// callback isn't yet set.
process_references_args_.is_marked_callback_ = nullptr;
- if (concurrent) {
- // Done processing, disable the slow path and broadcast to the waiters.
- DisableSlowPath(self);
+ if (!kUseReadBarrier) {
+ if (concurrent) {
+ // Done processing, disable the slow path and broadcast to the waiters.
+ DisableSlowPath(self);
+ }
}
}
}
@@ -264,7 +283,8 @@
Thread* self = Thread::Current();
MutexLock mu(self, *Locks::reference_processor_lock_);
// Wait until we are done processing the reference.
- while (SlowPathEnabled()) {
+ while ((!kUseReadBarrier && SlowPathEnabled()) ||
+ (kUseReadBarrier && !self->GetWeakRefAccessEnabled())) {
condition_.WaitHoldingLocks(self);
}
// At this point, since the sentinel of the reference is live, it is guaranteed to not be
diff --git a/runtime/gc/reference_processor.h b/runtime/gc/reference_processor.h
index a44319b..284d13c 100644
--- a/runtime/gc/reference_processor.h
+++ b/runtime/gc/reference_processor.h
@@ -54,6 +54,7 @@
// Only allow setting this with mutators suspended so that we can avoid using a lock in the
// GetReferent fast path as an optimization.
void EnableSlowPath() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ void BroadcastForSlowPath(Thread* self);
// Decode the referent, may block if references are being processed.
mirror::Object* GetReferent(Thread* self, mirror::Reference* reference)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(Locks::reference_processor_lock_);
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index 2a96278..2a06ab3 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -231,13 +231,21 @@
CHECK(!allow_new_interns_);
}
+void InternTable::BroadcastForNewInterns() {
+ CHECK(kUseReadBarrier);
+ Thread* self = Thread::Current();
+ MutexLock mu(self, *Locks::intern_table_lock_);
+ new_intern_condition_.Broadcast(self);
+}
+
mirror::String* InternTable::Insert(mirror::String* s, bool is_strong) {
if (s == nullptr) {
return nullptr;
}
Thread* self = Thread::Current();
MutexLock mu(self, *Locks::intern_table_lock_);
- while (UNLIKELY(!allow_new_interns_)) {
+ while (UNLIKELY((!kUseReadBarrier && !allow_new_interns_) ||
+ (kUseReadBarrier && !self->GetWeakRefAccessEnabled()))) {
new_intern_condition_.WaitHoldingLocks(self);
}
// Check the strong table for a match.
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index 97ce73c..53f6f75 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -88,6 +88,7 @@
void DisallowNewInterns() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
void AllowNewInterns() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
void EnsureNewInternsDisallowed() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ void BroadcastForNewInterns() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
// Adds all of the resolved image strings from the image space into the intern table. The
// advantage of doing this is preventing expensive DexFile::FindStringId calls.
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index f1deacf..36adbea 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -473,7 +473,8 @@
return nullptr;
}
MutexLock mu(self, weak_globals_lock_);
- while (UNLIKELY(!allow_new_weak_globals_)) {
+ while (UNLIKELY((!kUseReadBarrier && !allow_new_weak_globals_) ||
+ (kUseReadBarrier && !self->GetWeakRefAccessEnabled()))) {
weak_globals_add_condition_.WaitHoldingLocks(self);
}
IndirectRef ref = weak_globals_.Add(IRT_FIRST_SEGMENT, obj);
@@ -559,6 +560,13 @@
CHECK(!allow_new_weak_globals_);
}
+void JavaVMExt::BroadcastForNewWeakGlobals() {
+ CHECK(kUseReadBarrier);
+ Thread* self = Thread::Current();
+ MutexLock mu(self, weak_globals_lock_);
+ weak_globals_add_condition_.Broadcast(self);
+}
+
mirror::Object* JavaVMExt::DecodeGlobal(Thread* self, IndirectRef ref) {
return globals_.SynchronizedGet(self, &globals_lock_, ref);
}
@@ -570,7 +578,8 @@
mirror::Object* JavaVMExt::DecodeWeakGlobal(Thread* self, IndirectRef ref) {
MutexLock mu(self, weak_globals_lock_);
- while (UNLIKELY(!allow_new_weak_globals_)) {
+ while (UNLIKELY((!kUseReadBarrier && !allow_new_weak_globals_) ||
+ (kUseReadBarrier && !self->GetWeakRefAccessEnabled()))) {
weak_globals_add_condition_.WaitHoldingLocks(self);
}
return weak_globals_.Get(ref);
diff --git a/runtime/java_vm_ext.h b/runtime/java_vm_ext.h
index 4fdf45a..694a545 100644
--- a/runtime/java_vm_ext.h
+++ b/runtime/java_vm_ext.h
@@ -108,6 +108,7 @@
void DisallowNewWeakGlobals() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
void AllowNewWeakGlobals() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
void EnsureNewWeakGlobalsDisallowed() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ void BroadcastForNewWeakGlobals() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
jobject AddGlobalRef(Thread* self, mirror::Object* obj)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 4be25d6..bc89890 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -1146,10 +1146,18 @@
CHECK(!allow_new_monitors_);
}
+void MonitorList::BroadcastForNewMonitors() {
+ CHECK(kUseReadBarrier);
+ Thread* self = Thread::Current();
+ MutexLock mu(self, monitor_list_lock_);
+ monitor_add_condition_.Broadcast(self);
+}
+
void MonitorList::Add(Monitor* m) {
Thread* self = Thread::Current();
MutexLock mu(self, monitor_list_lock_);
- while (UNLIKELY(!allow_new_monitors_)) {
+ while (UNLIKELY((!kUseReadBarrier && !allow_new_monitors_) ||
+ (kUseReadBarrier && !self->GetWeakRefAccessEnabled()))) {
monitor_add_condition_.WaitHoldingLocks(self);
}
list_.push_front(m);
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 8f3a91d..8f6fb75 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -292,6 +292,7 @@
void DisallowNewMonitors() LOCKS_EXCLUDED(monitor_list_lock_);
void AllowNewMonitors() LOCKS_EXCLUDED(monitor_list_lock_);
void EnsureNewMonitorsDisallowed() LOCKS_EXCLUDED(monitor_list_lock_);
+ void BroadcastForNewMonitors() LOCKS_EXCLUDED(monitor_list_lock_);
// Returns how many monitors were deflated.
size_t DeflateMonitors() LOCKS_EXCLUDED(monitor_list_lock_)
EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/oat.h b/runtime/oat.h
index 5706c4e..3451d0f 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
class PACKED(4) OatHeader {
public:
static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
- static constexpr uint8_t kOatVersion[] = { '0', '6', '5', '\0' };
+ static constexpr uint8_t kOatVersion[] = { '0', '6', '6', '\0' };
static constexpr const char* kImageLocationKey = "image-location";
static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 5067b0d..a2a745e 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1512,6 +1512,13 @@
java_vm_->EnsureNewWeakGlobalsDisallowed();
}
+void Runtime::BroadcastForNewSystemWeaks() {
+ CHECK(kUseReadBarrier);
+ monitor_list_->BroadcastForNewMonitors();
+ intern_table_->BroadcastForNewInterns();
+ java_vm_->BroadcastForNewWeakGlobals();
+}
+
void Runtime::SetInstructionSet(InstructionSet instruction_set) {
instruction_set_ = instruction_set;
if ((instruction_set_ == kThumb2) || (instruction_set_ == kArm)) {
diff --git a/runtime/runtime.h b/runtime/runtime.h
index bcc7118..13cccc0 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -299,6 +299,7 @@
void DisallowNewSystemWeaks() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
void AllowNewSystemWeaks() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
void EnsureNewSystemWeaksDisallowed() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ void BroadcastForNewSystemWeaks() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
// Visit all the roots. If only_dirty is true then non-dirty roots won't be visited. If
// clean_dirty is true then dirty roots will be marked as non-dirty after visiting.
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 37a86f1..7ee0ff1 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1289,6 +1289,7 @@
tlsPtr_.checkpoint_functions[i] = nullptr;
}
tlsPtr_.flip_function = nullptr;
+ tlsPtr_.thread_local_mark_stack = nullptr;
tls32_.suspended_at_suspend_check = false;
}
@@ -1407,6 +1408,9 @@
{
ScopedObjectAccess soa(self);
Runtime::Current()->GetHeap()->RevokeThreadLocalBuffers(this);
+ if (kUseReadBarrier) {
+ Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->RevokeThreadLocalMarkStack(this);
+ }
}
}
diff --git a/runtime/thread.h b/runtime/thread.h
index 0e71c08..ee308a8 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -46,6 +46,9 @@
namespace art {
namespace gc {
+namespace accounting {
+ template<class T> class AtomicStack;
+} // namespace accounting
namespace collector {
class SemiSpace;
} // namespace collector
@@ -232,6 +235,15 @@
void SetFlipFunction(Closure* function);
Closure* GetFlipFunction();
+ gc::accounting::AtomicStack<mirror::Object>* GetThreadLocalMarkStack() {
+ CHECK(kUseReadBarrier);
+ return tlsPtr_.thread_local_mark_stack;
+ }
+ void SetThreadLocalMarkStack(gc::accounting::AtomicStack<mirror::Object>* stack) {
+ CHECK(kUseReadBarrier);
+ tlsPtr_.thread_local_mark_stack = stack;
+ }
+
// Called when thread detected that the thread_suspend_count_ was non-zero. Gives up share of
// mutator_lock_ and waits until it is resumed and thread_suspend_count_ is zero.
void FullSuspendCheck()
@@ -772,6 +784,16 @@
tls32_.debug_method_entry_ = false;
}
+ bool GetWeakRefAccessEnabled() const {
+ CHECK(kUseReadBarrier);
+ return tls32_.weak_ref_access_enabled;
+ }
+
+ void SetWeakRefAccessEnabled(bool enabled) {
+ CHECK(kUseReadBarrier);
+ tls32_.weak_ref_access_enabled = enabled;
+ }
+
// Activates single step control for debugging. The thread takes the
// ownership of the given SingleStepControl*. It is deleted by a call
// to DeactivateSingleStepControl or upon thread destruction.
@@ -1060,7 +1082,7 @@
daemon(is_daemon), throwing_OutOfMemoryError(false), no_thread_suspension(0),
thread_exit_check_count(0), handling_signal_(false),
deoptimization_return_value_is_reference(false), suspended_at_suspend_check(false),
- ready_for_debug_invoke(false), debug_method_entry_(false) {
+ ready_for_debug_invoke(false), debug_method_entry_(false), weak_ref_access_enabled(true) {
}
union StateAndFlags state_and_flags;
@@ -1117,6 +1139,15 @@
// True if the thread enters a method. This is used to detect method entry
// event for the debugger.
bool32_t debug_method_entry_;
+
+ // True if the thread is allowed to access a weak ref (Reference::GetReferent() and system
+ // weaks) and to potentially mark an object alive/gray. This is used only for the CC collector's
+ // concurrent reference processing. It is thread-local so that we can enable/disable weak ref
+ // access with a checkpoint and avoid a race around the time weak ref access gets disabled and
+ // concurrent reference processing begins (if weak ref access is disabled during a pause, this
+ // is not an issue). Other collectors use Runtime::DisallowNewSystemWeaks() and
+ // ReferenceProcessor::EnableSlowPath().
+ bool32_t weak_ref_access_enabled;
} tls32_;
struct PACKED(8) tls_64bit_sized_values {
@@ -1268,6 +1299,9 @@
// Current method verifier, used for root marking.
verifier::MethodVerifier* method_verifier;
+
+ // Thread-local mark stack for the concurrent copying collector.
+ gc::accounting::AtomicStack<mirror::Object>* thread_local_mark_stack;
} tlsPtr_;
// Guards the 'interrupted_' and 'wait_monitor_' members.
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index b697b43..47e5b52 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -33,6 +33,7 @@
#include "base/time_utils.h"
#include "base/timing_logger.h"
#include "debugger.h"
+#include "gc/collector/concurrent_copying.h"
#include "jni_internal.h"
#include "lock_word.h"
#include "monitor.h"
@@ -1102,6 +1103,12 @@
}
CHECK(!Contains(self));
list_.push_back(self);
+ if (kUseReadBarrier) {
+ // Initialize this according to the state of the CC collector.
+ bool weak_ref_access_enabled =
+ Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->IsWeakRefAccessEnabled();
+ self->SetWeakRefAccessEnabled(weak_ref_access_enabled);
+ }
}
void ThreadList::Unregister(Thread* self) {