-rw-r--r-- | compiler/optimizing/intrinsics_arm64.cc | 3
-rw-r--r-- | compiler/optimizing/intrinsics_arm_vixl.cc | 4
-rw-r--r-- | compiler/optimizing/intrinsics_x86.cc | 5
-rw-r--r-- | compiler/optimizing/intrinsics_x86_64.cc | 5
-rw-r--r-- | runtime/gc/allocation_record.cc | 1
-rw-r--r-- | runtime/gc/collector/concurrent_copying.cc | 44
-rw-r--r-- | runtime/gc/collector/mark_sweep.cc | 10
-rw-r--r-- | runtime/gc/collector/semi_space.cc | 5
-rw-r--r-- | runtime/gc/reference_processor.cc | 263
-rw-r--r-- | runtime/gc/reference_processor.h | 43
-rw-r--r-- | runtime/gc/reference_queue.cc | 49
-rw-r--r-- | runtime/gc/reference_queue.h | 22
-rw-r--r-- | runtime/intern_table.cc | 1
-rw-r--r-- | runtime/intern_table.h | 2
-rw-r--r-- | runtime/interpreter/interpreter_common.h | 1
-rw-r--r-- | runtime/jit/jit_code_cache.cc | 1
-rw-r--r-- | runtime/jni/java_vm_ext.cc | 4
-rw-r--r-- | runtime/thread-inl.h | 19
-rw-r--r-- | runtime/thread.h | 44
19 files changed, 330 insertions, 196 deletions
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 77b55e455e..0236f0d5a9 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -3475,7 +3475,8 @@ void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) { Register temp = temps.AcquireW(); __ Ldr(temp, MemOperand(tr, Thread::WeakRefAccessEnabledOffset<kArm64PointerSize>().Uint32Value())); - __ Cbz(temp, slow_path->GetEntryLabel()); + static_assert(enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled) == 0); + __ Cbnz(temp, slow_path->GetEntryLabel()); } { diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index a4a3457c37..303ac171a7 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -2517,8 +2517,8 @@ void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) { vixl32::Register temp = temps.Acquire(); __ Ldr(temp, MemOperand(tr, Thread::WeakRefAccessEnabledOffset<kArmPointerSize>().Uint32Value())); - __ Cmp(temp, 0); - __ B(eq, slow_path->GetEntryLabel()); + __ Cmp(temp, enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled)); + __ B(ne, slow_path->GetEntryLabel()); } { diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 7c2537495a..3a3886432a 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -3346,8 +3346,9 @@ void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) { if (kEmitCompilerReadBarrier) { // Check self->GetWeakRefAccessEnabled(). ThreadOffset32 offset = Thread::WeakRefAccessEnabledOffset<kX86PointerSize>(); - __ fs()->cmpl(Address::Absolute(offset), Immediate(0)); - __ j(kEqual, slow_path->GetEntryLabel()); + __ fs()->cmpl(Address::Absolute(offset), + Immediate(enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled))); + __ j(kNotEqual, slow_path->GetEntryLabel()); } // Load the java.lang.ref.Reference class, use the output register as a temporary. diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index d5a7cb10e1..e3be98732b 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -3098,8 +3098,9 @@ void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) { if (kEmitCompilerReadBarrier) { // Check self->GetWeakRefAccessEnabled(). ThreadOffset64 offset = Thread::WeakRefAccessEnabledOffset<kX86_64PointerSize>(); - __ gs()->cmpl(Address::Absolute(offset, /* no_rip= */ true), Immediate(0)); - __ j(kEqual, slow_path->GetEntryLabel()); + __ gs()->cmpl(Address::Absolute(offset, /* no_rip= */ true), + Immediate(enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled))); + __ j(kNotEqual, slow_path->GetEntryLabel()); } // Load the java.lang.ref.Reference class, use the output register as a temporary. diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc index 60fb71d952..ffd4d2b582 100644 --- a/runtime/gc/allocation_record.cc +++ b/runtime/gc/allocation_record.cc @@ -23,6 +23,7 @@ #include "obj_ptr-inl.h" #include "object_callbacks.h" #include "stack.h" +#include "thread-inl.h" // For GetWeakRefAccessEnabled(). 
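All four intrinsics changes above compile the same new fast-path test: the generated Reference.get() may take the inline path only when the per-thread flag equals WeakRefAccessState::kVisiblyEnabled (which is why the static_assert pins that value to 0 for the arm64 Cbnz); any other value, including the not-yet-confirmed kEnabled, now branches to the slow path. A minimal standalone sketch of that logic, using stand-in types rather than ART's:

    #include <atomic>
    #include <cstdint>

    enum class WeakRefAccessState : int32_t { kVisiblyEnabled = 0, kEnabled, kDisabled };

    // Stand-ins for the per-thread flag and the two paths; names are illustrative only.
    struct FakeThread {
      std::atomic<WeakRefAccessState> weak_ref_access_enabled{WeakRefAccessState::kVisiblyEnabled};
    };
    struct FakeReference { void* referent = nullptr; };

    void* GetReferentSlowPath(FakeThread&, FakeReference& ref) { return ref.referent; }  // placeholder
    void* LoadReferentWithReadBarrier(FakeReference& ref) { return ref.referent; }       // placeholder

    void* GetReferentIntrinsic(FakeThread& self, FakeReference& ref) {
      // Mirrors Cbnz / B(ne) / j(kNotEqual) above: anything other than kVisiblyEnabled,
      // including kEnabled that this thread has not yet observed with an acquire load,
      // diverts to the slow path.
      if (self.weak_ref_access_enabled.load(std::memory_order_relaxed) !=
          WeakRefAccessState::kVisiblyEnabled) {
        return GetReferentSlowPath(self, ref);
      }
      return LoadReferentWithReadBarrier(ref);
    }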
#include <android-base/properties.h> diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index 3a60191310..34a4089702 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -1621,18 +1621,28 @@ void ConcurrentCopying::CopyingPhase() { { TimingLogger::ScopedTiming split7("Process mark stacks and References", GetTimings()); + + // Process the mark stack once in the thread local stack mode. This marks most of the live + // objects, aside from weak ref accesses with read barriers (Reference::GetReferent() and + // system weaks) that may happen concurrently while we are processing the mark stack and newly + // mark/gray objects and push refs on the mark stack. + ProcessMarkStack(); + + ReferenceProcessor* rp = GetHeap()->GetReferenceProcessor(); + bool clear_soft_references = GetCurrentIteration()->GetClearSoftReferences(); + rp->Setup(self, this, /*concurrent=*/ true, clear_soft_references); + if (!clear_soft_references) { + // Forward as many SoftReferences as possible before inhibiting reference access. + rp->ForwardSoftReferences(GetTimings()); + } + // We transition through three mark stack modes (thread-local, shared, GC-exclusive). The - // primary reasons are the fact that we need to use a checkpoint to process thread-local mark + // primary reasons are that we need to use a checkpoint to process thread-local mark // stacks, but after we disable weak refs accesses, we can't use a checkpoint due to a deadlock // issue because running threads potentially blocking at WaitHoldingLocks, and that once we // reach the point where we process weak references, we can avoid using a lock when accessing // the GC mark stack, which makes mark stack processing more efficient. - // Process the mark stack once in the thread local stack mode. This marks most of the live - // objects, aside from weak ref accesses with read barriers (Reference::GetReferent() and system - // weaks) that may happen concurrently while we processing the mark stack and newly mark/gray - // objects and push refs on the mark stack. - ProcessMarkStack(); // Switch to the shared mark stack mode. That is, revoke and process thread-local mark stacks // for the last time before transitioning to the shared mark stack mode, which would process new // refs that may have been concurrently pushed onto the mark stack during the ProcessMarkStack() @@ -1649,6 +1659,7 @@ void ConcurrentCopying::CopyingPhase() { // forever as the checkpoint never finishes (See runtime/mutator_gc_coord.md). SwitchToSharedMarkStackMode(); CHECK(!self->GetWeakRefAccessEnabled()); + // Now that weak refs accesses are disabled, once we exhaust the shared mark stack again here // (which may be non-empty if there were refs found on thread-local mark stacks during the above // SwitchToSharedMarkStackMode() call), we won't have new refs to process, that is, mutators @@ -1656,6 +1667,7 @@ void ConcurrentCopying::CopyingPhase() { // before we process weak refs below. ProcessMarkStack(); CheckEmptyMarkStack(); + // Switch to the GC exclusive mark stack mode so that we can process the mark stack without a // lock from this point on. SwitchToGcExclusiveMarkStackMode(); @@ -1663,24 +1675,23 @@ void ConcurrentCopying::CopyingPhase() { if (kVerboseMode) { LOG(INFO) << "ProcessReferences"; } - // Process weak references. This may produce new refs to process and have them processed via - // ProcessMarkStack (in the GC exclusive mark stack mode). + // Process weak references. 
This also marks through finalizers. Although + // reference processing is "disabled", some accesses will proceed once we've ensured that + // objects directly reachable by the mutator are marked, i.e. before we mark through + // finalizers. ProcessReferences(self); CheckEmptyMarkStack(); + // JNI WeakGlobalRefs and most other system weaks cannot be processed until we're done marking + // through finalizers, since such references to finalizer-reachable objects must be preserved. if (kVerboseMode) { LOG(INFO) << "SweepSystemWeaks"; } SweepSystemWeaks(self); + CheckEmptyMarkStack(); + ReenableWeakRefAccess(self); if (kVerboseMode) { LOG(INFO) << "SweepSystemWeaks done"; } - // Process the mark stack here one last time because the above SweepSystemWeaks() call may have - // marked some objects (strings alive) as hash_set::Erase() can call the hash function for - // arbitrary elements in the weak intern table in InternTable::Table::SweepWeaks(). - ProcessMarkStack(); - CheckEmptyMarkStack(); - // Re-enable weak ref accesses. - ReenableWeakRefAccess(self); // Free data for class loaders that we unloaded. Runtime::Current()->GetClassLinker()->CleanupClassLoaders(); // Marking is done. Disable marking. @@ -3830,8 +3841,7 @@ void ConcurrentCopying::DelayReferenceReferent(ObjPtr<mirror::Class> klass, void ConcurrentCopying::ProcessReferences(Thread* self) { // We don't really need to lock the heap bitmap lock as we use CAS to mark in bitmaps. WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); - GetHeap()->GetReferenceProcessor()->ProcessReferences( - /*concurrent=*/ true, GetTimings(), GetCurrentIteration()->GetClearSoftReferences(), this); + GetHeap()->GetReferenceProcessor()->ProcessReferences(self, GetTimings()); } void ConcurrentCopying::RevokeAllThreadLocalBuffers() { diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc index e3cd1eefe9..bd5ce37b2c 100644 --- a/runtime/gc/collector/mark_sweep.cc +++ b/runtime/gc/collector/mark_sweep.cc @@ -177,11 +177,7 @@ void MarkSweep::RunPhases() { void MarkSweep::ProcessReferences(Thread* self) { WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); - GetHeap()->GetReferenceProcessor()->ProcessReferences( - true, - GetTimings(), - GetCurrentIteration()->GetClearSoftReferences(), - this); + GetHeap()->GetReferenceProcessor()->ProcessReferences(self, GetTimings()); } void MarkSweep::PausePhase() { @@ -213,7 +209,9 @@ void MarkSweep::PausePhase() { Runtime::Current()->DisallowNewSystemWeaks(); // Enable the reference processing slow path, needs to be done with mutators paused since there // is no lock in the GetReferent fast path. 
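Taken together, the CopyingPhase() hunks above reorder the reference-related steps of the phase. A condensed, compilable outline of the new sequence, using no-op stand-ins (the real phase does far more work at each step):

    #include <cstdio>

    namespace sketch {
    // Every function here is a stand-in; only the ordering is meaningful.
    void ProcessMarkStack()        { std::puts("drain mark stacks (thread-local mode)"); }
    void SetupReferenceProcessor() { std::puts("rp->Setup(self, this, /*concurrent=*/true, clear_soft)"); }
    void ForwardSoftReferences()   { std::puts("mark through SoftReferences while weak access is still enabled"); }
    void DisableWeakRefAccess()    { std::puts("SwitchToSharedMarkStackMode(): checkpoint disables weak ref access"); }
    void DrainSharedMarkStack()    { std::puts("ProcessMarkStack(), then SwitchToGcExclusiveMarkStackMode()"); }
    void ProcessReferences()       { std::puts("enqueue java.lang.ref.References, mark through finalizers"); }
    void SweepSystemWeaks()        { std::puts("sweep JNI weak globals etc. after finalizer marking"); }
    void ReenableWeakRefAccess()   { std::puts("set per-thread flags back to kEnabled"); }

    void CopyingPhaseReferenceSteps(bool clear_soft_references) {
      ProcessMarkStack();              // 1. most live objects get marked here
      SetupReferenceProcessor();       // 2. new: Setup() runs before weak ref access is disabled
      if (!clear_soft_references) {
        ForwardSoftReferences();       // 3. new: forward SoftReferences early, access still enabled
      }
      DisableWeakRefAccess();          // 4.
      DrainSharedMarkStack();          // 5.
      ProcessReferences();             // 6. advances rp_state_ (see reference_processor.cc below)
      SweepSystemWeaks();              // 7. deferred until finalizer-reachable objects are marked
      ReenableWeakRefAccess();         // 8. the extra ProcessMarkStack() after sweeping is gone
    }
    }  // namespace sketch

    int main() { sketch::CopyingPhaseReferenceSteps(/*clear_soft_references=*/false); }

The mark_sweep.cc and semi_space.cc changes follow the same protocol: Setup() first, then either EnableSlowPath() for the concurrent mark-sweep pause or an immediate ProcessReferences() for semi-space.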
- GetHeap()->GetReferenceProcessor()->EnableSlowPath(); + ReferenceProcessor* rp = GetHeap()->GetReferenceProcessor(); + rp->Setup(self, this, /*concurrent=*/true, GetCurrentIteration()->GetClearSoftReferences()); + rp->EnableSlowPath(); } void MarkSweep::PreCleanCards() { diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc index 0848f34d0b..53b060483f 100644 --- a/runtime/gc/collector/semi_space.cc +++ b/runtime/gc/collector/semi_space.cc @@ -145,8 +145,9 @@ void SemiSpace::InitializePhase() { void SemiSpace::ProcessReferences(Thread* self) { WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); - GetHeap()->GetReferenceProcessor()->ProcessReferences( - false, GetTimings(), GetCurrentIteration()->GetClearSoftReferences(), this); + ReferenceProcessor* rp = GetHeap()->GetReferenceProcessor(); + rp->Setup(self, this, /*concurrent=*/false, GetCurrentIteration()->GetClearSoftReferences()); + rp->ProcessReferences(self, GetTimings()); } void SemiSpace::MarkingPhase() { diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc index e34d140db4..df8b3fed64 100644 --- a/runtime/gc/reference_processor.cc +++ b/runtime/gc/reference_processor.cc @@ -32,6 +32,7 @@ #include "reflection.h" #include "scoped_thread_state_change-inl.h" #include "task_processor.h" +#include "thread-inl.h" #include "thread_pool.h" #include "well_known_classes.h" @@ -42,7 +43,6 @@ static constexpr bool kAsyncReferenceQueueAdd = false; ReferenceProcessor::ReferenceProcessor() : collector_(nullptr), - preserving_references_(false), condition_("reference processor condition", *Locks::reference_processor_lock_) , soft_reference_queue_(Locks::reference_queue_soft_references_lock_), weak_reference_queue_(Locks::reference_queue_weak_references_lock_), @@ -89,17 +89,20 @@ void ReferenceProcessor::BroadcastForSlowPath(Thread* self) { ObjPtr<mirror::Object> ReferenceProcessor::GetReferent(Thread* self, ObjPtr<mirror::Reference> reference) { - if (!kUseReadBarrier || self->GetWeakRefAccessEnabled()) { - // Under read barrier / concurrent copying collector, it's not safe to call GetReferent() when - // weak ref access is disabled as the call includes a read barrier which may push a ref onto the - // mark stack and interfere with termination of marking. - const ObjPtr<mirror::Object> referent = reference->GetReferent(); - // If the referent is null then it is already cleared, we can just return null since there is no - // scenario where it becomes non-null during the reference processing phase. - if (UNLIKELY(!SlowPathEnabled()) || referent == nullptr) { - return referent; - } + auto slow_path_required = [this, self]() { + return kUseReadBarrier ? !self->GetWeakRefAccessEnabled() : SlowPathEnabled(); + }; + if (!slow_path_required()) { + return reference->GetReferent(); + } + // If the referent is null then it is already cleared, we can just return null since there is no + // scenario where it becomes non-null during the reference processing phase. + // A read barrier may be unsafe here, and we use the result only when it's null or marked. 
+ ObjPtr<mirror::Object> referent = reference->template GetReferent<kWithoutReadBarrier>(); + if (referent.IsNull()) { + return referent; } + bool started_trace = false; uint64_t start_millis; auto finish_trace = [](uint64_t start_millis) { @@ -112,50 +115,47 @@ ObjPtr<mirror::Object> ReferenceProcessor::GetReferent(Thread* self, }; MutexLock mu(self, *Locks::reference_processor_lock_); - while ((!kUseReadBarrier && SlowPathEnabled()) || - (kUseReadBarrier && !self->GetWeakRefAccessEnabled())) { - ObjPtr<mirror::Object> referent = reference->GetReferent<kWithoutReadBarrier>(); - // If the referent became cleared, return it. Don't need barrier since thread roots can't get - // updated until after we leave the function due to holding the mutator lock. - if (referent == nullptr) { - if (started_trace) { - finish_trace(start_millis); + // Keeping reference_processor_lock_ blocks the broadcast when we try to reenable the fast path. + while (slow_path_required()) { + DCHECK(collector_ != nullptr); + constexpr bool kOtherReadBarrier = kUseReadBarrier && !kUseBakerReadBarrier; + if (UNLIKELY(reference->IsFinalizerReferenceInstance() + || rp_state_ == RpState::kStarting /* too early to determine mark state */ + || (kOtherReadBarrier && reference->IsPhantomReferenceInstance()))) { + // Odd cases in which it doesn't hurt to just wait, or the wait is likely to be very brief. + + // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the + // presence of threads blocking for weak ref access. + self->CheckEmptyCheckpointFromWeakRefAccess(Locks::reference_processor_lock_); + if (!started_trace) { + ATraceBegin("GetReferent blocked"); + started_trace = true; + start_millis = MilliTime(); } - return nullptr; + condition_.WaitHoldingLocks(self); + continue; } - // Try to see if the referent is already marked by using the is_marked_callback. We can return - // it to the mutator as long as the GC is not preserving references. - if (LIKELY(collector_ != nullptr)) { - // If it's null it means not marked, but it could become marked if the referent is reachable - // by finalizer referents. So we cannot return in this case and must block. Otherwise, we - // can return it to the mutator as long as the GC is not preserving references, in which - // case only black nodes can be safely returned. If the GC is preserving references, the - // mutator could take a white field from a grey or white node and move it somewhere else - // in the heap causing corruption since this field would get swept. - // Use the cached referent instead of calling GetReferent since other threads could call - // Reference.clear() after we did the null check resulting in a null pointer being - // incorrectly passed to IsMarked. b/33569625 - ObjPtr<mirror::Object> forwarded_ref = collector_->IsMarked(referent.Ptr()); - if (forwarded_ref != nullptr) { - // Non null means that it is marked. - if (!preserving_references_ || - (LIKELY(!reference->IsFinalizerReferenceInstance()) && reference->IsUnprocessed())) { - if (started_trace) { - finish_trace(start_millis); - } - return forwarded_ref; - } - } + DCHECK(!reference->IsPhantomReferenceInstance()); + + if (rp_state_ == RpState::kInitClearingDone) { + // Reachable references have their final referent values. + break; } - // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the - // presence of threads blocking for weak ref access. 
- self->CheckEmptyCheckpointFromWeakRefAccess(Locks::reference_processor_lock_); - if (!started_trace) { - ATraceBegin("GetReferent blocked"); - started_trace = true; - start_millis = MilliTime(); + // Although reference processing is not done, we can always predict the correct return value + // based on the current mark state. No additional marking from finalizers has been done, since + // we hold reference_processor_lock_, which is required to advance to kInitClearingDone. + DCHECK(rp_state_ == RpState::kInitMarkingDone); + // Re-load and re-check referent, since the current one may have been read before we acquired + // reference_lock. In particular a Reference.clear() call may have intervened. (b/33569625) + referent = reference->GetReferent<kWithoutReadBarrier>(); + ObjPtr<mirror::Object> forwarded_ref = + referent.IsNull() ? nullptr : collector_->IsMarked(referent.Ptr()); + // Either the referent was marked, and forwarded_ref is the correct return value, or it + // was not, and forwarded_ref == null, which is again the correct return value. + if (started_trace) { + finish_trace(start_millis); } - condition_.WaitHoldingLocks(self); + return forwarded_ref; } if (started_trace) { finish_trace(start_millis); @@ -163,36 +163,64 @@ ObjPtr<mirror::Object> ReferenceProcessor::GetReferent(Thread* self, return reference->GetReferent(); } -void ReferenceProcessor::StartPreservingReferences(Thread* self) { - MutexLock mu(self, *Locks::reference_processor_lock_); - preserving_references_ = true; +// Forward SoftReferences. Can be done before we disable Reference access. Only +// invoked if we are not clearing SoftReferences. +uint32_t ReferenceProcessor::ForwardSoftReferences(TimingLogger* timings) { + TimingLogger::ScopedTiming split( + concurrent_ ? "ForwardSoftReferences" : "(Paused)ForwardSoftReferences", timings); + // We used to argue that we should be smarter about doing this conditionally, but it's unclear + // that's actually better than the more predictable strategy of basically only clearing + // SoftReferences just before we would otherwise run out of memory. + uint32_t non_null_refs = soft_reference_queue_.ForwardSoftReferences(collector_); + if (ATraceEnabled()) { + static constexpr size_t kBufSize = 80; + char buf[kBufSize]; + snprintf(buf, kBufSize, "Marking for %" PRIu32 " SoftReferences", non_null_refs); + ATraceBegin(buf); + collector_->ProcessMarkStack(); + ATraceEnd(); + } else { + collector_->ProcessMarkStack(); + } + return non_null_refs; } -void ReferenceProcessor::StopPreservingReferences(Thread* self) { +void ReferenceProcessor::Setup(Thread* self, + collector::GarbageCollector* collector, + bool concurrent, + bool clear_soft_references) { + DCHECK(collector != nullptr); MutexLock mu(self, *Locks::reference_processor_lock_); - preserving_references_ = false; - // We are done preserving references, some people who are blocked may see a marked referent. - condition_.Broadcast(self); + collector_ = collector; + rp_state_ = RpState::kStarting; + concurrent_ = concurrent; + clear_soft_references_ = clear_soft_references; } // Process reference class instances and schedule finalizations. -void ReferenceProcessor::ProcessReferences(bool concurrent, - TimingLogger* timings, - bool clear_soft_references, - collector::GarbageCollector* collector) { - TimingLogger::ScopedTiming t(concurrent ? __FUNCTION__ : "(Paused)ProcessReferences", timings); - Thread* self = Thread::Current(); +// We advance rp_state_ to signal partial completion for the benefit of GetReferent. 
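The rewritten GetReferent() above replaces the preserving_references_ flag with a three-state protocol: instead of blocking whenever references are being preserved, a blocked caller can usually predict the final answer from the current mark state. A simplified standalone rendering of the per-iteration decision (hypothetical names; tracing, locking, and the actual IsMarked() call are omitted):

    #include <cstdint>

    enum class RpState : uint8_t { kStarting, kInitMarkingDone, kInitClearingDone };

    enum class Action : uint8_t {
      kWait,                 // block on condition_ until reference processing advances
      kUseFinalReferent,     // the referent already has its final value; read it normally
      kAnswerFromMarkState,  // reload the referent and return collector->IsMarked(referent)
    };

    Action DecideWhileSlowPathRequired(RpState state,
                                       bool is_finalizer_reference,
                                       bool is_phantom_reference,
                                       bool non_baker_read_barrier) {
      // Odd cases in which waiting is harmless or expected to be brief.
      if (is_finalizer_reference ||
          state == RpState::kStarting ||                     // too early to trust the mark state
          (non_baker_read_barrier && is_phantom_reference)) {
        return Action::kWait;
      }
      if (state == RpState::kInitClearingDone) {
        // Mutator-reachable references have been cleared or preserved already.
        return Action::kUseFinalReferent;
      }
      // kInitMarkingDone: no marking from finalizers has happened yet, because advancing to
      // kInitClearingDone requires reference_processor_lock_, which the caller holds. The
      // current mark state therefore already predicts the final answer.
      return Action::kAnswerFromMarkState;
    }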
+void ReferenceProcessor::ProcessReferences(Thread* self, TimingLogger* timings) { + TimingLogger::ScopedTiming t(concurrent_ ? __FUNCTION__ : "(Paused)ProcessReferences", timings); + if (!clear_soft_references_) { + // Forward any additional SoftReferences we discovered late, now that reference access has been + // inhibited. + while (!soft_reference_queue_.IsEmpty()) { + ForwardSoftReferences(timings); + } + } { MutexLock mu(self, *Locks::reference_processor_lock_); - collector_ = collector; if (!kUseReadBarrier) { - CHECK_EQ(SlowPathEnabled(), concurrent) << "Slow path must be enabled iff concurrent"; + CHECK_EQ(SlowPathEnabled(), concurrent_) << "Slow path must be enabled iff concurrent"; } else { - // Weak ref access is enabled at Zygote compaction by SemiSpace (concurrent == false). - CHECK_EQ(!self->GetWeakRefAccessEnabled(), concurrent); + // Weak ref access is enabled at Zygote compaction by SemiSpace (concurrent_ == false). + CHECK_EQ(!self->GetWeakRefAccessEnabled(), concurrent_); } + DCHECK(rp_state_ == RpState::kStarting); + rp_state_ = RpState::kInitMarkingDone; + condition_.Broadcast(self); } - if (kIsDebugBuild && collector->IsTransactionActive()) { + if (kIsDebugBuild && collector_->IsTransactionActive()) { // In transaction mode, we shouldn't enqueue any Reference to the queues. // See DelayReferenceReferent(). DCHECK(soft_reference_queue_.IsEmpty()); @@ -200,75 +228,84 @@ void ReferenceProcessor::ProcessReferences(bool concurrent, DCHECK(finalizer_reference_queue_.IsEmpty()); DCHECK(phantom_reference_queue_.IsEmpty()); } - // Unless required to clear soft references with white references, preserve some white referents. - if (!clear_soft_references) { - TimingLogger::ScopedTiming split(concurrent ? "ForwardSoftReferences" : - "(Paused)ForwardSoftReferences", timings); - if (concurrent) { - StartPreservingReferences(self); - } - // TODO: Add smarter logic for preserving soft references. The behavior should be a conditional - // mark if the SoftReference is supposed to be preserved. - uint32_t non_null_refs = soft_reference_queue_.ForwardSoftReferences(collector); - if (ATraceEnabled()) { - static constexpr size_t kBufSize = 80; - char buf[kBufSize]; - snprintf(buf, kBufSize, "Marking for %" PRIu32 " SoftReferences", non_null_refs); - ATraceBegin(buf); - collector->ProcessMarkStack(); - ATraceEnd(); - } else { - collector->ProcessMarkStack(); - } - if (concurrent) { - StopPreservingReferences(self); - } - } // Clear all remaining soft and weak references with white referents. - soft_reference_queue_.ClearWhiteReferences(&cleared_references_, collector); - weak_reference_queue_.ClearWhiteReferences(&cleared_references_, collector); + // This misses references only reachable through finalizers. + soft_reference_queue_.ClearWhiteReferences(&cleared_references_, collector_); + weak_reference_queue_.ClearWhiteReferences(&cleared_references_, collector_); + // Defer PhantomReference processing until we've finished marking through finalizers. { - TimingLogger::ScopedTiming t2(concurrent ? "EnqueueFinalizerReferences" : - "(Paused)EnqueueFinalizerReferences", timings); - if (concurrent) { - StartPreservingReferences(self); - } + // TODO: Capture mark state of some system weaks here. If the referent was marked here, + // then it is now safe to return, since it can only refer to marked objects. If it becomes + // marked below, that is no longer guaranteed. 
+ MutexLock mu(self, *Locks::reference_processor_lock_); + rp_state_ = RpState::kInitClearingDone; + // At this point, all mutator-accessible data is marked (black). Objects enqueued for + // finalization will only be made available to the mutator via CollectClearedReferences after + // we're fully done marking. Soft and WeakReferences accessible to the mutator have been + // processed and refer only to black objects. Thus there is no danger of the mutator getting + // access to non-black objects. Weak reference processing is still nominally suspended, + // But many kinds of references, including all java.lang.ref ones, are handled normally from + // here on. See GetReferent(). + } + { + TimingLogger::ScopedTiming t2( + concurrent_ ? "EnqueueFinalizerReferences" : "(Paused)EnqueueFinalizerReferences", timings); // Preserve all white objects with finalize methods and schedule them for finalization. FinalizerStats finalizer_stats = - finalizer_reference_queue_.EnqueueFinalizerReferences(&cleared_references_, collector); + finalizer_reference_queue_.EnqueueFinalizerReferences(&cleared_references_, collector_); if (ATraceEnabled()) { static constexpr size_t kBufSize = 80; char buf[kBufSize]; snprintf(buf, kBufSize, "Marking from %" PRIu32 " / %" PRIu32 " finalizers", finalizer_stats.num_enqueued_, finalizer_stats.num_refs_); ATraceBegin(buf); - collector->ProcessMarkStack(); + collector_->ProcessMarkStack(); ATraceEnd(); } else { - collector->ProcessMarkStack(); - } - if (concurrent) { - StopPreservingReferences(self); + collector_->ProcessMarkStack(); } } - // Clear all finalizer referent reachable soft and weak references with white referents. - soft_reference_queue_.ClearWhiteReferences(&cleared_references_, collector); - weak_reference_queue_.ClearWhiteReferences(&cleared_references_, collector); - // Clear all phantom references with white referents. - phantom_reference_queue_.ClearWhiteReferences(&cleared_references_, collector); + + // Process all soft and weak references with white referents, where the references are reachable + // only from finalizers. It is unclear that there is any way to do this without slightly + // violating some language spec. We choose to apply normal Reference processing rules for these. + // This exposes the following issues: + // 1) In the case of an unmarked referent, we may end up enqueuing an "unreachable" reference. + // This appears unavoidable, since we need to clear the reference for safety, unless we + // mark the referent and undo finalization decisions for objects we encounter during marking. + // (Some versions of the RI seem to do something along these lines.) + // Or we could clear the reference without enqueuing it, which also seems strange and + // unhelpful. + // 2) In the case of a marked referent, we will preserve a reference to objects that may have + // been enqueued for finalization. Again fixing this would seem to involve at least undoing + // previous finalization / reference clearing decisions. (This would also mean than an object + // containing both a strong and a WeakReference to the same referent could see the + // WeakReference cleared.) + // The treatment in (2) is potentially quite dangerous, since Reference.get() can e.g. return a + // finalized object containing pointers to native objects that have already been deallocated. + // But it can be argued that this is just an instance of the broader rule that it is not safe + // for finalizers to access otherwise inaccessible finalizable objects. 
+ soft_reference_queue_.ClearWhiteReferences(&cleared_references_, collector_, + /*report_cleared=*/ true); + weak_reference_queue_.ClearWhiteReferences(&cleared_references_, collector_, + /*report_cleared=*/ true); + + // Clear all phantom references with white referents. It's fine to do this just once here. + phantom_reference_queue_.ClearWhiteReferences(&cleared_references_, collector_); + // At this point all reference queues other than the cleared references should be empty. DCHECK(soft_reference_queue_.IsEmpty()); DCHECK(weak_reference_queue_.IsEmpty()); DCHECK(finalizer_reference_queue_.IsEmpty()); DCHECK(phantom_reference_queue_.IsEmpty()); + { MutexLock mu(self, *Locks::reference_processor_lock_); // Need to always do this since the next GC may be concurrent. Doing this for only concurrent // could result in a stale is_marked_callback_ being called before the reference processing // starts since there is a small window of time where slow_path_enabled_ is enabled but the // callback isn't yet set. - collector_ = nullptr; - if (!kUseReadBarrier && concurrent) { + if (!kUseReadBarrier && concurrent_) { // Done processing, disable the slow path and broadcast to the waiters. DisableSlowPath(self); } diff --git a/runtime/gc/reference_processor.h b/runtime/gc/reference_processor.h index 51c50544bf..0f84211a87 100644 --- a/runtime/gc/reference_processor.h +++ b/runtime/gc/reference_processor.h @@ -46,19 +46,29 @@ class Heap; class ReferenceProcessor { public: ReferenceProcessor(); - void ProcessReferences(bool concurrent, - TimingLogger* timings, - bool clear_soft_references, - gc::collector::GarbageCollector* collector) + + // Initialize for a reference processing pass. Called before suspending weak + // access. + void Setup(Thread* self, + collector::GarbageCollector* collector, + bool concurrent, + bool clear_soft_references) + REQUIRES(!Locks::reference_processor_lock_); + // Enqueue all types of java.lang.ref.References, and mark through finalizers. + // Assumes there is no concurrent mutator-driven marking, i.e. all potentially + // mutator-accessible objects should be marked before this. + void ProcessReferences(Thread* self, TimingLogger* timings) REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) REQUIRES(!Locks::reference_processor_lock_); + // The slow path bool is contained in the reference class object, can only be set once // Only allow setting this with mutators suspended so that we can avoid using a lock in the // GetReferent fast path as an optimization. void EnableSlowPath() REQUIRES_SHARED(Locks::mutator_lock_); void BroadcastForSlowPath(Thread* self); - // Decode the referent, may block if references are being processed. + // Decode the referent, may block if references are being processed. In the normal + // no-read-barrier or Baker-read-barrier cases, we assume reference is not a PhantomReference. ObjPtr<mirror::Object> GetReferent(Thread* self, ObjPtr<mirror::Reference> reference) REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Locks::reference_processor_lock_); // Collects the cleared references and returns a task, to be executed after FinishGC, that will @@ -78,27 +88,30 @@ class ReferenceProcessor { void ClearReferent(ObjPtr<mirror::Reference> ref) REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Locks::reference_processor_lock_); + uint32_t ForwardSoftReferences(TimingLogger* timings) + REQUIRES_SHARED(Locks::mutator_lock_); private: bool SlowPathEnabled() REQUIRES_SHARED(Locks::mutator_lock_); // Called by ProcessReferences. 
void DisableSlowPath(Thread* self) REQUIRES(Locks::reference_processor_lock_) REQUIRES_SHARED(Locks::mutator_lock_); - // If we are preserving references it means that some dead objects may become live, we use start - // and stop preserving to block mutators using GetReferrent from getting access to these - // referents. - void StartPreservingReferences(Thread* self) REQUIRES(!Locks::reference_processor_lock_); - void StopPreservingReferences(Thread* self) REQUIRES(!Locks::reference_processor_lock_); // Wait until reference processing is done. void WaitUntilDoneProcessingReferences(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::reference_processor_lock_); // Collector which is clearing references, used by the GetReferent to return referents which are - // already marked. - collector::GarbageCollector* collector_ GUARDED_BY(Locks::reference_processor_lock_); - // Boolean for whether or not we are preserving references (either soft references or finalizers). - // If this is true, then we cannot return a referent (see comment in GetReferent). - bool preserving_references_ GUARDED_BY(Locks::reference_processor_lock_); + // already marked. Only updated by thread currently running GC. + // Guarded by reference_processor_lock_ when not read by collector. Only the collector changes + // it. + collector::GarbageCollector* collector_; + // Reference processor state. Only valid while weak reference processing is suspended. + // Used by GetReferent and friends to return early. + enum class RpState : uint8_t { kStarting, kInitMarkingDone, kInitClearingDone }; + RpState rp_state_ GUARDED_BY(Locks::reference_processor_lock_); + bool concurrent_; // Running concurrently with mutator? Only used by GC thread. + bool clear_soft_references_; // Only used by GC thread. + // Condition that people wait on if they attempt to get the referent of a reference while // processing is in progress. Broadcast when an empty checkpoint is requested, but not for other // checkpoints or thread suspensions. See mutator_gc_coord.md. diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc index 568ca04c1d..6bdacaf18c 100644 --- a/runtime/gc/reference_queue.cc +++ b/runtime/gc/reference_queue.cc @@ -131,7 +131,8 @@ size_t ReferenceQueue::GetLength() const { } void ReferenceQueue::ClearWhiteReferences(ReferenceQueue* cleared_references, - collector::GarbageCollector* collector) { + collector::GarbageCollector* collector, + bool report_cleared) { while (!IsEmpty()) { ObjPtr<mirror::Reference> ref = DequeuePendingReference(); mirror::HeapReference<mirror::Object>* referent_addr = ref->GetReferentReferenceAddr(); @@ -145,6 +146,15 @@ void ReferenceQueue::ClearWhiteReferences(ReferenceQueue* cleared_references, ref->ClearReferent<false>(); } cleared_references->EnqueueReference(ref); + if (report_cleared) { + static bool already_reported = false; + if (!already_reported) { + // TODO: Maybe do this only if the queue is non-null? + LOG(WARNING) + << "Cleared Reference was only reachable from finalizer (only reported once)"; + already_reported = true; + } + } } // Delay disabling the read barrier until here so that the ClearReferent call above in // transaction mode will trigger the read barrier. 
@@ -182,22 +192,33 @@ FinalizerStats ReferenceQueue::EnqueueFinalizerReferences(ReferenceQueue* cleare } uint32_t ReferenceQueue::ForwardSoftReferences(MarkObjectVisitor* visitor) { - if (UNLIKELY(IsEmpty())) { - return 0; - } uint32_t num_refs(0); - const ObjPtr<mirror::Reference> head = list_; - ObjPtr<mirror::Reference> ref = head; + Thread* self = Thread::Current(); + static constexpr int SR_BUF_SIZE = 32; + ObjPtr<mirror::Reference> buf[SR_BUF_SIZE]; + int n_entries; + bool empty; do { - mirror::HeapReference<mirror::Object>* referent_addr = ref->GetReferentReferenceAddr(); - if (referent_addr->AsMirrorPtr() != nullptr) { - // do_atomic_update is false because mutators can't access the referent due to the weak ref - // access blocking. - visitor->MarkHeapReference(referent_addr, /*do_atomic_update=*/ false); - ++num_refs; + { + // Acquire lock only a few times and hold it as briefly as possible. + MutexLock mu(self, *lock_); + empty = IsEmpty(); + for (n_entries = 0; n_entries < SR_BUF_SIZE && !empty; ++n_entries) { + // Dequeuing the Reference here means it could possibly be enqueued again during this GC. + // That's unlikely and benign. + buf[n_entries] = DequeuePendingReference(); + empty = IsEmpty(); + } + } + for (int i = 0; i < n_entries; ++i) { + mirror::HeapReference<mirror::Object>* referent_addr = buf[i]->GetReferentReferenceAddr(); + if (referent_addr->AsMirrorPtr() != nullptr) { + visitor->MarkHeapReference(referent_addr, /*do_atomic_update=*/ true); + ++num_refs; + } + DisableReadBarrierForReference(buf[i]->AsReference()); } - ref = ref->GetPendingNext(); - } while (LIKELY(ref != head)); + } while (!empty); return num_refs; } diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h index 06243c40a2..3fda7167d4 100644 --- a/runtime/gc/reference_queue.h +++ b/runtime/gc/reference_queue.h @@ -90,17 +90,20 @@ class ReferenceQueue { collector::GarbageCollector* collector) REQUIRES_SHARED(Locks::mutator_lock_); - // Walks the reference list marking any references subject to the reference clearing policy. - // References with a black referent are removed from the list. References with white referents - // biased toward saving are blackened and also removed from the list. - // Returns the number of non-null soft references. + // Walks the reference list marking and dequeuing any references subject to the reference + // clearing policy. References with a black referent are removed from the list. References + // with white referents biased toward saving are blackened and also removed from the list. + // Returns the number of non-null soft references. May be called concurrently with + // AtomicEnqueueIfNotEnqueued(). uint32_t ForwardSoftReferences(MarkObjectVisitor* visitor) + REQUIRES(!*lock_) REQUIRES_SHARED(Locks::mutator_lock_); // Unlink the reference list clearing references objects with white referents. Cleared references // registered to a reference queue are scheduled for appending by the heap worker thread. void ClearWhiteReferences(ReferenceQueue* cleared_references, - collector::GarbageCollector* collector) + collector::GarbageCollector* collector, + bool report_cleared = false) REQUIRES_SHARED(Locks::mutator_lock_); void Dump(std::ostream& os) const REQUIRES_SHARED(Locks::mutator_lock_); @@ -109,9 +112,12 @@ class ReferenceQueue { bool IsEmpty() const { return list_ == nullptr; } + + // Clear this queue. Only safe after handing off the contents elsewhere for further processing. 
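The ForwardSoftReferences() rewrite above stops walking the circular pending list in place and instead drains the queue in batches of up to 32 references, holding the queue lock only while refilling a local buffer and doing the marking outside it. The same batch-drain pattern in a generic, self-contained form (std::deque and std::mutex stand in for the intrusive Reference list and its lock):

    #include <cstddef>
    #include <deque>
    #include <mutex>
    #include <vector>

    // Generic sketch of the pattern; the real code dequeues mirror::Reference objects and calls
    // MarkObjectVisitor::MarkHeapReference(referent_addr, /*do_atomic_update=*/ true).
    template <typename T, typename Visit>
    size_t DrainInBatches(std::deque<T>& queue, std::mutex& lock, Visit&& visit) {
      constexpr size_t kBufSize = 32;  // mirrors SR_BUF_SIZE
      size_t processed = 0;
      bool empty;
      do {
        std::vector<T> buf;
        buf.reserve(kBufSize);
        {
          // Hold the lock only while moving up to kBufSize entries out of the queue.
          std::lock_guard<std::mutex> guard(lock);
          empty = queue.empty();
          while (!empty && buf.size() < kBufSize) {
            buf.push_back(queue.front());
            queue.pop_front();
            empty = queue.empty();
          }
        }
        for (T& item : buf) {
          visit(item);  // marking happens with the lock released
          ++processed;
        }
      } while (!empty);
      return processed;
    }

As the new comments note, dequeuing means a reference could in principle be re-enqueued during the same collection, which is unlikely and benign; MarkHeapReference now uses do_atomic_update=true, presumably because this pass can run while mutators still have weak ref access.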
void Clear() { list_ = nullptr; } + mirror::Reference* GetList() REQUIRES_SHARED(Locks::mutator_lock_) { return list_; } @@ -124,8 +130,10 @@ class ReferenceQueue { // Lock, used for parallel GC reference enqueuing. It allows for multiple threads simultaneously // calling AtomicEnqueueIfNotEnqueued. Mutex* const lock_; - // The actual reference list. Only a root for the mark compact GC since it will be null for other - // GC types. Not an ObjPtr since it is accessed from multiple threads. + // The actual reference list. Only a root for the mark compact GC since it + // will be null during root marking for other GC types. Not an ObjPtr since it + // is accessed from multiple threads. Points to a singly-linked circular list + // using the pendingNext field. mirror::Reference* list_; DISALLOW_IMPLICIT_CONSTRUCTORS(ReferenceQueue); diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc index 25c45cba6a..5d4c50727e 100644 --- a/runtime/intern_table.cc +++ b/runtime/intern_table.cc @@ -32,6 +32,7 @@ #include "object_callbacks.h" #include "scoped_thread_state_change-inl.h" #include "thread.h" +#include "thread-inl.h" namespace art { diff --git a/runtime/intern_table.h b/runtime/intern_table.h index db6016eba7..9d1720ffa5 100644 --- a/runtime/intern_table.h +++ b/runtime/intern_table.h @@ -116,7 +116,7 @@ class InternTable { REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_); // Only used by image writer. Special version that may not cause thread suspension since the GC - // cannot be running while we are doing image writing. Maybe be called while while holding a + // cannot be running while we are doing image writing. Maybe be called while holding a // lock since there will not be thread suspension. ObjPtr<mirror::String> InternStrongImageString(ObjPtr<mirror::String> s) REQUIRES_SHARED(Locks::mutator_lock_); diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h index 1809227f2c..e9b78f3259 100644 --- a/runtime/interpreter/interpreter_common.h +++ b/runtime/interpreter/interpreter_common.h @@ -59,6 +59,7 @@ #include "obj_ptr.h" #include "stack.h" #include "thread.h" +#include "thread-inl.h" #include "unstarted_runtime.h" #include "verifier/method_verifier.h" #include "well_known_classes.h" diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc index 34ddfc4233..ad44ac2094 100644 --- a/runtime/jit/jit_code_cache.cc +++ b/runtime/jit/jit_code_cache.cc @@ -57,6 +57,7 @@ #include "scoped_thread_state_change-inl.h" #include "stack.h" #include "thread-current-inl.h" +#include "thread-inl.h" #include "thread_list.h" namespace art { diff --git a/runtime/jni/java_vm_ext.cc b/runtime/jni/java_vm_ext.cc index 12e6d22252..94108c69de 100644 --- a/runtime/jni/java_vm_ext.cc +++ b/runtime/jni/java_vm_ext.cc @@ -864,6 +864,10 @@ ObjPtr<mirror::Object> JavaVMExt::DecodeWeakGlobalLocked(Thread* self, IndirectR if (kDebugLocking) { Locks::jni_weak_globals_lock_->AssertHeld(self); } + // TODO: Handle the already null case without waiting. + // TODO: Otherwise we should just wait for kInitMarkingDone, and track which weak globals were + // marked at that point. We would only need one mark bit per entry in the weak_globals_ table, + // and a quick pass over that early on during reference processing. 
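The TODO above outlines a possible refinement rather than anything this change implements: snapshot, at kInitMarkingDone, which weak globals already have null or marked referents, so DecodeWeakGlobal() could answer for those entries without waiting for reference processing to finish. A purely hypothetical illustration of that idea (none of these names exist in ART):

    #include <cstddef>
    #include <vector>

    struct WeakEntry { const void* referent = nullptr; };

    // Placeholder for "was this object marked by the time rp_state_ reached kInitMarkingDone?";
    // a real collector would consult its mark bitmaps / region state here.
    bool IsMarkedAtInitMarkingDone(const void* /*obj*/) { return false; }

    class WeakGlobalsMarkSnapshot {
     public:
      // One quick pass, early in reference processing, holding the weak globals lock:
      // one bit per entry in the weak_globals_ table.
      void Capture(const std::vector<WeakEntry>& table) {
        decided_.assign(table.size(), false);
        for (size_t i = 0; i < table.size(); ++i) {
          decided_[i] = table[i].referent == nullptr ||
                        IsMarkedAtInitMarkingDone(table[i].referent);
        }
      }
      // Entries that were already null, or whose referent was already marked, could be decoded
      // immediately; everything else would still wait as DecodeWeakGlobalLocked() does today.
      bool CanAnswerWithoutWaiting(size_t i) const { return decided_[i]; }

     private:
      std::vector<bool> decided_;
    };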
WaitForWeakGlobalsAccess(self); return weak_globals_.Get(ref); } diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h index 3c1e7a0a00..c0dcf8846b 100644 --- a/runtime/thread-inl.h +++ b/runtime/thread-inl.h @@ -372,6 +372,25 @@ inline bool Thread::PushOnThreadLocalAllocationStack(mirror::Object* obj) { return false; } +inline bool Thread::GetWeakRefAccessEnabled() const { + CHECK(kUseReadBarrier); + DCHECK(this == Thread::Current()); + WeakRefAccessState s = tls32_.weak_ref_access_enabled.load(std::memory_order_acquire); + if (s == WeakRefAccessState::kVisiblyEnabled) { + return true; + } else if (s == WeakRefAccessState::kDisabled) { + return false; + } + DCHECK(s == WeakRefAccessState::kEnabled) + << "state = " << static_cast<std::underlying_type_t<WeakRefAccessState>>(s); + // The state is only changed back to DISABLED during a checkpoint. Thus no other thread can + // change the value concurrently here. No other thread reads the value we store here, so there + // is no need for a release store. + tls32_.weak_ref_access_enabled.store(WeakRefAccessState::kVisiblyEnabled, + std::memory_order_relaxed); + return true; +} + inline void Thread::SetThreadLocalAllocationStack(StackReference<mirror::Object>* start, StackReference<mirror::Object>* end) { DCHECK(Thread::Current() == this) << "Should be called by self"; diff --git a/runtime/thread.h b/runtime/thread.h index 1085a563c4..7eb57e914c 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -171,6 +171,20 @@ enum class DeoptimizationMethodType { kDefault // dex pc may or may not advance depending on other conditions. }; +// For the CC colector, normal weak reference access can be disabled on a per-thread basis, while +// processing references. After finishing, the reference processor asynchronously sets the +// per-thread flags back to kEnabled with release memory ordering semantics. Each mutator thread +// should check its flag with acquire semantics before assuming that it is enabled. However, +// that is often too expensive, so the reading thread sets it to kVisiblyEnabled after seeing it +// kEnabled. The Reference.get() intrinsic can thus read it in relaxed mode, and reread (by +// resorting to the slow path) with acquire semantics if it sees a value of kEnabled rather than +// kVisiblyEnabled. +enum class WeakRefAccessState : int32_t { + kVisiblyEnabled = 0, // Enabled, and previously read with acquire load by this thread. + kEnabled, + kDisabled +}; + // This should match RosAlloc::kNumThreadLocalSizeBrackets. static constexpr size_t kNumRosAllocThreadLocalSizeBracketsInThread = 16; @@ -994,14 +1008,13 @@ class Thread { void SetIsGcMarkingAndUpdateEntrypoints(bool is_marking); - bool GetWeakRefAccessEnabled() const { - CHECK(kUseReadBarrier); - return tls32_.weak_ref_access_enabled; - } + bool GetWeakRefAccessEnabled() const; // Only safe for current thread. void SetWeakRefAccessEnabled(bool enabled) { CHECK(kUseReadBarrier); - tls32_.weak_ref_access_enabled = enabled; + WeakRefAccessState new_state = enabled ? 
+ WeakRefAccessState::kEnabled : WeakRefAccessState::kDisabled; + tls32_.weak_ref_access_enabled.store(new_state, std::memory_order_release); } uint32_t GetDisableThreadFlipCount() const { @@ -1672,7 +1685,7 @@ class Thread { thread_exit_check_count(0), is_transitioning_to_runnable(false), is_gc_marking(false), - weak_ref_access_enabled(true), + weak_ref_access_enabled(WeakRefAccessState::kVisiblyEnabled), disable_thread_flip_count(0), user_code_suspend_count(0), force_interpreter_count(0), @@ -1728,14 +1741,17 @@ class Thread { AtomicInteger park_state_; - // True if the thread is allowed to access a weak ref (Reference::GetReferent() and system - // weaks) and to potentially mark an object alive/gray. This is used for concurrent reference - // processing of the CC collector only. This is thread local so that we can enable/disable weak - // ref access by using a checkpoint and avoid a race around the time weak ref access gets - // disabled and concurrent reference processing begins (if weak ref access is disabled during a - // pause, this is not an issue.) Other collectors use Runtime::DisallowNewSystemWeaks() and - // ReferenceProcessor::EnableSlowPath(). - bool32_t weak_ref_access_enabled; + // Determines whether the thread is allowed to directly access a weak ref + // (Reference::GetReferent() and system weaks) and to potentially mark an object alive/gray. + // This is used for concurrent reference processing of the CC collector only. This is thread + // local so that we can enable/disable weak ref access by using a checkpoint and avoid a race + // around the time weak ref access gets disabled and concurrent reference processing begins + // (if weak ref access is disabled during a pause, this is not an issue.) Other collectors use + // Runtime::DisallowNewSystemWeaks() and ReferenceProcessor::EnableSlowPath(). Can be + // concurrently accessed by GetReferent() and set (by iterating over threads). + // Can be changed from kEnabled to kVisiblyEnabled by readers. No other concurrent access is + // possible when that happens. + mutable std::atomic<WeakRefAccessState> weak_ref_access_enabled; // A thread local version of Heap::disable_thread_flip_count_. This keeps track of how many // levels of (nested) JNI critical sections the thread is in and is used to detect a nested JNI |
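The WeakRefAccessState comment above (and the new Thread::GetWeakRefAccessEnabled() in thread-inl.h) describe the protocol this enum enables: the reference processor publishes kEnabled with a release store, the owning mutator confirms it once with an acquire load and then privately upgrades its flag to kVisiblyEnabled, after which relaxed reads (as in the compiled intrinsics at the top of this change) are sufficient. A standalone sketch of that pattern, with illustrative names:

    #include <atomic>
    #include <cstdint>

    enum class WeakRefAccessState : int32_t { kVisiblyEnabled = 0, kEnabled, kDisabled };

    // One flag per mutator thread; this is a sketch, not ART's Thread.
    struct PerThreadWeakRefFlag {
      mutable std::atomic<WeakRefAccessState> state{WeakRefAccessState::kVisiblyEnabled};

      // GC side: disabled via a checkpoint on the thread, re-enabled asynchronously later.
      void SetEnabled(bool enabled) {
        state.store(enabled ? WeakRefAccessState::kEnabled : WeakRefAccessState::kDisabled,
                    std::memory_order_release);
      }

      // Owning thread only. One acquire load pairs with the GC's release store; after that the
      // thread records kVisiblyEnabled, so later checks (and the intrinsic fast path) can use a
      // relaxed load compared against kVisiblyEnabled.
      bool WeakRefAccessEnabled() const {
        WeakRefAccessState s = state.load(std::memory_order_acquire);
        if (s == WeakRefAccessState::kVisiblyEnabled) return true;
        if (s == WeakRefAccessState::kDisabled) return false;
        // s == kEnabled: the enabling store has now been observed with acquire ordering. Only
        // this thread ever writes kVisiblyEnabled, so a relaxed store is sufficient.
        state.store(WeakRefAccessState::kVisiblyEnabled, std::memory_order_relaxed);
        return true;
      }
    };

Seeing kEnabled in the compiled fast path simply costs one trip through the slow path, which performs the acquire load before deciding.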