author     2023-12-19 18:48:15 +0000
committer  2023-12-19 20:32:27 +0000
commit     8bc6a58df7046b4d6f4b51eb274c7e60fea396ff (patch)
tree       ac6aa639279f5cf2048cb147a91cbea98c8088a4 /runtime/gc
parent     ee7471ec0a7aba338c3ac90de0f2ef0be9a35fed (diff)
Revert^17 "Thread suspension cleanup and deadlock fix"
This reverts commit c6371b52df0da31acc174a3526274417b7aac0a7.
Reason for revert: This seems to have two remaining issues:
1. The second DCHECK in WaitForFlipFunction is not completely guaranteed to hold, resulting in failures in 658-fp-read-barrier.
2. WaitForSuspendBarrier seems to time out occasionally, possibly spuriously. We currently fail as soon as the futex wait times out once, which is probably incompatible with the app freezer; we should retry a few times instead.
Change-Id: Ibd8909b31083fc29e6d4f1fcde003d08eb16fc0a
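Issue 2 above amounts to retrying the timed futex wait a few times before treating a timeout as fatal, instead of giving up the first time it fires. Below is a minimal, self-contained sketch of that retry idea; it is not code from this patch or from ART, and the helper name, retry budget, and per-attempt timeout are illustrative assumptions only.

#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>

#include <cerrno>
#include <cstdint>
#include <ctime>

// Hypothetical helper: wait until *state changes away from `expected`, retrying
// the timed futex wait several times instead of failing on the first timeout.
bool WaitForValueChangeWithRetries(int32_t* state, int32_t expected) {
  constexpr int kMaxTimeouts = 3;  // Illustrative retry budget, not ART's value.
  int timeouts = 0;
  while (__atomic_load_n(state, __ATOMIC_ACQUIRE) == expected) {
    timespec timeout = {};
    timeout.tv_sec = 1;  // Illustrative per-attempt timeout.
    long rc = syscall(SYS_futex, state, FUTEX_WAIT_PRIVATE, expected, &timeout, nullptr, 0);
    if (rc == 0 || errno == EAGAIN || errno == EINTR) {
      continue;  // Woken, value already changed, or interrupted: re-check and loop.
    }
    if (errno == ETIMEDOUT) {
      // Possibly a spurious or freezer-induced stall: only give up after
      // several timeouts rather than the first one.
      if (++timeouts >= kMaxTimeouts) {
        return false;
      }
      continue;
    }
    return false;  // Unexpected futex error.
  }
  return true;
}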
Diffstat (limited to 'runtime/gc')
-rw-r--r--  runtime/gc/collector/concurrent_copying.cc |  11
-rw-r--r--  runtime/gc/collector/mark_compact.cc        |   8
-rw-r--r--  runtime/gc/heap.cc                          | 211
-rw-r--r--  runtime/gc/task_processor.cc                |  28
-rw-r--r--  runtime/gc/task_processor.h                 |   8
5 files changed, 120 insertions, 146 deletions
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 90d8cd4f4d..370e01b61e 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -480,8 +480,7 @@ class ConcurrentCopying::ThreadFlipVisitor : public Closure, public RootVisitor
   }
 
   void Run(Thread* thread) override REQUIRES_SHARED(Locks::mutator_lock_) {
-    // We are either running this in the target thread, or the target thread will wait for us
-    // before switching back to runnable.
+    // Note: self is not necessarily equal to thread since thread may be suspended.
     Thread* self = Thread::Current();
     CHECK(thread == self || thread->GetState() != ThreadState::kRunnable)
         << thread->GetState() << " thread " << thread << " self " << self;
@@ -496,6 +495,7 @@ class ConcurrentCopying::ThreadFlipVisitor : public Closure, public RootVisitor
     // We can use the non-CAS VisitRoots functions below because we update thread-local GC roots
     // only.
     thread->VisitRoots(this, kVisitRootFlagAllRoots);
+    concurrent_copying_->GetBarrier().Pass(self);
   }
 
   void VisitRoots(mirror::Object*** roots,
@@ -764,12 +764,17 @@ void ConcurrentCopying::FlipThreadRoots() {
   }
   Thread* self = Thread::Current();
   Locks::mutator_lock_->AssertNotHeld(self);
+  gc_barrier_->Init(self, 0);
   ThreadFlipVisitor thread_flip_visitor(this, heap_->use_tlab_);
   FlipCallback flip_callback(this);
-  Runtime::Current()->GetThreadList()->FlipThreadRoots(
+  size_t barrier_count = Runtime::Current()->GetThreadList()->FlipThreadRoots(
       &thread_flip_visitor, &flip_callback, this, GetHeap()->GetGcPauseListener());
+  {
+    ScopedThreadStateChange tsc(self, ThreadState::kWaitingForCheckPointsToRun);
+    gc_barrier_->Increment(self, barrier_count);
+  }
   is_asserting_to_space_invariant_ = true;
   QuasiAtomic::ThreadFenceForConstructor();  // TODO: Remove?
   if (kVerboseMode) {
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index 27bb5f26a6..4da7fafce6 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -754,6 +754,7 @@ class MarkCompact::ThreadFlipVisitor : public Closure {
     CHECK(collector_->compacting_);
     thread->SweepInterpreterCache(collector_);
     thread->AdjustTlab(collector_->black_objs_slide_diff_);
+    collector_->GetBarrier().Pass(self);
   }
 
  private:
@@ -801,10 +802,15 @@ void MarkCompact::RunPhases() {
   {
     // Compaction pause
+    gc_barrier_.Init(self, 0);
     ThreadFlipVisitor visitor(this);
     FlipCallback callback(this);
-    runtime->GetThreadList()->FlipThreadRoots(
+    size_t barrier_count = runtime->GetThreadList()->FlipThreadRoots(
         &visitor, &callback, this, GetHeap()->GetGcPauseListener());
+    {
+      ScopedThreadStateChange tsc(self, ThreadState::kWaitingForCheckPointsToRun);
+      gc_barrier_.Increment(self, barrier_count);
+    }
   }
 
   if (IsValidFd(uffd_)) {
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 4671b75388..b4f703ea07 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -2730,122 +2730,113 @@ collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type,
   }
   ScopedThreadStateChange tsc(self, ThreadState::kWaitingPerformingGc);
   Locks::mutator_lock_->AssertNotHeld(self);
-  SelfDeletingTask* clear;  // Unconditionally set below.
+  if (self->IsHandlingStackOverflow()) {
+    // If we are throwing a stack overflow error we probably don't have enough remaining stack
+    // space to run the GC.
+    // Count this as a GC in case someone is waiting for it to complete.
+    gcs_completed_.fetch_add(1, std::memory_order_release);
+    return collector::kGcTypeNone;
+  }
+  bool compacting_gc;
   {
-    // We should not ever become runnable and re-suspend while executing a GC.
-    // This would likely cause a deadlock if we acted on a suspension request.
-    // TODO: We really want to assert that we don't transition to kRunnable.
-    ScopedAssertNoThreadSuspension("Performing GC");
-    if (self->IsHandlingStackOverflow()) {
-      // If we are throwing a stack overflow error we probably don't have enough remaining stack
-      // space to run the GC.
-      // Count this as a GC in case someone is waiting for it to complete.
+    gc_complete_lock_->AssertNotHeld(self);
+    ScopedThreadStateChange tsc2(self, ThreadState::kWaitingForGcToComplete);
+    MutexLock mu(self, *gc_complete_lock_);
+    // Ensure there is only one GC at a time.
+    WaitForGcToCompleteLocked(gc_cause, self);
+    if (requested_gc_num != GC_NUM_ANY && !GCNumberLt(GetCurrentGcNum(), requested_gc_num)) {
+      // The appropriate GC was already triggered elsewhere.
+      return collector::kGcTypeNone;
+    }
+    compacting_gc = IsMovingGc(collector_type_);
+    // GC can be disabled if someone has a used GetPrimitiveArrayCritical.
+    if (compacting_gc && disable_moving_gc_count_ != 0) {
+      LOG(WARNING) << "Skipping GC due to disable moving GC count " << disable_moving_gc_count_;
+      // Again count this as a GC.
       gcs_completed_.fetch_add(1, std::memory_order_release);
       return collector::kGcTypeNone;
     }
-    bool compacting_gc;
-    {
-      gc_complete_lock_->AssertNotHeld(self);
-      ScopedThreadStateChange tsc2(self, ThreadState::kWaitingForGcToComplete);
-      MutexLock mu(self, *gc_complete_lock_);
-      // Ensure there is only one GC at a time.
-      WaitForGcToCompleteLocked(gc_cause, self);
-      if (requested_gc_num != GC_NUM_ANY && !GCNumberLt(GetCurrentGcNum(), requested_gc_num)) {
-        // The appropriate GC was already triggered elsewhere.
-        return collector::kGcTypeNone;
-      }
-      compacting_gc = IsMovingGc(collector_type_);
-      // GC can be disabled if someone has a used GetPrimitiveArrayCritical.
-      if (compacting_gc && disable_moving_gc_count_ != 0) {
-        LOG(WARNING) << "Skipping GC due to disable moving GC count " << disable_moving_gc_count_;
-        // Again count this as a GC.
-        gcs_completed_.fetch_add(1, std::memory_order_release);
-        return collector::kGcTypeNone;
-      }
-      if (gc_disabled_for_shutdown_) {
-        gcs_completed_.fetch_add(1, std::memory_order_release);
-        return collector::kGcTypeNone;
-      }
-      collector_type_running_ = collector_type_;
-      last_gc_cause_ = gc_cause;
-    }
-    if (gc_cause == kGcCauseForAlloc && runtime->HasStatsEnabled()) {
-      ++runtime->GetStats()->gc_for_alloc_count;
-      ++self->GetStats()->gc_for_alloc_count;
-    }
-    const size_t bytes_allocated_before_gc = GetBytesAllocated();
-
-    DCHECK_LT(gc_type, collector::kGcTypeMax);
-    DCHECK_NE(gc_type, collector::kGcTypeNone);
-
-    collector::GarbageCollector* collector = nullptr;
-    // TODO: Clean this up.
-    if (compacting_gc) {
-      DCHECK(current_allocator_ == kAllocatorTypeBumpPointer ||
-             current_allocator_ == kAllocatorTypeTLAB ||
-             current_allocator_ == kAllocatorTypeRegion ||
-             current_allocator_ == kAllocatorTypeRegionTLAB);
-      switch (collector_type_) {
-        case kCollectorTypeSS:
-          semi_space_collector_->SetFromSpace(bump_pointer_space_);
-          semi_space_collector_->SetToSpace(temp_space_);
-          semi_space_collector_->SetSwapSemiSpaces(true);
-          collector = semi_space_collector_;
-          break;
-        case kCollectorTypeCMC:
-          collector = mark_compact_;
-          break;
-        case kCollectorTypeCC:
-          collector::ConcurrentCopying* active_cc_collector;
-          if (use_generational_cc_) {
-            // TODO: Other threads must do the flip checkpoint before they start poking at
-            // active_concurrent_copying_collector_. So we should not concurrency here.
-            active_cc_collector = (gc_type == collector::kGcTypeSticky) ?
-                young_concurrent_copying_collector_ :
-                concurrent_copying_collector_;
-            active_concurrent_copying_collector_.store(active_cc_collector,
-                                                       std::memory_order_relaxed);
-            DCHECK(active_cc_collector->RegionSpace() == region_space_);
-            collector = active_cc_collector;
-          } else {
-            collector = active_concurrent_copying_collector_.load(std::memory_order_relaxed);
-          }
-          break;
-        default:
-          LOG(FATAL) << "Invalid collector type " << static_cast<size_t>(collector_type_);
-      }
-      // temp_space_ will be null for kCollectorTypeCMC.
-      if (temp_space_ != nullptr &&
-          collector != active_concurrent_copying_collector_.load(std::memory_order_relaxed)) {
-        temp_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
-        if (kIsDebugBuild) {
-          // Try to read each page of the memory map in case mprotect didn't work properly
-          // b/19894268.
-          temp_space_->GetMemMap()->TryReadable();
+    if (gc_disabled_for_shutdown_) {
+      gcs_completed_.fetch_add(1, std::memory_order_release);
+      return collector::kGcTypeNone;
+    }
+    collector_type_running_ = collector_type_;
+    last_gc_cause_ = gc_cause;
+  }
+  if (gc_cause == kGcCauseForAlloc && runtime->HasStatsEnabled()) {
+    ++runtime->GetStats()->gc_for_alloc_count;
+    ++self->GetStats()->gc_for_alloc_count;
+  }
+  const size_t bytes_allocated_before_gc = GetBytesAllocated();
+
+  DCHECK_LT(gc_type, collector::kGcTypeMax);
+  DCHECK_NE(gc_type, collector::kGcTypeNone);
+
+  collector::GarbageCollector* collector = nullptr;
+  // TODO: Clean this up.
+  if (compacting_gc) {
+    DCHECK(current_allocator_ == kAllocatorTypeBumpPointer ||
+           current_allocator_ == kAllocatorTypeTLAB ||
+           current_allocator_ == kAllocatorTypeRegion ||
+           current_allocator_ == kAllocatorTypeRegionTLAB);
+    switch (collector_type_) {
+      case kCollectorTypeSS:
+        semi_space_collector_->SetFromSpace(bump_pointer_space_);
+        semi_space_collector_->SetToSpace(temp_space_);
+        semi_space_collector_->SetSwapSemiSpaces(true);
+        collector = semi_space_collector_;
+        break;
+      case kCollectorTypeCMC:
+        collector = mark_compact_;
+        break;
+      case kCollectorTypeCC:
+        collector::ConcurrentCopying* active_cc_collector;
+        if (use_generational_cc_) {
+          // TODO: Other threads must do the flip checkpoint before they start poking at
+          // active_concurrent_copying_collector_. So we should not concurrency here.
+          active_cc_collector = (gc_type == collector::kGcTypeSticky) ?
+              young_concurrent_copying_collector_ : concurrent_copying_collector_;
+          active_concurrent_copying_collector_.store(active_cc_collector,
+                                                     std::memory_order_relaxed);
+          DCHECK(active_cc_collector->RegionSpace() == region_space_);
+          collector = active_cc_collector;
+        } else {
+          collector = active_concurrent_copying_collector_.load(std::memory_order_relaxed);
         }
+        break;
+      default:
+        LOG(FATAL) << "Invalid collector type " << static_cast<size_t>(collector_type_);
+    }
+    // temp_space_ will be null for kCollectorTypeCMC.
+    if (temp_space_ != nullptr
+        && collector != active_concurrent_copying_collector_.load(std::memory_order_relaxed)) {
+      temp_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
+      if (kIsDebugBuild) {
+        // Try to read each page of the memory map in case mprotect didn't work properly b/19894268.
+        temp_space_->GetMemMap()->TryReadable();
      }
-    } else if (current_allocator_ == kAllocatorTypeRosAlloc ||
-               current_allocator_ == kAllocatorTypeDlMalloc) {
-      collector = FindCollectorByGcType(gc_type);
-    } else {
-      LOG(FATAL) << "Invalid current allocator " << current_allocator_;
+      CHECK(temp_space_->IsEmpty());
     }
-
-    CHECK(collector != nullptr) << "Could not find garbage collector with collector_type="
-                                << static_cast<size_t>(collector_type_)
-                                << " and gc_type=" << gc_type;
-    collector->Run(gc_cause, clear_soft_references || runtime->IsZygote());
-    IncrementFreedEver();
-    RequestTrim(self);
-    // Collect cleared references.
-    clear = reference_processor_->CollectClearedReferences(self);
-    // Grow the heap so that we know when to perform the next GC.
-    GrowForUtilization(collector, bytes_allocated_before_gc);
-    old_native_bytes_allocated_.store(GetNativeBytes());
-    LogGC(gc_cause, collector);
-    FinishGC(self, gc_type);
+  } else if (current_allocator_ == kAllocatorTypeRosAlloc ||
+             current_allocator_ == kAllocatorTypeDlMalloc) {
+    collector = FindCollectorByGcType(gc_type);
+  } else {
+    LOG(FATAL) << "Invalid current allocator " << current_allocator_;
   }
+
+  CHECK(collector != nullptr)
+      << "Could not find garbage collector with collector_type="
+      << static_cast<size_t>(collector_type_) << " and gc_type=" << gc_type;
+  collector->Run(gc_cause, clear_soft_references || runtime->IsZygote());
+  IncrementFreedEver();
+  RequestTrim(self);
+  // Collect cleared references.
+  SelfDeletingTask* clear = reference_processor_->CollectClearedReferences(self);
+  // Grow the heap so that we know when to perform the next GC.
+  GrowForUtilization(collector, bytes_allocated_before_gc);
+  old_native_bytes_allocated_.store(GetNativeBytes());
+  LogGC(gc_cause, collector);
+  FinishGC(self, gc_type);
   // Actually enqueue all cleared references. Do this after the GC has officially finished since
   // otherwise we can deadlock.
   clear->Run(self);
@@ -3612,7 +3603,7 @@ collector::GcType Heap::WaitForGcToCompleteLocked(GcCause cause, Thread* self) {
   GcCause last_gc_cause = kGcCauseNone;
   uint64_t wait_start = NanoTime();
   while (collector_type_running_ != kCollectorTypeNone) {
-    if (!task_processor_->IsRunningThread(self)) {
+    if (self != task_processor_->GetRunningThread()) {
       // The current thread is about to wait for a currently running
       // collection to finish. If the waiting thread is not the heap
       // task daemon thread, the currently running collection is
@@ -3632,7 +3623,7 @@ collector::GcType Heap::WaitForGcToCompleteLocked(GcCause cause, Thread* self) {
       LOG(INFO) << "WaitForGcToComplete blocked " << cause << " on " << last_gc_cause << " for "
                 << PrettyDuration(wait_time);
     }
-    if (!task_processor_->IsRunningThread(self)) {
+    if (self != task_processor_->GetRunningThread()) {
      // The current thread is about to run a collection. If the thread
      // is not the heap task daemon thread, it's considered as a
      // blocking GC (i.e., blocking itself).
diff --git a/runtime/gc/task_processor.cc b/runtime/gc/task_processor.cc
index e56dbd17c3..494cf2bb79 100644
--- a/runtime/gc/task_processor.cc
+++ b/runtime/gc/task_processor.cc
@@ -103,31 +103,9 @@ bool TaskProcessor::IsRunning() const {
   return is_running_;
 }
 
-bool TaskProcessor::WaitForThread(Thread* self) {
-  // Waiting for too little time here may cause us to fail to get stack traces, since we can't
-  // safely do so without identifying a HeapTaskDaemon to avoid it. Waiting too long could
-  // conceivably deadlock if we somehow try to get a stack trace on the way to starting the
-  // HeapTaskDaemon. Under normal circumstances. this should terminate immediately, since
-  // HeapTaskDaemon should normally be running.
-  constexpr int kTotalWaitMillis = 100;
-  for (int i = 0; i < kTotalWaitMillis; ++i) {
-    if (is_running_) {
-      return true;
-    }
-    cond_.TimedWait(self, 1 /*msecs*/, 0 /*nsecs*/);
-  }
-  LOG(ERROR) << "No identifiable HeapTaskDaemon; unsafe to get thread stacks.";
-  return false;
-}
-
-bool TaskProcessor::IsRunningThread(Thread* t, bool wait) {
-  Thread* self = Thread::Current();
-  MutexLock mu(self, lock_);
-  if (wait && !WaitForThread(self)) {
-    // If Wait failed, either answer may be correct; in our case, true is safer.
-    return true;
-  }
-  return running_thread_ == t;
+Thread* TaskProcessor::GetRunningThread() const {
+  MutexLock mu(Thread::Current(), lock_);
+  return running_thread_;
 }
 
 void TaskProcessor::Stop(Thread* self) {
diff --git a/runtime/gc/task_processor.h b/runtime/gc/task_processor.h
index b9e6938b09..86e36ab6d6 100644
--- a/runtime/gc/task_processor.h
+++ b/runtime/gc/task_processor.h
@@ -64,15 +64,9 @@ class TaskProcessor {
   bool IsRunning() const REQUIRES(!lock_);
   void UpdateTargetRunTime(Thread* self, HeapTask* target_time, uint64_t new_target_time)
       REQUIRES(!lock_);
-  // Is the given thread the task processor thread?
-  // If wait is true, and no thread has been registered via Start(), we briefly
-  // wait for one to be registered. If we time out, we return true.
-  bool IsRunningThread(Thread* t, bool wait = false) REQUIRES(!lock_);
+  Thread* GetRunningThread() const REQUIRES(!lock_);
 
  private:
-  // Wait briefly for running_thread_ to become non-null. Return false on timeout.
-  bool WaitForThread(Thread* self) REQUIRES(lock_);
-
   class CompareByTargetRunTime {
    public:
    bool operator()(const HeapTask* a, const HeapTask* b) const {
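For background, the restored calls in concurrent_copying.cc and mark_compact.cc re-establish a checkpoint handshake around FlipThreadRoots: the collector arms a barrier with Init(self, 0), each thread whose roots get flipped calls Pass, and the GC thread then calls Increment with the thread count returned by FlipThreadRoots and blocks until every counted thread has checked in. The following is a simplified, self-contained sketch of that counting-barrier pattern, not ART's Barrier class:

#include <condition_variable>
#include <mutex>

// Simplified counting barrier: the coordinator arms it, workers "pass",
// and Increment(delta) blocks until `delta` passes have arrived.
class CountingBarrier {
 public:
  // Init(): set the number of outstanding passes (0 arms it for later).
  void Init(int count) {
    std::lock_guard<std::mutex> lock(mu_);
    count_ = count;
  }
  // Pass(): called by each worker thread once its roots have been flipped.
  void Pass() {
    std::lock_guard<std::mutex> lock(mu_);
    --count_;
    if (count_ <= 0) {
      cv_.notify_all();
    }
  }
  // Increment(): called by the coordinator with the number of threads the
  // flip visited; blocks until that many Pass() calls have been made.
  void Increment(int delta) {
    std::unique_lock<std::mutex> lock(mu_);
    count_ += delta;
    cv_.wait(lock, [this] { return count_ <= 0; });
  }

 private:
  std::mutex mu_;
  std::condition_variable cv_;
  int count_ = 0;
};

In the patch itself, the blocking Increment call is wrapped in a ScopedThreadStateChange to kWaitingForCheckPointsToRun, since the GC thread may suspend while waiting there.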