From 858f1c5fd5e528d0b16040ced74d4636046a42d8 Mon Sep 17 00:00:00 2001
From: Mathieu Chartier
Date: Wed, 17 Oct 2012 17:45:55 -0700
Subject: Checkpoint root marking.

Added a thread list checkpoint function; it goes through every thread
and runs the checkpoint on each of them. Threads that are runnable run
the checkpoint callback themselves at their next suspend check, while
suspended threads are left suspended but have the callback run on them.

Added a checkpoint function member to each thread; it is invoked when
the checkpoint request flag is set during a transition from runnable to
suspended.

Using the checkpoint to mark the roots reduces the first pause of a
partial / full GC to around 1 ms. (A minimal sketch of the
barrier/checkpoint handshake appears after the patch.)

Change-Id: I97239cc72ee0e4a3397e9138a62ee559268dce0a
---
 src/gc/barrier.cc                 |  46 +++++++++++++++
 src/gc/barrier.h                  |  56 ++++++++++++++++++
 src/gc/card_table.h               |   2 +-
 src/gc/mark_sweep.cc              |  47 +++++++++++++--
 src/gc/mark_sweep.h               |  17 +++++-
 src/heap.cc                       | 117 +++++++++++++++++++-------------------
 src/oat/runtime/support_thread.cc |  18 +++++-
 src/runtime.cc                    |   8 ++-
 src/runtime.h                     |   3 +
 src/scoped_thread_state_change.h  |   1 +
 src/signal_catcher.cc             |   7 ++-
 src/thread.cc                     |  29 +++++++++-
 src/thread.h                      |  39 +++++++++----
 src/thread_list.cc                |  73 ++++++++++++++++++++++++
 src/thread_list.h                 |   6 ++
 15 files changed, 384 insertions(+), 85 deletions(-)
 create mode 100644 src/gc/barrier.cc
 create mode 100644 src/gc/barrier.h

diff --git a/src/gc/barrier.cc b/src/gc/barrier.cc
new file mode 100644
index 0000000000..aa9433bb06
--- /dev/null
+++ b/src/gc/barrier.cc
@@ -0,0 +1,46 @@
+#include "barrier.h"
+#include "../mutex.h"
+#include "thread.h"
+
+namespace art {
+
+Barrier::Barrier()
+    : count_(0),
+      lock_("GC barrier lock"),
+      condition_("GC barrier condition", lock_) {
+}
+
+void Barrier::Pass(Thread* self) {
+  MutexLock mu(self, lock_);
+  SetCountLocked(self, count_ - 1);
+}
+
+void Barrier::Wait(Thread* self) {
+  Increment(self, -1);
+}
+
+void Barrier::Init(Thread* self, int count) {
+  MutexLock mu(self, lock_);
+  SetCountLocked(self, count);
+}
+
+void Barrier::Increment(Thread* self, int delta) {
+  MutexLock mu(self, lock_);
+  SetCountLocked(self, count_ + delta);
+  if (count_ != 0) {
+    condition_.Wait(self);
+  }
+}
+
+void Barrier::SetCountLocked(Thread* self, int count) {
+  count_ = count;
+  if (count_ == 0) {
+    condition_.Broadcast(self);
+  }
+}
+
+Barrier::~Barrier() {
+  CHECK(!count_) << "Attempted to destroy barrier with non-zero count";
+}
+
+}  // namespace art
diff --git a/src/gc/barrier.h b/src/gc/barrier.h
new file mode 100644
index 0000000000..207536a688
--- /dev/null
+++ b/src/gc/barrier.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_SRC_GC_BARRIER_H_
+#define ART_SRC_GC_BARRIER_H_
+
+#include "../mutex.h"
+#include "locks.h"
+#include "UniquePtr.h"
+
+namespace art {
+
+class Barrier {
+ public:
+  Barrier();
+  virtual ~Barrier();
+
+  // Pass through the barrier; decrements the count but does not block.
+  void Pass(Thread* self);
+
+  // Wait on the barrier, decrementing the count.
+  void Wait(Thread* self);
+
+  // Set the count to a new value; if the value is 0 then everyone waiting on the condition
+  // variable is resumed.
+  void Init(Thread* self, int count);
+
+  // Increment the count by delta and wait on the condition if the count is non-zero.
+  void Increment(Thread* self, int delta);
+
+ private:
+  void SetCountLocked(Thread* self, int count) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
+  // Counter; when this reaches 0 all waiters blocked on the barrier are signalled.
+  int count_ GUARDED_BY(lock_);
+
+  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  ConditionVariable condition_ GUARDED_BY(lock_);
+};
+
+}  // namespace art
+#endif  // ART_SRC_GC_BARRIER_H_
diff --git a/src/gc/card_table.h b/src/gc/card_table.h
index 2715f527e6..035c073577 100644
--- a/src/gc/card_table.h
+++ b/src/gc/card_table.h
@@ -160,4 +160,4 @@ class CardTable {
 };
 
 }  // namespace art
-#endif  // DALVIK_ALLOC_CARDTABLE_H_
+#endif  // ART_SRC_GC_CARDTABLE_H_
diff --git a/src/gc/mark_sweep.cc b/src/gc/mark_sweep.cc
index 980eed1d67..86373702bf 100644
--- a/src/gc/mark_sweep.cc
+++ b/src/gc/mark_sweep.cc
@@ -19,6 +19,7 @@
 #include
 #include
 
+#include "barrier.h"
 #include "card_table.h"
 #include "class_loader.h"
 #include "dex_cache.h"
@@ -37,10 +38,10 @@
 #include "thread.h"
 #include "thread_list.h"
 
-static const bool kUseMarkStackPrefetch = true;
-
 namespace art {
 
+static const bool kUseMarkStackPrefetch = true;
+
 class SetFingerVisitor {
  public:
   SetFingerVisitor(MarkSweep* const mark_sweep) : mark_sweep_(mark_sweep) {
   }
 
@@ -68,7 +69,8 @@ MarkSweep::MarkSweep(ObjectStack* mark_stack)
       phantom_reference_list_(NULL),
       cleared_reference_list_(NULL),
       freed_bytes_(0), freed_objects_(0),
-      class_count_(0), array_count_(0), other_count_(0) {
+      class_count_(0), array_count_(0), other_count_(0),
+      gc_barrier_(new Barrier) {
   DCHECK(mark_stack_ != NULL);
 }
 
@@ -200,6 +202,10 @@ void MarkSweep::MarkRoots() {
   Runtime::Current()->VisitNonConcurrentRoots(MarkObjectVisitor, this);
 }
 
+void MarkSweep::MarkNonThreadRoots() {
+  Runtime::Current()->VisitNonThreadRoots(MarkObjectVisitor, this);
+}
+
 void MarkSweep::MarkConcurrentRoots() {
   Runtime::Current()->VisitConcurrentRoots(MarkObjectVisitor, this);
 }
@@ -519,6 +525,38 @@ struct SweepCallbackContext {
   Thread* self;
 };
 
+class CheckpointMarkThreadRoots : public Thread::CheckpointFunction {
+ public:
+  CheckpointMarkThreadRoots(MarkSweep* mark_sweep) : mark_sweep_(mark_sweep) {
+  }
+
+  virtual void Run(Thread* thread) NO_THREAD_SAFETY_ANALYSIS {
+    // Note: self is not necessarily equal to thread since thread may be suspended.
+    Thread* self = Thread::Current();
+    DCHECK(thread == self || thread->IsSuspended() || thread->GetState() == kWaitingPerformingGc);
+    WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+    thread->VisitRoots(MarkSweep::MarkObjectVisitor, mark_sweep_);
+    mark_sweep_->GetBarrier().Pass(self);
+  }
+
+ private:
+  MarkSweep* mark_sweep_;
+};
+
+Barrier& MarkSweep::GetBarrier() {
+  return *gc_barrier_;
+}
+
+void MarkSweep::MarkRootsCheckpoint() {
+  UniquePtr<CheckpointMarkThreadRoots> check_point(new CheckpointMarkThreadRoots(this));
+  ThreadList* thread_list = Runtime::Current()->GetThreadList();
+  // Increment the count of the barrier. If all of the checkpoints have already finished, then
+  // the count will hit 0 and we will continue. Otherwise we are still waiting for some
+  // checkpoints, so the count will go positive and we will unblock when it hits zero.
+  gc_barrier_->Increment(Thread::Current(), thread_list->RunCheckpoint(check_point.get()));
+}
+
 void MarkSweep::SweepCallback(size_t num_ptrs, Object** ptrs, void* arg) {
   size_t freed_objects = num_ptrs;
   size_t freed_bytes = 0;
@@ -538,8 +576,7 @@ void MarkSweep::SweepCallback(size_t num_ptrs, Object** ptrs, void* arg) {
     freed_bytes += space->FreeList(self, num_ptrs, ptrs);
   } else {
     for (size_t i = 0; i < num_ptrs; ++i) {
-      Object* obj = static_cast<Object*>(ptrs[i]);
-      freed_bytes += space->Free(self, obj);
+      freed_bytes += space->Free(self, ptrs[i]);
     }
   }
 
diff --git a/src/gc/mark_sweep.h b/src/gc/mark_sweep.h
index ed74f993fc..d64439f4e3 100644
--- a/src/gc/mark_sweep.h
+++ b/src/gc/mark_sweep.h
@@ -25,6 +25,7 @@
 
 namespace art {
 
+class Barrier;
 class CheckObjectVisitor;
 class Class;
 class Heap;
@@ -52,9 +53,15 @@ class MarkSweep {
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  void MarkNonThreadRoots()
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
   void MarkConcurrentRoots()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
+  void MarkRootsCheckpoint()
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
   // Verify that image roots point to only marked objects within the alloc space.
   void VerifyImageRoots() EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
@@ -183,6 +190,11 @@ class MarkSweep {
     }
   }
 
+  static void MarkObjectVisitor(const Object* root, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  Barrier& GetBarrier();
+
  private:
   // Returns true if the object has its bit set in the mark bitmap.
   bool IsMarked(const Object* object) const;
@@ -193,9 +205,6 @@ class MarkSweep {
   static bool IsMarkedArrayCallback(const Object* object, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  static void MarkObjectVisitor(const Object* root, void* arg)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
   static void ReMarkObjectVisitor(const Object* root, void* arg)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
@@ -417,6 +426,8 @@ class MarkSweep {
   size_t array_count_;
   size_t other_count_;
 
+  UniquePtr<Barrier> gc_barrier_;
+
   friend class AddIfReachesAllocSpaceVisitor;  // Used by mod-union table.
  friend class CheckBitmapVisitor;
  friend class CheckObjectVisitor;
diff --git a/src/heap.cc b/src/heap.cc
index 584c5b2e69..33f83660bb 100644
--- a/src/heap.cc
+++ b/src/heap.cc
@@ -1509,16 +1509,12 @@ void Heap::ClearCards(TimingLogger& timings) {
 void Heap::CollectGarbageConcurrentMarkSweepPlan(Thread* self, GcType gc_type, GcCause gc_cause,
                                                  bool clear_soft_references) {
   TimingLogger timings("ConcurrentCollectGarbageInternal", true);
-  uint64_t root_begin = NanoTime(), root_end = 0, dirty_begin = 0, dirty_end = 0;
+  uint64_t gc_begin = NanoTime(), root_begin = 0, root_end = 0, dirty_begin = 0, dirty_end = 0;
   std::stringstream gc_type_str;
   gc_type_str << gc_type << " ";
 
-  // Suspend all threads are get exclusive access to the heap.
   ThreadList* thread_list = Runtime::Current()->GetThreadList();
-  thread_list->SuspendAll();
-  timings.AddSplit("SuspendAll");
-  Locks::mutator_lock_->AssertExclusiveHeld(self);
-
+  std::vector<byte*> dirty_cards;
   size_t bytes_freed = 0;
   Object* cleared_references = NULL;
   {
@@ -1528,48 +1524,9 @@ void Heap::CollectGarbageConcurrentMarkSweepPlan(Thread* self, GcType gc_type, G
     mark_sweep.Init();
     timings.AddSplit("Init");
 
-    if (verify_pre_gc_heap_) {
-      WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-      if (!VerifyHeapReferences()) {
-        LOG(FATAL) << "Pre " << gc_type_str.str() << "Gc verification failed";
-      }
-      timings.AddSplit("VerifyHeapReferencesPreGC");
-    }
-
-    // Swap the stacks, this is safe since all the mutators are suspended at this point.
-    SwapStacks();
-
-    // Check that all objects which reference things in the live stack are on dirty cards.
-    if (verify_missing_card_marks_) {
-      ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-      // Sort the live stack so that we can quickly binary search it later.
-      std::sort(live_stack_->Begin(), live_stack_->End());
-      if (!VerifyMissingCardMarks()) {
-        LOG(FATAL) << "Pre GC verification of missing card marks failed";
-      }
-    }
-
-    // We will need to know which cards were dirty for doing concurrent processing of dirty cards.
-    // TODO: Investigate using a mark stack instead of a vector.
-    std::vector<byte*> dirty_cards;
-    if (gc_type == kGcTypeSticky) {
-      dirty_cards.reserve(4 * KB);
-      for (Spaces::iterator it = spaces_.begin(); it != spaces_.end(); ++it) {
-        card_table_->GetDirtyCards(*it, dirty_cards);
-      }
-      timings.AddSplit("GetDirtyCards");
-    }
-
-    // Clear image space cards and keep track of cards we cleared in the mod-union table.
-    ClearCards(timings);
-
     {
       WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
 
-      for (Object** it = live_stack_->Begin(); it != live_stack_->End(); ++it) {
-        DCHECK(!GetLiveBitmap()->Test(*it));
-      }
-
       if (gc_type == kGcTypePartial) {
         // Copy the mark bits over from the live bits, do this as early as possible or else we can
         // accidentally un-mark roots.
@@ -1595,25 +1552,65 @@ void Heap::CollectGarbageConcurrentMarkSweepPlan(Thread* self, GcType gc_type, G
                                 reinterpret_cast<Object*>(alloc_space_->Begin()));
       }
       mark_sweep.FindDefaultMarkBitmap();
+    }
 
-    // Marking roots is not necessary for sticky mark bits since we only actually require the
-    // remarking of roots.
-    if (gc_type != kGcTypeSticky) {
-      mark_sweep.MarkRoots();
-      timings.AddSplit("MarkRoots");
+    mark_sweep.MarkRootsCheckpoint();
+    timings.AddSplit("MarkRootsCheckpoint");
+
+    {
+      root_begin = NanoTime();
+
+      // Suspend all threads and get exclusive access to the heap.
+      thread_list->SuspendAll();
+      timings.AddSplit("SuspendAll");
+      Locks::mutator_lock_->AssertExclusiveHeld(self);
+
+      if (verify_pre_gc_heap_) {
+        WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+        if (!VerifyHeapReferences()) {
+          LOG(FATAL) << "Pre " << gc_type_str.str() << "Gc verification failed";
+        }
+        timings.AddSplit("VerifyHeapReferencesPreGC");
       }
 
-    if (verify_mod_union_table_) {
-      zygote_mod_union_table_->Update();
-      zygote_mod_union_table_->Verify();
-      mod_union_table_->Update();
-      mod_union_table_->Verify();
+      // Swap the stacks, this is safe since all the mutators are suspended at this point.
+      SwapStacks();
+
+      // Check that all objects which reference things in the live stack are on dirty cards.
+      if (verify_missing_card_marks_) {
+        ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
+        // Sort the live stack so that we can quickly binary search it later.
+        std::sort(live_stack_->Begin(), live_stack_->End());
+        if (!VerifyMissingCardMarks()) {
+          LOG(FATAL) << "Pre GC verification of missing card marks failed";
+        }
       }
+
+      // We will need to know which cards were dirty for doing concurrent processing of dirty cards.
+      // TODO: Investigate using a mark stack instead of a vector.
+      if (gc_type == kGcTypeSticky) {
+        dirty_cards.reserve(4 * KB);
+        for (Spaces::iterator it = spaces_.begin(); it != spaces_.end(); ++it) {
+          card_table_->GetDirtyCards(*it, dirty_cards);
+        }
+        timings.AddSplit("GetDirtyCards");
+      }
+
+      // Clear image space cards and keep track of cards we cleared in the mod-union table.
+      ClearCards(timings);
     }
 
     // Roots are marked on the bitmap and the mark_stack is empty.
     DCHECK(mark_stack_->IsEmpty());
 
+    if (verify_mod_union_table_) {
+      ReaderMutexLock reader_lock(self, *Locks::heap_bitmap_lock_);
+      zygote_mod_union_table_->Update();
+      zygote_mod_union_table_->Verify();
+      mod_union_table_->Update();
+      mod_union_table_->Verify();
+    }
+
     // Allow mutators to go again, acquire share on mutator_lock_ to continue.
     thread_list->ResumeAll();
     {
@@ -1622,11 +1619,11 @@ void Heap::CollectGarbageConcurrentMarkSweepPlan(Thread* self, GcType gc_type, G
       timings.AddSplit("RootEnd");
 
       // Mark the roots which we can do concurrently.
+      WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
       mark_sweep.MarkConcurrentRoots();
       timings.AddSplit("MarkConcurrentRoots");
-
-      WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-      UpdateAndMarkModUnion(&mark_sweep, timings, gc_type);
+      mark_sweep.MarkNonThreadRoots();
+      timings.AddSplit("MarkNonThreadRoots");
 
       if (gc_type != kGcTypeSticky) {
         // Mark everything allocated since the last GC as live so that we can sweep concurrently,
         timings.AddSplit("MarkStackAsLive");
       }
 
+    UpdateAndMarkModUnion(&mark_sweep, timings, gc_type);
+
     if (gc_type != kGcTypeSticky) {
       // Recursively mark all the non-image bits set in the mark bitmap.
       mark_sweep.RecursiveMark(gc_type == kGcTypePartial, timings);
@@ -1775,7 +1774,7 @@ void Heap::CollectGarbageConcurrentMarkSweepPlan(Thread* self, GcType gc_type, G
 
   // If the GC was slow, then print timings in the log.
   uint64_t pause_roots = (root_end - root_begin) / 1000 * 1000;
   uint64_t pause_dirty = (dirty_end - dirty_begin) / 1000 * 1000;
-  uint64_t duration = (NanoTime() - root_begin) / 1000 * 1000;
+  uint64_t duration = (NanoTime() - gc_begin) / 1000 * 1000;
   total_paused_time_ += pause_roots + pause_dirty;
   if (pause_roots > MsToNs(5) || pause_dirty > MsToNs(5) ||
       (gc_cause == kGcCauseForAlloc && duration > MsToNs(20))) {
diff --git a/src/oat/runtime/support_thread.cc b/src/oat/runtime/support_thread.cc
index 2eef424816..c33b42e23f 100644
--- a/src/oat/runtime/support_thread.cc
+++ b/src/oat/runtime/support_thread.cc
@@ -20,18 +20,32 @@
 
 namespace art {
 
+static void CheckSuspend(Thread* thread)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  for (;;) {
+    if (thread->ReadFlag(kCheckpointRequest)) {
+      thread->RunCheckpointFunction();
+      thread->AtomicClearFlag(kCheckpointRequest);
+    } else if (thread->ReadFlag(kSuspendRequest)) {
+      thread->FullSuspendCheck();
+    } else {
+      break;
+    }
+  }
+}
+
 void CheckSuspendFromCode(Thread* thread)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Called when thread->suspend_count_ != 0 on JNI return. JNI method acts as callee-save frame.
   thread->VerifyStack();
-  thread->FullSuspendCheck();
+  CheckSuspend(thread);
 }
 
 extern "C" void artTestSuspendFromCode(Thread* thread, AbstractMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Called when suspend count check value is 0 and thread->suspend_count_ != 0
   FinishCalleeSaveFrameSetup(thread, sp, Runtime::kRefsOnly);
-  thread->FullSuspendCheck();
+  CheckSuspend(thread);
 }
 
 }  // namespace art
diff --git a/src/runtime.cc b/src/runtime.cc
index 3a5c41ca08..7bc1b70881 100644
--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -1002,10 +1002,9 @@ void Runtime::VisitConcurrentRoots(Heap::RootVisitor* visitor, void* arg) {
   }
 }
 
-void Runtime::VisitNonConcurrentRoots(Heap::RootVisitor* visitor, void* arg) {
+void Runtime::VisitNonThreadRoots(Heap::RootVisitor* visitor, void* arg) {
   Dbg::VisitRoots(visitor, arg);
   java_vm_->VisitRoots(visitor, arg);
-  thread_list_->VisitRoots(visitor, arg);
   if (pre_allocated_OutOfMemoryError_ != NULL) {
     visitor(pre_allocated_OutOfMemoryError_, arg);
   }
@@ -1020,6 +1019,11 @@ void Runtime::VisitNonConcurrentRoots(Heap::RootVisitor* visitor, void* arg) {
   }
 }
 
+void Runtime::VisitNonConcurrentRoots(Heap::RootVisitor* visitor, void* arg) {
+  thread_list_->VisitRoots(visitor, arg);
+  VisitNonThreadRoots(visitor, arg);
+}
+
 void Runtime::DirtyRoots() {
   intern_table_->Dirty();
   class_linker_->Dirty();
diff --git a/src/runtime.h b/src/runtime.h
index 44823a0d1b..02f5206473 100644
--- a/src/runtime.h
+++ b/src/runtime.h
@@ -234,6 +234,9 @@ class Runtime {
   // Visit all of the roots we can safely do concurrently.
   void VisitConcurrentRoots(Heap::RootVisitor* visitor, void* arg);
 
+  // Visit all of the non-thread roots; we can do this with the mutators unpaused.
+  void VisitNonThreadRoots(Heap::RootVisitor* visitor, void* arg);
+
   // Visit all other roots which must be done with mutators suspended.
   void VisitNonConcurrentRoots(Heap::RootVisitor* visitor, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/src/scoped_thread_state_change.h b/src/scoped_thread_state_change.h
index 32c3685096..39f5c3fe8a 100644
--- a/src/scoped_thread_state_change.h
+++ b/src/scoped_thread_state_change.h
@@ -49,6 +49,7 @@ class ScopedThreadStateChange {
       // A suspended transition to another effectively suspended transition, ok to use Unsafe.
       self_->SetState(new_thread_state);
     }
+
     if (runnable_transition && old_thread_state_ != new_thread_state) {
       if (new_thread_state == kRunnable) {
         self_->TransitionFromSuspendedToRunnable();
diff --git a/src/signal_catcher.cc b/src/signal_catcher.cc
index 80c37d48e4..b54c819775 100644
--- a/src/signal_catcher.cc
+++ b/src/signal_catcher.cc
@@ -145,8 +145,11 @@ void SignalCatcher::HandleSigQuit() {
     }
   }
   os << "----- end " << getpid() << " -----\n";
-  CHECK_EQ(self->SetStateUnsafe(old_state), kRunnable);
+  if (self->ReadFlag(kCheckpointRequest)) {
+    self->RunCheckpointFunction();
+    self->AtomicClearFlag(kCheckpointRequest);
+  }
   self->EndAssertNoThreadSuspension(old_cause);
   thread_list->ResumeAll();
 
@@ -186,7 +189,7 @@ void* SignalCatcher::Run(void* arg) {
   CHECK(runtime->AttachCurrentThread("Signal Catcher", true, runtime->GetSystemThreadGroup()));
 
   Thread* self = Thread::Current();
-
+  DCHECK_NE(self->GetState(), kRunnable);
   {
     MutexLock mu(self, signal_catcher->lock_);
     signal_catcher->thread_ = self;
diff --git a/src/thread.cc b/src/thread.cc
index cea919fb47..67773a58c2 100644
--- a/src/thread.cc
+++ b/src/thread.cc
@@ -555,6 +555,19 @@ void Thread::ModifySuspendCount(Thread* self, int delta, bool for_debugger) {
   }
 }
 
+bool Thread::RequestCheckpoint(CheckpointFunction* function) {
+  CHECK(!ReadFlag(kCheckpointRequest)) << "Already have a pending checkpoint request";
+  checkpoint_function_ = function;
+  union StateAndFlags old_state_and_flags = state_and_flags_;
+  // The target thread must be runnable for the compare-and-swap below to succeed.
+  old_state_and_flags.as_struct.state = kRunnable;
+  union StateAndFlags new_state_and_flags = old_state_and_flags;
+  new_state_and_flags.as_struct.flags |= kCheckpointRequest;
+  int succeeded = android_atomic_cmpxchg(old_state_and_flags.as_int, new_state_and_flags.as_int,
+                                         &state_and_flags_.as_int);
+  return succeeded == 0;  // android_atomic_cmpxchg returns 0 on success.
+}
+
 void Thread::FullSuspendCheck() {
   VLOG(threads) << this << " self-suspending";
   // Make thread appear suspended to other threads, release mutator_lock_.
@@ -570,7 +583,20 @@ void Thread::TransitionFromRunnableToSuspended(ThreadState new_state) {
   DCHECK_EQ(this, Thread::Current());
   // Change to non-runnable state, thereby appearing suspended to the system.
   DCHECK_EQ(GetState(), kRunnable);
-  state_and_flags_.as_struct.state = new_state;
+  union StateAndFlags old_state_and_flags;
+  union StateAndFlags new_state_and_flags;
+  do {
+    old_state_and_flags = state_and_flags_;
+    // Copy over flags and try to clear the checkpoint bit if it is set.
+    new_state_and_flags.as_struct.flags = old_state_and_flags.as_struct.flags & ~kCheckpointRequest;
+    new_state_and_flags.as_struct.state = new_state;
+  } while (android_atomic_cmpxchg(old_state_and_flags.as_int, new_state_and_flags.as_int,
+                                  &state_and_flags_.as_int) != 0);
+  // If we toggled the checkpoint flag we must have cleared it.
+  uint16_t flag_change = new_state_and_flags.as_struct.flags ^ old_state_and_flags.as_struct.flags;
+  if ((flag_change & kCheckpointRequest) != 0) {
+    RunCheckpointFunction();
+  }
   // Release share on mutator_lock_.
   Locks::mutator_lock_->SharedUnlock(this);
 }
 
@@ -935,6 +961,7 @@ Thread::Thread(bool daemon)
       pthread_self_(0),
       no_thread_suspension_(0),
       last_no_thread_suspension_cause_(NULL),
+      checkpoint_function_(NULL),
       thread_exit_check_count_(0) {
   CHECK_EQ((sizeof(Thread) % 4), 0U) << sizeof(Thread);
   state_and_flags_.as_struct.flags = 0;
diff --git a/src/thread.h b/src/thread.h
index d559449d8e..abfd7195bd 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -101,10 +101,17 @@ enum ThreadFlag {
   kSuspendRequest    = 1,  // If set implies that suspend_count_ > 0.
   kExceptionPending  = 2,  // If set implies that exception_ != NULL.
   kEnterInterpreter  = 4,  // Instruct managed code it should enter the interpreter.
+  kCheckpointRequest = 8,  // Request that the thread do some set of work.
 };
 
 class PACKED Thread {
  public:
+  class CheckpointFunction {
+   public:
+    virtual ~CheckpointFunction() { }
+    virtual void Run(Thread* self) = 0;
+  };
+
   // Space to throw a StackOverflowError in.
 #if !defined(ART_USE_LLVM_COMPILER)
   static const size_t kStackOverflowReservedBytes = 4 * KB;
@@ -167,13 +174,17 @@ class PACKED Thread {
     return debug_suspend_count_;
   }
 
-  bool IsSuspended() const EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_suspend_count_lock_) {
-    return GetState() != kRunnable && ReadFlag(kSuspendRequest);
+  bool IsSuspended() const {
+    union StateAndFlags state_and_flags = state_and_flags_;
+    return state_and_flags.as_struct.state != kRunnable &&
+        (state_and_flags.as_struct.flags & kSuspendRequest) != 0;
   }
 
   void ModifySuspendCount(Thread* self, int delta, bool for_debugger)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_suspend_count_lock_);
 
+  bool RequestCheckpoint(CheckpointFunction* function);
+
   // Called when thread detected that the thread_suspend_count_ was non-zero. Gives up share of
   // mutator_lock_ and waits until it is resumed and thread_suspend_count_ is zero.
   void FullSuspendCheck()
@@ -570,6 +581,19 @@ class PACKED Thread {
     held_mutexes_[level] = mutex;
   }
 
+  void RunCheckpointFunction() {
+    CHECK(checkpoint_function_ != NULL);
+    checkpoint_function_->Run(this);
+  }
+
+  bool ReadFlag(ThreadFlag flag) const {
+    return (state_and_flags_.as_struct.flags & flag) != 0;
+  }
+
+  void AtomicSetFlag(ThreadFlag flag);
+
+  void AtomicClearFlag(ThreadFlag flag);
+
  private:
   // We have no control over the size of 'bool', but want our boolean fields
   // to be 4-byte quantities.
@@ -617,14 +641,6 @@ class PACKED Thread {
 
   void NotifyLocked(Thread* self) EXCLUSIVE_LOCKS_REQUIRED(wait_mutex_);
 
-  bool ReadFlag(ThreadFlag flag) const {
-    return (state_and_flags_.as_struct.flags & flag) != 0;
-  }
-
-  void AtomicSetFlag(ThreadFlag flag);
-
-  void AtomicClearFlag(ThreadFlag flag);
-
   static void ThreadExitCallback(void* arg);
 
   // TLS key used to retrieve the Thread*.
@@ -759,6 +775,9 @@ class PACKED Thread {
   // Cause for last suspension.
   const char* last_no_thread_suspension_cause_;
 
+  // Pending checkpoint function; NULL when no checkpoint is pending.
+  CheckpointFunction* checkpoint_function_;
+
  public:
   // Runtime support function pointers
   // TODO: move this near the top, since changing its offset requires all oats to be recompiled!
diff --git a/src/thread_list.cc b/src/thread_list.cc
index 601a93f77b..4ad25aec8c 100644
--- a/src/thread_list.cc
+++ b/src/thread_list.cc
@@ -151,6 +151,79 @@ static void UnsafeLogFatalForThreadSuspendAllTimeout(Thread* self) NO_THREAD_SAF
 }
 #endif
 
+size_t ThreadList::RunCheckpoint(Thread::CheckpointFunction* checkpoint_function) {
+  Thread* self = Thread::Current();
+  if (kIsDebugBuild) {
+    Locks::mutator_lock_->AssertNotHeld(self);
+    Locks::thread_list_lock_->AssertNotHeld(self);
+    Locks::thread_suspend_count_lock_->AssertNotHeld(self);
+    CHECK_NE(self->GetState(), kRunnable);
+  }
+
+  std::vector<Thread*> suspended_count_modified_threads;
+  size_t count = 0;
+  {
+    // Request a checkpoint from each thread; threads which are suspended get their checkpoint
+    // run manually below.
+    MutexLock mu(self, *Locks::thread_list_lock_);
+    // TODO: C++0x auto.
+    for (It it = list_.begin(), end = list_.end(); it != end; ++it) {
+      Thread* thread = *it;
+      if (thread != self) {
+        for (;;) {
+          if (thread->RequestCheckpoint(checkpoint_function)) {
+            // This thread will run its checkpoint some time in the near future.
+            count++;
+            break;
+          } else {
+            // The thread is probably suspended; try to make sure that it stays suspended.
+            MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+            if (thread->GetState() == kRunnable) {
+              // The thread switched back to runnable; retry the checkpoint request.
+              continue;
+            }
+            thread->ModifySuspendCount(self, +1, false);
+            suspended_count_modified_threads.push_back(thread);
+            break;
+          }
+        }
+      }
+    }
+  }
+
+  // Run the checkpoint on ourself while we wait for threads to suspend.
+  checkpoint_function->Run(self);
+
+  // Run the checkpoint on the suspended threads.
+  for (size_t i = 0; i < suspended_count_modified_threads.size(); ++i) {
+    Thread* thread = suspended_count_modified_threads[i];
+    if (!thread->IsSuspended()) {
+      // Wait until the thread is suspended.
+      uint64_t start = NanoTime();
+      do {
+        // Sleep for 100us.
+        usleep(100);
+      } while (!thread->IsSuspended());
+      uint64_t end = NanoTime();
+      // Shouldn't need to wait for longer than 1 millisecond.
+      const uint64_t threshold = 1;
+      if (NsToMs(end - start) > threshold) {
+        LOG(INFO) << "Warning: waited longer than " << threshold << " ms for thread suspend"
+                  << std::endl;
+      }
+    }
+    // We know for sure that the thread is suspended at this point.
+    thread->RunCheckpointFunction();
+    {
+      MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+      thread->ModifySuspendCount(self, -1, false);
+    }
+  }
+
+  // Add one for self.
+  return count + suspended_count_modified_threads.size() + 1;
+}
+
 void ThreadList::SuspendAll() {
   Thread* self = Thread::Current();
 
diff --git a/src/thread_list.h b/src/thread_list.h
index 3142fd3caf..a41fa57d47 100644
--- a/src/thread_list.h
+++ b/src/thread_list.h
@@ -55,6 +55,12 @@ class ThreadList {
       LOCKS_EXCLUDED(Locks::thread_list_lock_,
                      Locks::thread_suspend_count_lock_);
 
+  // Run a checkpoint on all threads. Runnable threads are not suspended but run the checkpoint
+  // inside of their next suspend check. Returns how many checkpoints we should expect to run.
+  size_t RunCheckpoint(Thread::CheckpointFunction* checkpoint_function)
+      LOCKS_EXCLUDED(Locks::thread_list_lock_,
+                     Locks::thread_suspend_count_lock_);
+
   // Suspends all threads
   void SuspendAllForDebugger()
       LOCKS_EXCLUDED(Locks::mutator_lock_,
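
A minimal sketch of the barrier handshake the patch introduces, in portable
C++11 for illustration only: the name SketchBarrier is hypothetical, and the
real implementation is the art::Barrier added above in src/gc/barrier.{h,cc},
built on ART's own Mutex/ConditionVariable. Unlike the patch, whose
Increment() issues a single condition wait, this sketch waits in a predicate
loop so a spurious wakeup cannot release the waiter early.

    #include <condition_variable>
    #include <mutex>

    class SketchBarrier {
     public:
      // Called by each thread when its checkpoint work is done; decrements
      // the count without blocking. Mirrors Barrier::Pass().
      void Pass() {
        std::lock_guard<std::mutex> lock(mu_);
        SetCountLocked(count_ - 1);
      }

      // Called once by the coordinator with the number of checkpoints it
      // expects; blocks until the count reaches zero. Mirrors
      // Barrier::Increment().
      void Increment(int delta) {
        std::unique_lock<std::mutex> lock(mu_);
        SetCountLocked(count_ + delta);
        cv_.wait(lock, [this] { return count_ == 0; });
      }

     private:
      // Caller must hold mu_; wakes all waiters when the count hits zero.
      void SetCountLocked(int count) {
        count_ = count;
        if (count_ == 0) {
          cv_.notify_all();
        }
      }

      std::mutex mu_;
      std::condition_variable cv_;
      int count_ = 0;  // May go transiently negative: Pass() can precede Increment().
    };

Because Pass() may arrive before Increment(), the count can dip below zero;
Increment() then adds the full expected total, so the coordinator wakes only
after every checkpoint has passed. This is the sequencing that
MarkSweep::MarkRootsCheckpoint relies on when it calls
gc_barrier_->Increment(self, thread_list->RunCheckpoint(check_point.get())).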
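The other half of the mechanism is the atomic state-and-flags word. Below is
a rough sketch, under an assumed bit layout, of the compare-and-swap in
Thread::RequestCheckpoint: the flag can only be planted while the target is
observed runnable, so a suspended thread never races the request, and
RunCheckpoint instead pins such threads with a raised suspend count.

    #include <atomic>
    #include <cstdint>

    // Assumed layout for the sketch: state in the high 16 bits, flags in the
    // low 16 bits. ART packs these through a union instead.
    constexpr uint32_t kSketchRunnable = 0;           // illustrative state value
    constexpr uint32_t kSketchCheckpointRequest = 8;  // flag bit, as in ThreadFlag

    bool RequestCheckpointSketch(std::atomic<uint32_t>& state_and_flags) {
      // Keep the current flags but insist that the state is runnable; if the
      // thread is suspended, the expected word won't match and the CAS fails.
      uint32_t flags = state_and_flags.load(std::memory_order_relaxed) & 0xFFFFu;
      uint32_t expected = (kSketchRunnable << 16) | flags;
      uint32_t desired = expected | kSketchCheckpointRequest;
      return state_and_flags.compare_exchange_strong(expected, desired);
    }

On the success path the runnable thread later clears the bit itself: when it
transitions from runnable to suspended it CASes the new state and the cleared
checkpoint bit in one step and then runs the pending checkpoint function,
which is what Thread::TransitionFromRunnableToSuspended does above.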