 src/gc/card_table.cc      |  19
 src/gc/card_table.h       | 167
 src/gc/heap_bitmap.h      |   3
 src/gc/mark_sweep.cc      |  60
 src/gc/mark_sweep.h       |  15
 src/gc/mod_union_table.cc |  44
 src/gc/mod_union_table.h  |   8
 src/gc/space.cc           |  35
 src/gc/space.h            |   7
 src/heap.cc               | 113
 src/heap.h                |  13
 src/utils.h               |  21
 12 files changed, 323 insertions(+), 182 deletions(-)
diff --git a/src/gc/card_table.cc b/src/gc/card_table.cc
index a5531d8fa0..1e978b5a59 100644
--- a/src/gc/card_table.cc
+++ b/src/gc/card_table.cc
@@ -96,25 +96,6 @@ void CardTable::ClearCardTable() {
   memset(mem_map_->Begin(), kCardClean, mem_map_->Size());
 }
 
-void CardTable::PreClearCards(ContinuousSpace* space, std::vector<byte*>& out_cards) {
-  byte* card_end = CardFromAddr(space->End());
-  for (byte* card_cur = CardFromAddr(space->Begin()); card_cur < card_end; ++card_cur) {
-    if (*card_cur == kCardDirty) {
-      out_cards.push_back(card_cur);
-      *card_cur = kCardClean;
-    }
-  }
-}
-
-void CardTable::GetDirtyCards(ContinuousSpace* space, std::vector<byte*>& out_cards) const {
-  byte* card_end = CardFromAddr(space->End());
-  for (byte* card_cur = CardFromAddr(space->Begin()); card_cur < card_end; ++card_cur) {
-    if (*card_cur == kCardDirty) {
-      out_cards.push_back(card_cur);
-    }
-  }
-}
-
 bool CardTable::AddrIsInCardTable(const void* addr) const {
   return IsValidCard(biased_begin_ + ((uintptr_t)addr >> kCardShift));
 }
diff --git a/src/gc/card_table.h b/src/gc/card_table.h
index 035c073577..2a20fc81ab 100644
--- a/src/gc/card_table.h
+++ b/src/gc/card_table.h
@@ -22,6 +22,7 @@
 #include "mem_map.h"
 #include "space_bitmap.h"
 #include "UniquePtr.h"
+#include "utils.h"
 
 namespace art {
 
@@ -72,32 +73,146 @@ class CardTable {
     return biased_begin_;
   }
 
-  // For every dirty card between begin and end invoke the visitor with the specified argument.
+  /*
+   * Visitor is expected to take in a card and return the new value. When a value is modified,
+   * the modified visitor is called.
+   * visitor: The visitor which modifies the cards. Returns the new value for a card given an
+   * old value.
+   * modified: Whenever the visitor modifies a card, this visitor is called on the card. Enables
+   * us to know which cards got cleared.
+   */
+  template <typename Visitor, typename ModifiedVisitor>
+  void ModifyCardsAtomic(byte* scan_begin, byte* scan_end, const Visitor& visitor,
+                         const ModifiedVisitor& modified = VoidFunctor()) {
+    byte* card_cur = CardFromAddr(scan_begin);
+    byte* card_end = CardFromAddr(scan_end);
+    CheckCardValid(card_cur);
+    CheckCardValid(card_end);
+
+    // Handle any unaligned cards at the start.
+    while (!IsAligned<sizeof(word)>(card_cur) && card_cur < card_end) {
+      byte expected, new_value;
+      do {
+        expected = *card_cur;
+        new_value = visitor(expected);
+      } while (expected != new_value && UNLIKELY(byte_cas(expected, new_value, card_cur) != 0));
+      if (expected != new_value) {
+        modified(card_cur, expected, new_value);
+      }
+      ++card_cur;
+    }
+
+    // Handle unaligned cards at the end.
+    while (!IsAligned<sizeof(word)>(card_end) && card_end > card_cur) {
+      --card_end;
+      byte expected, new_value;
+      do {
+        expected = *card_end;
+        new_value = visitor(expected);
+      } while (expected != new_value && UNLIKELY(byte_cas(expected, new_value, card_end) != 0));
+      if (expected != new_value) {
+        // Report the card we actually modified, not the start-of-range cursor.
+        modified(card_end, expected, new_value);
+      }
+    }
+
+    // Now that the remaining range is word aligned, we can process whole words at a time.
+    uintptr_t* word_cur = reinterpret_cast<uintptr_t*>(card_cur);
+    uintptr_t* word_end = reinterpret_cast<uintptr_t*>(card_end);
+    uintptr_t expected_word;
+    uintptr_t new_word;
+
+    // TODO: Parallelize.
+    while (word_cur < word_end) {
+      while ((expected_word = *word_cur) != 0) {
+        new_word =
+            (visitor((expected_word >> 0) & 0xFF) << 0) |
+            (visitor((expected_word >> 8) & 0xFF) << 8) |
+            (visitor((expected_word >> 16) & 0xFF) << 16) |
+            (visitor((expected_word >> 24) & 0xFF) << 24);
+        if (new_word == expected_word) {
+          // No need to do a cas.
+          break;
+        }
+        if (LIKELY(android_atomic_cas(expected_word, new_word,
+                                      reinterpret_cast<int32_t*>(word_cur)) == 0)) {
+          for (size_t i = 0; i < sizeof(uintptr_t); ++i) {
+            const byte expected_byte = (expected_word >> (8 * i)) & 0xFF;
+            const byte new_byte = (new_word >> (8 * i)) & 0xFF;
+            if (expected_byte != new_byte) {
+              modified(reinterpret_cast<byte*>(word_cur) + i, expected_byte, new_byte);
+            }
+          }
+          break;
+        }
+      }
+      ++word_cur;
+    }
+  }
+
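Note: ModifyCardsAtomic pairs two functors -- `visitor` computes each card's new value, and `modified` observes every byte that actually changed, which is how the mod-union tables later in this diff learn which cards were dirty. A minimal usage sketch grounded in this patch (card_table and space stand in for a CardTable* and ContinuousSpace* already in scope):

    // Sketch: age all cards in a space, recording the ones that were dirty.
    std::vector<byte*> cleared_cards;
    ModUnionClearCardVisitor collector(&cleared_cards);  // defined in mod_union_table.cc below
    // AgeCardVisitor (added to heap.h in this diff) maps kCardDirty -> kCardDirty - 1
    // and every other value -> 0, so only freshly dirtied cards survive aging.
    card_table->ModifyCardsAtomic(space->Begin(), space->End(), AgeCardVisitor(), collector);
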
+  // For every card between begin and end that is dirty to at least the minimum age, invoke the
+  // visitor with the specified argument.
   template <typename Visitor, typename FingerVisitor>
   void Scan(SpaceBitmap* bitmap, byte* scan_begin, byte* scan_end,
-            const Visitor& visitor, const FingerVisitor& finger_visitor) const
+            const Visitor& visitor, const FingerVisitor& finger_visitor,
+            const byte minimum_age = kCardDirty) const
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(bitmap->HasAddress(scan_begin));
     DCHECK(bitmap->HasAddress(scan_end - 1));  // scan_end is the byte after the last byte we scan.
     byte* card_cur = CardFromAddr(scan_begin);
     byte* card_end = CardFromAddr(scan_end);
-    while (card_cur < card_end) {
+    CheckCardValid(card_cur);
+    CheckCardValid(card_end);
+
+    // Handle any unaligned cards at the start.
+    while (!IsAligned<sizeof(word)>(card_cur) && card_cur < card_end) {
+      if (*card_cur >= minimum_age) {
+        uintptr_t start = reinterpret_cast<uintptr_t>(AddrFromCard(card_cur));
+        uintptr_t end = start + kCardSize;
+        bitmap->VisitMarkedRange(start, end, visitor, finger_visitor);
+      }
+      ++card_cur;
+    }
+
+    byte* aligned_end = card_end -
+        (reinterpret_cast<uintptr_t>(card_end) & (sizeof(uintptr_t) - 1));
+
+    // Now that the cursor is word aligned, words can be handed off to be processed in parallel.
+    uintptr_t* word_cur = reinterpret_cast<uintptr_t*>(card_cur);
+    uintptr_t* word_end = reinterpret_cast<uintptr_t*>(aligned_end);
+
+    // TODO: Parallelize.
+    while (word_cur < word_end) {
       // Find the first dirty card.
-      card_cur = reinterpret_cast<byte*>(memchr(card_cur, kCardDirty, card_end - card_cur));
-      if (card_cur == NULL) {
+      while (word_cur < word_end && *word_cur == 0) {
+        word_cur++;
+      }
+      if (word_cur >= word_end) {
         break;
       }
-      byte* run_start = card_cur++;
-
-      while (*card_cur == kCardDirty && card_cur < card_end) {
-        card_cur++;
+      uintptr_t start_word = *word_cur;
+      for (size_t i = 0; i < sizeof(uintptr_t); ++i) {
+        if ((start_word & 0xFF) >= minimum_age) {
+          byte* card = reinterpret_cast<byte*>(word_cur) + i;
+          DCHECK_EQ(*card, start_word & 0xFF);
+          uintptr_t start = reinterpret_cast<uintptr_t>(AddrFromCard(card));
+          uintptr_t end = start + kCardSize;
+          bitmap->VisitMarkedRange(start, end, visitor, finger_visitor);
+        }
+        start_word >>= 8;
       }
-      byte* run_end = card_cur;
+      ++word_cur;
+    }
 
-      uintptr_t start = reinterpret_cast<uintptr_t>(AddrFromCard(run_start));
-      uintptr_t end = reinterpret_cast<uintptr_t>(AddrFromCard(run_end));
-      bitmap->VisitMarkedRange(start, end, visitor, finger_visitor);
+    // Handle any unaligned cards at the end.
+    card_cur = reinterpret_cast<byte*>(word_end);
+    while (card_cur < card_end) {
+      if (*card_cur >= minimum_age) {
+        uintptr_t start = reinterpret_cast<uintptr_t>(AddrFromCard(card_cur));
+        uintptr_t end = start + kCardSize;
+        bitmap->VisitMarkedRange(start, end, visitor, finger_visitor);
+      }
+      ++card_cur;
     }
   }
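Both loops above lean on one trick: four cards pack into a 32-bit word, so a whole word of clean (zero) cards is rejected with a single load and compare. A standalone sketch of the idea, assuming 32-bit words and an aligned byte-per-card table (names are illustrative, not from the source):

    #include <stdint.h>
    #include <stddef.h>

    // Count cards in [cards, cards + n) whose value is >= min_age, skipping
    // clean regions a word at a time. n is assumed to be a multiple of 4 here.
    size_t CountAgedCards(const uint8_t* cards, size_t n, uint8_t min_age) {
      size_t count = 0;
      const uint32_t* words = reinterpret_cast<const uint32_t*>(cards);
      for (size_t w = 0; w < n / sizeof(uint32_t); ++w) {
        uint32_t v = words[w];
        if (v == 0) {
          continue;  // Four clean cards skipped with one comparison.
        }
        for (size_t i = 0; i < sizeof(uint32_t); ++i) {
          if ((v & 0xFF) >= min_age) {
            ++count;
          }
          v >>= 8;
        }
      }
      return count;
    }
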
@@ -110,12 +225,6 @@ class CardTable {
   // Resets all of the bytes in the card table which do not map to the image space.
   void ClearSpaceCards(ContinuousSpace* space);
 
-  // Clean all the cards which map to a space.
-  void PreClearCards(ContinuousSpace* space, std::vector<byte*>& out_cards);
-
-  // Returns all of the dirty cards which map to a space.
-  void GetDirtyCards(ContinuousSpace* space, std::vector<byte*>& out_cards) const;
-
   // Returns the first address in the heap which maps to this card.
   void* AddrFromCard(const byte *card_addr) const {
     DCHECK(IsValidCard(card_addr))
@@ -138,6 +247,19 @@ class CardTable {
   bool AddrIsInCardTable(const void* addr) const;
 
  private:
+  static int byte_cas(byte old_value, byte new_value, byte* address) {
+    // On little endian, the byte at offset i within a word occupies bits [8*i, 8*i + 8).
+    const size_t offset = reinterpret_cast<uintptr_t>(address) % sizeof(uintptr_t);
+    const size_t shift = offset * 8;
+    // Align the address down to the containing word.
+    address -= offset;
+    int32_t* word_address = reinterpret_cast<int32_t*>(address);
+    // Word with the byte we are trying to cas cleared.
+    const int32_t cur_word = *word_address & ~(0xFF << shift);
+    const int32_t old_word = cur_word | (static_cast<int32_t>(old_value) << shift);
+    const int32_t new_word = cur_word | (static_cast<int32_t>(new_value) << shift);
+    return android_atomic_cas(old_word, new_word, word_address);
+  }
+
   CardTable(MemMap* begin, byte* biased_begin, size_t offset);
 
   // Returns true iff the card table address is within the bounds of the card table.
@@ -147,6 +269,13 @@
     return card_addr >= begin && card_addr < end;
   }
 
+  void CheckCardValid(byte* card) const {
+    DCHECK(IsValidCard(card))
+        << " card_addr: " << reinterpret_cast<const void*>(card)
+        << " begin: " << reinterpret_cast<void*>(mem_map_->Begin() + offset_)
+        << " end: " << reinterpret_cast<void*>(mem_map_->End());
+  }
+
   // Verifies that all gray objects are on a dirty card.
   void VerifyCardTable();
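byte_cas emulates a single-byte compare-and-swap on top of the word-wide android_atomic_cas: it reads the containing word, splices the expected and desired byte values in, and lets the CAS fail (forcing a retry) if any neighboring byte changed concurrently. A worked example of the splicing arithmetic, assuming little endian and a card at byte offset 2 of its word:

    #include <stdint.h>
    #include <stdio.h>

    int main() {
      uint32_t word = 0x40FF3007;         // word containing the card byte
      const uint32_t offset = 2;          // the card is byte 2 of the word
      const uint32_t shift = offset * 8;  // its bit position: 16
      uint32_t cur = word & ~(0xFFu << shift);     // 0x40003007, target byte cleared
      uint32_t old_word = cur | (0xFFu << shift);  // 0x40FF3007, expected full word
      uint32_t new_word = cur | (0x70u << shift);  // 0x40703007, desired full word
      // A CAS(old_word, new_word) on the whole word now succeeds only if no
      // other thread touched any of the four bytes in the meantime.
      printf("%08x -> %08x\n", old_word, new_word);
      return 0;
    }
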
diff --git a/src/gc/heap_bitmap.h b/src/gc/heap_bitmap.h
index 666fcc7dd9..42c4166ba0 100644
--- a/src/gc/heap_bitmap.h
+++ b/src/gc/heap_bitmap.h
@@ -73,8 +73,7 @@ namespace art {
     // TODO: C++0x auto
     for (Bitmaps::iterator it = bitmaps_.begin(); it != bitmaps_.end(); ++it) {
       SpaceBitmap* bitmap = *it;
-      bitmap->VisitMarkedRange(bitmap->HeapBegin(), bitmap->HeapLimit(), visitor,
-                               IdentityFunctor());
+      bitmap->VisitMarkedRange(bitmap->HeapBegin(), bitmap->HeapLimit(), visitor, VoidFunctor());
     }
     large_objects_->Visit(visitor);
   }
diff --git a/src/gc/mark_sweep.cc b/src/gc/mark_sweep.cc
index 4662cf6901..f2617e4ec9 100644
--- a/src/gc/mark_sweep.cc
+++ b/src/gc/mark_sweep.cc
@@ -51,6 +51,7 @@ static const bool kCountClassesMarked = false;
 static const bool kProfileLargeObjects = false;
 static const bool kMeasureOverhead = false;
 static const bool kCountTasks = false;
+static const bool kCountJavaLangRefs = false;
 
 class SetFingerVisitor {
  public:
@@ -83,6 +84,7 @@ MarkSweep::MarkSweep(ObjectStack* mark_stack)
       large_object_test_(0), large_object_mark_(0),
       classes_marked_(0), overhead_time_(0),
       work_chunks_created_(0), work_chunks_deleted_(0),
+      reference_count_(0),
       gc_barrier_(new Barrier),
       large_object_lock_("large object lock") {
   DCHECK(mark_stack_ != NULL);
@@ -272,8 +274,7 @@ class CheckObjectVisitor {
   }
 
   void operator ()(const Object* obj, const Object* ref, MemberOffset offset, bool is_static) const
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_,
-                            Locks::mutator_lock_) {
+      NO_THREAD_SAFETY_ANALYSIS {
     mark_sweep_->CheckReference(obj, ref, offset, is_static);
   }
 
@@ -327,7 +328,7 @@ class ScanObjectVisitor {
   MarkSweep* const mark_sweep_;
 };
 
-void MarkSweep::ScanGrayObjects() {
+void MarkSweep::ScanGrayObjects(byte minimum_age) {
   const Spaces& spaces = heap_->GetSpaces();
   CardTable* card_table = heap_->GetCardTable();
   ScanObjectVisitor visitor(this);
@@ -339,7 +340,7 @@ void MarkSweep::ScanGrayObjects(byte minimum_age) {
     byte* end = space->End();
     // Image spaces are handled properly since live == marked for them.
     SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
-    card_table->Scan(mark_bitmap, begin, end, visitor, IdentityFunctor());
+    card_table->Scan(mark_bitmap, begin, end, visitor, VoidFunctor(), minimum_age);
   }
 }
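Before this change, a sticky GC snapshotted dirty cards into a std::vector (the GetDirtyCards/PreClearCards pair deleted above); the aging scheme encodes the same information in the card table itself, so ScanGrayObjects only needs an age threshold. Schematically (both calls are grounded in this diff; begin, end, and bitmap are stand-ins for values in scope):

    // Before: copy dirty-card addresses aside, then walk the vector.
    //   std::vector<byte*> dirty_cards;
    //   card_table->GetDirtyCards(space, dirty_cards);   // removed by this diff
    // After: age the cards in place, then scan by age -- no side allocation.
    card_table->ModifyCardsAtomic(begin, end, AgeCardVisitor(), VoidFunctor());
    card_table->Scan(bitmap, begin, end, visitor, VoidFunctor(), CardTable::kCardDirty - 1);
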
@@ -373,7 +374,7 @@ void MarkSweep::VerifyImageRoots() {
       uintptr_t end = reinterpret_cast<uintptr_t>(space->End());
       SpaceBitmap* live_bitmap = space->GetLiveBitmap();
       DCHECK(live_bitmap != NULL);
-      live_bitmap->VisitMarkedRange(begin, end, visitor, IdentityFunctor());
+      live_bitmap->VisitMarkedRange(begin, end, visitor, VoidFunctor());
     }
   }
 }
@@ -436,7 +437,7 @@ void MarkSweep::RecursiveMarkCards(CardTable* card_table, const std::vector<byte
     }
   }
   if (kDisableFinger) {
-    active_bitmap->VisitMarkedRange(begin, end, image_root_visitor, IdentityFunctor());
+    active_bitmap->VisitMarkedRange(begin, end, image_root_visitor, VoidFunctor());
   } else {
     active_bitmap->VisitMarkedRange(begin, end, image_root_visitor, finger_visitor);
   }
@@ -452,8 +453,8 @@ bool MarkSweep::IsMarkedCallback(const Object* object, void* arg) {
          !reinterpret_cast<MarkSweep*>(arg)->GetHeap()->GetLiveBitmap()->Test(object);
 }
 
-void MarkSweep::RecursiveMarkDirtyObjects() {
-  ScanGrayObjects();
+void MarkSweep::RecursiveMarkDirtyObjects(byte minimum_age) {
+  ScanGrayObjects(minimum_age);
   ProcessMarkStack();
 }
@@ -569,8 +570,8 @@ class CheckpointMarkThreadRoots : public Closure {
   virtual void Run(Thread* thread) NO_THREAD_SAFETY_ANALYSIS {
     // Note: self is not necessarily equal to thread since thread may be suspended.
     Thread* self = Thread::Current();
-    DCHECK(thread == self || thread->IsSuspended() || thread->GetState() == kWaitingPerformingGc)
-        << thread->GetState();
+    CHECK(thread == self || thread->IsSuspended() || thread->GetState() == kWaitingPerformingGc)
+        << thread->GetState() << " thread " << thread << " self " << self;
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
     thread->VisitRoots(MarkSweep::MarkObjectCallback, mark_sweep_);
     mark_sweep_->GetBarrier().Pass(self);
@@ -585,12 +586,12 @@ Barrier& MarkSweep::GetBarrier() {
 }
 
 void MarkSweep::MarkRootsCheckpoint() {
-  UniquePtr<CheckpointMarkThreadRoots> check_point(new CheckpointMarkThreadRoots(this));
+  CheckpointMarkThreadRoots check_point(this);
   ThreadList* thread_list = Runtime::Current()->GetThreadList();
   // Increment the count of the barrier. If all of the checkpoints have already been finished,
   // the count will hit zero and we continue. Otherwise we are still waiting for some
   // checkpoints, so the counter will go positive and we will unblock when it hits zero.
-  gc_barrier_->Increment(Thread::Current(), thread_list->RunCheckpoint(check_point.get()));
+  gc_barrier_->Increment(Thread::Current(), thread_list->RunCheckpoint(&check_point));
 }
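The checkpoint replaces a stop-the-world root scan: each running thread marks its own roots at its next suspend point and then passes the barrier. A simplified sketch of the rendezvous, assuming RunCheckpoint returns the number of threads that have not yet run the closure (names match the diff; everything else is pared down):

    // GC-thread side of the rendezvous.
    CheckpointMarkThreadRoots check_point(this);
    ThreadList* thread_list = Runtime::Current()->GetThreadList();
    size_t pending = thread_list->RunCheckpoint(&check_point);
    // Each mutator calls gc_barrier_->Pass(self) when its closure finishes,
    // decrementing the count; adding `pending` here therefore blocks exactly
    // until every thread has marked its roots.
    gc_barrier_->Increment(Thread::Current(), pending);
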
 
 void MarkSweep::SweepCallback(size_t num_ptrs, Object** ptrs, void* arg) {
@@ -675,7 +676,7 @@ void MarkSweep::SweepArray(TimingLogger& logger, ObjectStack* allocations, bool
   freed_bytes_ += freed_bytes;
   logger.AddSplit("FreeList");
   allocations->Reset();
-  logger.AddSplit("Reset stack");
+  logger.AddSplit("ResetStack");
 }
 
 void MarkSweep::Sweep(TimingLogger& timings, bool partial, bool swap_bitmaps) {
@@ -799,6 +800,9 @@ void MarkSweep::DelayReferenceReferent(Object* obj) {
   DCHECK(klass->IsReferenceClass());
   Object* pending = obj->GetFieldObject<Object*>(heap_->GetReferencePendingNextOffset(), false);
   Object* referent = heap_->GetReferenceReferent(obj);
+  if (kCountJavaLangRefs) {
+    ++reference_count_;
+  }
   if (pending == NULL && referent != NULL && !IsMarked(referent)) {
     Object** list = NULL;
     if (klass->IsSoftReferenceClass()) {
@@ -826,9 +830,7 @@ class MarkObjectVisitor {
   }
 
   void operator ()(const Object* /* obj */, const Object* ref, const MemberOffset& /* offset */,
-                   bool /* is_static */) const
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+                   bool /* is_static */) const EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
     mark_sweep_->MarkObject(ref);
   }
 
@@ -963,13 +965,9 @@ public:
   virtual void Run(Thread* self) {
     int index;
     while ((index = index_++) < length_) {
-      static const size_t prefetch_look_ahead = 4;
       if (kUseMarkStackPrefetch) {
-        if (index + prefetch_look_ahead < length_) {
-          __builtin_prefetch(&data_[index + prefetch_look_ahead]);
-        } else {
-          __builtin_prefetch(&data_[length_ - 1]);
-        }
+        static const size_t prefetch_look_ahead = 1;
+        __builtin_prefetch(data_[std::min(index + prefetch_look_ahead, length_ - 1)]);
       }
       Object* obj = data_[index];
       DCHECK(obj != NULL);
@@ -1203,25 +1201,29 @@ void MarkSweep::UnBindBitmaps() {
 }
 
 MarkSweep::~MarkSweep() {
-  if (class_count_ != 0 || array_count_ != 0 || other_count_ != 0) {
-    LOG(INFO) << "MarkSweep scanned classes=" << class_count_ << " arrays=" << array_count_
-              << " other=" << other_count_;
+  if (kCountScannedTypes) {
+    VLOG(gc) << "MarkSweep scanned classes=" << class_count_ << " arrays=" << array_count_
+             << " other=" << other_count_;
   }
 
   if (kCountTasks) {
-    LOG(INFO) << "Total number of work chunks allocated: " << work_chunks_created_;
+    VLOG(gc) << "Total number of work chunks allocated: " << work_chunks_created_;
   }
 
   if (kMeasureOverhead) {
-    LOG(INFO) << "Overhead time " << PrettyDuration(overhead_time_);
+    VLOG(gc) << "Overhead time " << PrettyDuration(overhead_time_);
   }
 
   if (kProfileLargeObjects) {
-    LOG(INFO) << "Large objects tested " << large_object_test_ << " marked " << large_object_mark_;
+    VLOG(gc) << "Large objects tested " << large_object_test_ << " marked " << large_object_mark_;
   }
 
   if (kCountClassesMarked) {
-    LOG(INFO) << "Classes marked " << classes_marked_;
+    VLOG(gc) << "Classes marked " << classes_marked_;
+  }
+
+  if (kCountJavaLangRefs) {
+    VLOG(gc) << "References scanned " << reference_count_;
   }
 
   // Ensure that the mark stack is empty.
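The prefetch changes in this diff (here, and in DlMallocSpace::FreeList below) follow one pattern: issue a prefetch a fixed distance ahead of the loop cursor so memory latency overlaps the current iteration's work, clamping the index so the address stays valid near the end of the array. A generic sketch (the look-ahead distance is workload dependent; 8 matches the FreeList change):

    #include <algorithm>
    #include <cstddef>

    // Process a pointer array while prefetching the objects `look_ahead`
    // slots ahead. The array itself is walked sequentially, so only the
    // pointed-to objects benefit from explicit prefetching.
    template <typename T, typename Fn>
    void ProcessWithPrefetch(T* const* data, size_t length, Fn process,
                             size_t look_ahead = 8) {
      if (length == 0) {
        return;
      }
      for (size_t i = 0; i < length; ++i) {
        __builtin_prefetch(data[std::min(i + look_ahead, length - 1)]);
        process(data[i]);
      }
    }
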
diff --git a/src/gc/mark_sweep.h b/src/gc/mark_sweep.h
index f510943254..2297ce17f8 100644
--- a/src/gc/mark_sweep.h
+++ b/src/gc/mark_sweep.h
@@ -82,7 +82,7 @@ class MarkSweep {
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   // Builds a mark stack with objects on dirty cards and recursively marks until it empties.
-  void RecursiveMarkDirtyObjects()
+  void RecursiveMarkDirtyObjects(byte minimum_age = CardTable::kCardDirty)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -165,7 +165,7 @@ class MarkSweep {
       ++other_count_;
     }
     VisitOtherReferences(klass, obj, visitor);
-    if (klass->IsReferenceClass()) {
+    if (UNLIKELY(klass->IsReferenceClass())) {
       DelayReferenceReferent(const_cast<Object*>(obj));
     }
   }
@@ -329,9 +329,8 @@ class MarkSweep {
   template <typename Visitor>
   static void VisitFieldsReferences(const Object* obj, uint32_t ref_offsets, bool is_static,
                                     const Visitor& visitor)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_,
-                            Locks::mutator_lock_) {
-    if (ref_offsets != CLASS_WALK_SUPER) {
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
+    if (LIKELY(ref_offsets != CLASS_WALK_SUPER)) {
       // Found a reference offset bitmap. Mark the specified offsets.
       while (ref_offsets != 0) {
         size_t right_shift = CLZ(ref_offsets);
@@ -386,8 +385,9 @@ class MarkSweep {
   }
 
   // Blackens objects grayed during a garbage collection.
-  void ScanGrayObjects()
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  void ScanGrayObjects(byte minimum_age)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Schedules an unmarked object for reference processing.
   void DelayReferenceReferent(Object* reference)
@@ -459,6 +459,7 @@ class MarkSweep {
   AtomicInteger overhead_time_;
   AtomicInteger work_chunks_created_;
   AtomicInteger work_chunks_deleted_;
+  AtomicInteger reference_count_;
 
   UniquePtr<Barrier> gc_barrier_;
   Mutex large_object_lock_;
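The hunks above add LIKELY/UNLIKELY branch hints on the hot marking path. Their definitions are not shown in this diff; in GCC-based code bases they are conventionally built on __builtin_expect, roughly:

    // Conventional definitions (assumed, not part of this diff):
    #define LIKELY(x)   __builtin_expect(!!(x), 1)
    #define UNLIKELY(x) __builtin_expect(!!(x), 0)
    // The hint lets the compiler keep the common case as straight-line code,
    // e.g. most scanned objects are not java.lang.ref.Reference instances:
    //   if (UNLIKELY(klass->IsReferenceClass())) { DelayReferenceReferent(obj); }
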
diff --git a/src/gc/mod_union_table.cc b/src/gc/mod_union_table.cc
index 967f79575d..5dd61e7d21 100644
--- a/src/gc/mod_union_table.cc
+++ b/src/gc/mod_union_table.cc
@@ -56,7 +56,7 @@ class ModUnionVisitor {
       bitmap_(bitmap) {
   }
 
-  void operator ()(Object* obj) const
+  void operator ()(const Object* obj) const
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_,
                             Locks::mutator_lock_) {
     DCHECK(obj != NULL);
@@ -76,9 +76,12 @@ class ModUnionClearCardSetVisitor {
     : cleared_cards_(cleared_cards) {
   }
 
-  inline void operator ()(byte* card) const {
-    cleared_cards_->insert(card);
+  inline void operator ()(byte* card, byte expected_value, byte new_value) const {
+    if (expected_value == CardTable::kCardDirty) {
+      cleared_cards_->insert(card);
+    }
   }
+
  private:
   std::set<byte*>* const cleared_cards_;
 };
@@ -89,8 +92,10 @@ class ModUnionClearCardVisitor {
     : cleared_cards_(cleared_cards) {
   }
 
-  void operator ()(byte* card) const {
-    cleared_cards_->push_back(card);
+  void operator ()(byte* card, byte expected_card, byte new_card) const {
+    if (expected_card == CardTable::kCardDirty) {
+      cleared_cards_->push_back(card);
+    }
   }
  private:
   std::vector<byte*>* cleared_cards_;
@@ -124,7 +129,7 @@ void ModUnionTableBitmap::ClearCards(ContinuousSpace* space) {
   CardTable* card_table = heap_->GetCardTable();
   ModUnionClearCardVisitor visitor(&cleared_cards_);
   // Clear dirty cards in this image space and update the corresponding mod-union bits.
-  card_table->VisitClear(space->Begin(), space->End(), visitor);
+  card_table->ModifyCardsAtomic(space->Begin(), space->End(), AgeCardVisitor(), visitor);
 }
 
 void ModUnionTableBitmap::Update() {
@@ -145,7 +150,7 @@ void ModUnionTableBitmap::Update() {
       // At this point we need to update the mod-union bitmap to contain all the objects which
       // reach the alloc space.
       ModUnionVisitor visitor(heap_, bitmap);
-      space->GetLiveBitmap()->VisitMarkedRange(start, end, visitor, IdentityFunctor());
+      space->GetLiveBitmap()->VisitMarkedRange(start, end, visitor, VoidFunctor());
     }
   }
 
@@ -172,7 +177,7 @@ void ModUnionTableBitmap::MarkReferences(MarkSweep* mark_sweep) {
     const ContinuousSpace* space = it->first;
     uintptr_t begin = reinterpret_cast<uintptr_t>(space->Begin());
     uintptr_t end = reinterpret_cast<uintptr_t>(space->End());
-    it->second->VisitMarkedRange(begin, end, image_root_scanner, IdentityFunctor());
+    it->second->VisitMarkedRange(begin, end, image_root_scanner, VoidFunctor());
   }
 }
 
@@ -189,7 +194,7 @@ void ModUnionTableReferenceCache::ClearCards(ContinuousSpace* space) {
   CardTable* card_table = GetHeap()->GetCardTable();
   ModUnionClearCardSetVisitor visitor(&cleared_cards_);
   // Clear dirty cards in this space and update the corresponding mod-union bits.
-  card_table->VisitClear(space->Begin(), space->End(), visitor);
+  card_table->ModifyCardsAtomic(space->Begin(), space->End(), AgeCardVisitor(), visitor);
 }
 
 class AddToReferenceArrayVisitor {
@@ -204,8 +209,8 @@ class AddToReferenceArrayVisitor {
   // Extra parameters are required since we use this same visitor signature for checking objects.
   void operator ()(const Object* obj, const Object* ref, const MemberOffset& /* offset */,
                    bool /* is_static */) const {
-    // Only add the reference if it fits our criteria.
-    if (mod_union_table_->AddReference(obj, ref)) {
+    // Only add the reference if it is non null and fits our criteria.
+    if (ref != NULL && mod_union_table_->AddReference(obj, ref)) {
       references_->push_back(ref);
     }
   }
@@ -224,7 +229,7 @@ class ModUnionReferenceVisitor {
       references_(references) {
   }
 
-  void operator ()(Object* obj) const
+  void operator ()(const Object* obj) const
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_,
                             Locks::mutator_lock_) {
     DCHECK(obj != NULL);
@@ -253,9 +258,10 @@ class CheckReferenceVisitor {
   // Extra parameters are required since we use this same visitor signature for checking objects.
   void operator ()(const Object* obj, const Object* ref, const MemberOffset& /* offset */,
                    bool /* is_static */) const
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
     Heap* heap = mod_union_table_->GetHeap();
-    if (mod_union_table_->AddReference(obj, ref) && references_.find(ref) == references_.end()) {
+    if (ref != NULL && mod_union_table_->AddReference(obj, ref) &&
+        references_.find(ref) == references_.end()) {
       ContinuousSpace* from_space = heap->FindSpaceFromObject(obj);
       ContinuousSpace* to_space = heap->FindSpaceFromObject(ref);
       LOG(INFO) << "Object " << reinterpret_cast<const void*>(obj) << "(" << PrettyTypeOf(obj) << ")"
@@ -284,7 +290,7 @@ class ModUnionCheckReferences {
       references_(references) {
   }
 
-  void operator ()(Object* obj) const
+  void operator ()(const Object* obj) const
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
     DCHECK(obj != NULL);
     CheckReferenceVisitor visitor(mod_union_table_, references_);
@@ -319,7 +325,7 @@ void ModUnionTableReferenceCache::Verify() {
       uintptr_t end = start + CardTable::kCardSize;
       SpaceBitmap* live_bitmap =
           heap->FindSpaceFromObject(reinterpret_cast<Object*>(start))->GetLiveBitmap();
-      live_bitmap->VisitMarkedRange(start, end, visitor, IdentityFunctor());
+      live_bitmap->VisitMarkedRange(start, end, visitor, VoidFunctor());
     }
   }
 }
@@ -339,7 +345,7 @@ void ModUnionTableReferenceCache::Update() {
     uintptr_t end = start + CardTable::kCardSize;
     SpaceBitmap* live_bitmap =
         heap->FindSpaceFromObject(reinterpret_cast<Object*>(start))->GetLiveBitmap();
-    live_bitmap->VisitMarkedRange(start, end, visitor, IdentityFunctor());
+    live_bitmap->VisitMarkedRange(start, end, visitor, VoidFunctor());
 
     // Update the corresponding references for the card.
     // TODO: C++0x auto
@@ -383,7 +389,7 @@ void ModUnionTableCardCache::ClearCards(ContinuousSpace* space) {
   CardTable* card_table = GetHeap()->GetCardTable();
   ModUnionClearCardSetVisitor visitor(&cleared_cards_);
   // Clear dirty cards in this space and update the corresponding mod-union bits.
-  card_table->VisitClear(space->Begin(), space->End(), visitor);
+  card_table->ModifyCardsAtomic(space->Begin(), space->End(), AgeCardVisitor(), visitor);
 }
 
 // Mark all references to the alloc space(s).
@@ -396,7 +402,7 @@ void ModUnionTableCardCache::MarkReferences(MarkSweep* mark_sweep) {
     uintptr_t end = start + CardTable::kCardSize;
     SpaceBitmap* live_bitmap =
        heap_->FindSpaceFromObject(reinterpret_cast<Object*>(start))->GetLiveBitmap();
-    live_bitmap->VisitMarkedRange(start, end, visitor, IdentityFunctor());
+    live_bitmap->VisitMarkedRange(start, end, visitor, VoidFunctor());
   }
 }
diff --git a/src/gc/mod_union_table.h b/src/gc/mod_union_table.h
index 5e4733ae52..84592a44b5 100644
--- a/src/gc/mod_union_table.h
+++ b/src/gc/mod_union_table.h
@@ -185,11 +185,9 @@ public:
         return (*it)->GetGcRetentionPolicy() == kGcRetentionPolicyAlwaysCollect;
       }
     }
-    if (ref != NULL) {
-      Implementation::GetHeap()->DumpSpaces();
-      LOG(FATAL) << "Reference " << ref << " not in any space!";
-    }
-    return false;
+    // Assume it points to a large object.
+    // TODO: Check.
+    return true;
   }
 };
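The new AddReference fallback returns true for a pointer that is in no continuous space instead of aborting, on the assumption that it lives in the (discontinuous) large object space. The TODO presumably means actually verifying that assumption; a hedged sketch of what such a check could look like (GetLargeObjectsSpace() and Contains() are assumed accessors, not shown in this diff):

    // Possible tightening of the fallback (sketch only):
    if (heap->GetLargeObjectsSpace()->Contains(ref)) {
      return true;  // Large objects are conservatively retained in the table.
    }
    heap->DumpSpaces();
    LOG(FATAL) << "Reference " << ref << " not in any space!";
    return false;
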
diff --git a/src/gc/space.cc b/src/gc/space.cc
index a7a5942804..8595dd00ec 100644
--- a/src/gc/space.cc
+++ b/src/gc/space.cc
@@ -28,6 +28,8 @@
 
 namespace art {
 
+static const bool kPrefetchDuringDlMallocFreeList = true;
+
 // Magic padding value that we use to check for buffer overruns.
 static const word kPaddingValue = 0xBAC0BAC0;
 
@@ -308,7 +310,7 @@ size_t DlMallocSpace::Free(Thread* self, Object* ptr) {
              *reinterpret_cast<word*>(reinterpret_cast<byte*>(ptr) + AllocationSize(ptr) -
                                       sizeof(word) - kChunkOverhead), kPaddingValue);
   }
-  const size_t bytes_freed = AllocationSize(ptr);
+  const size_t bytes_freed = InternalAllocationSize(ptr);
   num_bytes_allocated_ -= bytes_freed;
   --num_objects_allocated_;
   mspace_free(mspace_, ptr);
@@ -316,15 +318,21 @@ size_t DlMallocSpace::Free(Thread* self, Object* ptr) {
 }
 
 size_t DlMallocSpace::FreeList(Thread* self, size_t num_ptrs, Object** ptrs) {
+  DCHECK(ptrs != NULL);
+  // Don't need the lock to calculate the size of the freed pointers.
   size_t bytes_freed = 0;
   for (size_t i = 0; i < num_ptrs; i++) {
-    bytes_freed += AllocationSize(ptrs[i]);
+    Object* ptr = ptrs[i];
+    const size_t look_ahead = 8;
+    if (kPrefetchDuringDlMallocFreeList && i + look_ahead < num_ptrs) {
+      // The head of chunk for the allocation is sizeof(size_t) behind the allocation.
+      __builtin_prefetch(reinterpret_cast<char*>(ptrs[i + look_ahead]) - sizeof(size_t));
+    }
+    bytes_freed += InternalAllocationSize(ptr);
   }
 
-  MutexLock mu(self, lock_);
   if (kDebugSpaces) {
-    CHECK(ptrs != NULL);
     size_t num_broken_ptrs = 0;
     for (size_t i = 0; i < num_ptrs; i++) {
       if (!Contains(ptrs[i])) {
@@ -337,10 +345,14 @@ size_t DlMallocSpace::FreeList(Thread* self, size_t num_ptrs, Object** ptrs) {
     }
     CHECK_EQ(num_broken_ptrs, 0u);
   }
-  num_bytes_allocated_ -= bytes_freed;
-  num_objects_allocated_ -= num_ptrs;
-  mspace_bulk_free(mspace_, reinterpret_cast<void**>(ptrs), num_ptrs);
-  return bytes_freed;
+
+  {
+    MutexLock mu(self, lock_);
+    num_bytes_allocated_ -= bytes_freed;
+    num_objects_allocated_ -= num_ptrs;
+    mspace_bulk_free(mspace_, reinterpret_cast<void**>(ptrs), num_ptrs);
+    return bytes_freed;
+  }
 }
 
 // Callback from dlmalloc when it needs to increase the footprint
@@ -384,11 +396,16 @@ void* DlMallocSpace::MoreCore(intptr_t increment) {
   return original_end;
 }
 
-size_t DlMallocSpace::AllocationSize(const Object* obj) {
+// Virtual functions can't get inlined, so route hot internal callers through a
+// non-virtual helper.
+inline size_t DlMallocSpace::InternalAllocationSize(const Object* obj) {
   return mspace_usable_size(const_cast<void*>(reinterpret_cast<const void*>(obj))) +
       kChunkOverhead;
 }
 
+size_t DlMallocSpace::AllocationSize(const Object* obj) {
+  return InternalAllocationSize(obj);
+}
+
 void MspaceMadviseCallback(void* start, void* end, size_t used_bytes, void* /* arg */) {
   // Is this chunk in use?
   if (used_bytes != 0) {
diff --git a/src/gc/space.h b/src/gc/space.h
index 81b03fa3f7..d816a10695 100644
--- a/src/gc/space.h
+++ b/src/gc/space.h
@@ -29,11 +29,7 @@
 
 namespace art {
 
-#ifndef NDEBUG
-static const bool kDebugSpaces = true;
-#else
-static const bool kDebugSpaces = false;
-#endif
+static const bool kDebugSpaces = kIsDebugBuild;
 
 class DlMallocSpace;
 class ImageSpace;
@@ -357,6 +353,7 @@ class DlMallocSpace : public MemMapSpace, public AllocSpace {
   }
 
  private:
+  size_t InternalAllocationSize(const Object* obj);
   Object* AllocWithoutGrowthLocked(size_t num_bytes) EXCLUSIVE_LOCKS_REQUIRED(lock_);
 
   UniquePtr<SpaceBitmap> live_bitmap_;
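Free and FreeList are hot enough that the virtual AllocationSize call matters: a call through a vtable cannot be inlined because the target is only known at run time, so the real computation moves into a non-virtual helper that internal callers use directly. The pattern in a self-contained sketch (class and method names are illustrative):

    #include <cstddef>

    class Space {
     public:
      virtual ~Space() {}
      virtual size_t AllocationSize(const void* obj) = 0;
    };

    class MallocSpace : public Space {
     public:
      // Non-virtual, so hot internal loops can call (and inline) it directly.
      size_t InternalAllocationSize(const void* /* obj */) {
        return 16;  // placeholder for the mspace_usable_size() computation
      }
      // The virtual entry point just forwards, preserving the polymorphic
      // interface for external callers.
      virtual size_t AllocationSize(const void* obj) {
        return InternalAllocationSize(obj);
      }
    };
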
diff --git a/src/heap.cc b/src/heap.cc
index b4cf4a9af5..2c52ff2f3f 100644
--- a/src/heap.cc
+++ b/src/heap.cc
@@ -268,7 +268,7 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max
   // TODO: Count objects in the image space here.
   num_bytes_allocated_ = 0;
 
-  // Max stack size in bytes.
+  // Default mark stack size in bytes.
   static const size_t default_mark_stack_size = 64 * KB;
   mark_stack_.reset(ObjectStack::Create("dalvik-mark-stack", default_mark_stack_size));
   allocation_stack_.reset(ObjectStack::Create("dalvik-allocation-stack",
@@ -854,6 +854,8 @@ void Heap::CollectGarbage(bool clear_soft_references) {
 
 void Heap::PreZygoteFork() {
   static Mutex zygote_creation_lock_("zygote creation lock", kZygoteCreationLock);
+  // Do this before acquiring the zygote creation lock so that we don't get lock order violations.
+  CollectGarbage(false);
   Thread* self = Thread::Current();
   MutexLock mu(self, zygote_creation_lock_);
@@ -973,6 +975,7 @@ GcType Heap::CollectGarbageInternal(GcType gc_type, GcCause gc_cause, bool clear
   if (gc_type != kGcTypeSticky) {
     sticky_gc_count_ = 0;
   }
+
   if (concurrent_gc_) {
     CollectGarbageConcurrentMarkSweepPlan(self, gc_type, gc_cause, clear_soft_references);
   } else {
@@ -1024,17 +1027,8 @@ void Heap::CollectGarbageMarkSweepPlan(Thread* self, GcType gc_type, GcCause gc_
   // Swap allocation stack and live stack, enabling us to have new allocations during this GC.
   SwapStacks();
 
-  // We will need to know which cards were dirty for doing concurrent processing of dirty cards.
-  // TODO: Investigate using a mark stack instead of a vector.
-  std::vector<byte*> dirty_cards;
-  if (gc_type == kGcTypeSticky) {
-    for (Spaces::iterator it = spaces_.begin(); it != spaces_.end(); ++it) {
-      card_table_->GetDirtyCards(*it, dirty_cards);
-    }
-  }
-
-  // Clear image space cards and keep track of cards we cleared in the mod-union table.
-  ClearCards(timings);
+  // Process dirty cards and add dirty cards to mod union tables.
+  ProcessCards(timings);
 
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
   if (gc_type == kGcTypePartial) {
@@ -1088,7 +1082,9 @@ void Heap::CollectGarbageMarkSweepPlan(Thread* self, GcType gc_type, GcCause gc_
   if (gc_type != kGcTypeSticky) {
     mark_sweep.RecursiveMark(gc_type == kGcTypePartial, timings);
   } else {
-    mark_sweep.RecursiveMarkCards(card_table_.get(), dirty_cards, timings);
+    // Use kCardDirty - 1 since we want to scan all of the cards which we aged earlier when we
+    // did ProcessCards. These are the cards which were dirty before the GC started.
+    mark_sweep.RecursiveMarkDirtyObjects(CardTable::kCardDirty - 1);
   }
   mark_sweep.DisableFinger();
@@ -1261,7 +1257,7 @@ class VerifyReferenceVisitor {
       ScanVisitor scan_visitor;
       byte* byte_cover_begin = reinterpret_cast<byte*>(card_table->AddrFromCard(card_addr));
       card_table->Scan(bitmap, byte_cover_begin, byte_cover_begin + CardTable::kCardSize,
-                       scan_visitor, IdentityFunctor());
+                       scan_visitor, VoidFunctor());
 
       // Try and see if a mark sweep collector scans the reference.
       ObjectStack* mark_stack = heap_->mark_stack_.get();
@@ -1492,9 +1488,7 @@ void Heap::SwapLargeObjects() {
 }
 
 void Heap::SwapStacks() {
-  ObjectStack* temp = allocation_stack_.release();
-  allocation_stack_.reset(live_stack_.release());
-  live_stack_.reset(temp);
+  allocation_stack_.swap(live_stack_);
 
   // Sort the live stack so that we can quickly binary search it later.
   if (VERIFY_OBJECT_ENABLED) {
@@ -1502,7 +1496,7 @@ void Heap::SwapStacks() {
   }
 }
 
-void Heap::ClearCards(TimingLogger& timings) {
+void Heap::ProcessCards(TimingLogger& timings) {
   // Clear image space cards and keep track of cards we cleared in the mod-union table.
   for (Spaces::iterator it = spaces_.begin(); it != spaces_.end(); ++it) {
     ContinuousSpace* space = *it;
@@ -1513,8 +1507,10 @@ void Heap::ProcessCards(TimingLogger& timings) {
       zygote_mod_union_table_->ClearCards(space);
       timings.AddSplit("ZygoteModUnionClearCards");
     } else {
-      card_table_->ClearSpaceCards(space);
-      timings.AddSplit("ClearCards");
+      // No mod union table for the AllocSpace. Age the cards so that the GC knows that these
+      // cards were dirty before the GC started.
+      card_table_->ModifyCardsAtomic(space->Begin(), space->End(), AgeCardVisitor(), VoidFunctor());
+      timings.AddSplit("AllocSpaceClearCards");
     }
   }
 }
@@ -1522,12 +1518,11 @@ void Heap::CollectGarbageConcurrentMarkSweepPlan(Thread* self, GcType gc_type, G
                                                  bool clear_soft_references) {
   TimingLogger timings("ConcurrentCollectGarbageInternal", true);
-  uint64_t gc_begin = NanoTime(), root_begin = 0, root_end = 0, dirty_begin = 0, dirty_end = 0;
+  uint64_t gc_begin = NanoTime(), dirty_begin = 0, dirty_end = 0;
   std::stringstream gc_type_str;
   gc_type_str << gc_type << " ";
 
   ThreadList* thread_list = Runtime::Current()->GetThreadList();
-  std::vector<byte*> dirty_cards;
   size_t bytes_freed = 0;
   Object* cleared_references = NULL;
   {
@@ -1567,30 +1562,34 @@ void Heap::CollectGarbageConcurrentMarkSweepPlan(Thread* self, GcType gc_type, G
       mark_sweep.FindDefaultMarkBitmap();
     }
 
-    mark_sweep.MarkRootsCheckpoint();
-    timings.AddSplit("MarkRootsCheckpoint");
-
-    {
-      root_begin = NanoTime();
-
-      // Suspend all threads are get exclusive access to the heap.
+    if (verify_pre_gc_heap_) {
       thread_list->SuspendAll();
-      timings.AddSplit("SuspendAll");
-      Locks::mutator_lock_->AssertExclusiveHeld(self);
-
-      if (verify_pre_gc_heap_) {
+      {
         WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
         if (!VerifyHeapReferences()) {
           LOG(FATAL) << "Pre " << gc_type_str.str() << "Gc verification failed";
         }
         timings.AddSplit("VerifyHeapReferencesPreGC");
       }
+      thread_list->ResumeAll();
+    }
 
-      // Swap the stacks, this is safe since all the mutators are suspended at this point.
-      SwapStacks();
+    // Process dirty cards and add dirty cards to mod union tables.
+    ProcessCards(timings);
+
+    // Need to do this before the checkpoint since we don't want any threads to add references to
+    // the live stack during the recursive mark.
+    SwapStacks();
+    timings.AddSplit("SwapStacks");
 
-      // Check that all objects which reference things in the live stack are on dirty cards.
-      if (verify_missing_card_marks_) {
+    // Tell the running threads to suspend and mark their roots.
+    mark_sweep.MarkRootsCheckpoint();
+    timings.AddSplit("MarkRootsCheckpoint");
+
+    // Check that all objects which reference things in the live stack are on dirty cards.
+    if (verify_missing_card_marks_) {
+      thread_list->SuspendAll();
+      {
         ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
         // Sort the live stack so that we can quickly binary search it later.
         std::sort(live_stack_->Begin(), live_stack_->End());
@@ -1598,35 +1597,22 @@ void Heap::CollectGarbageConcurrentMarkSweepPlan(Thread* self, GcType gc_type, G
         LOG(FATAL) << "Pre GC verification of missing card marks failed";
       }
     }
-
-    // We will need to know which cards were dirty for doing concurrent processing of dirty cards.
-    // TODO: Investigate using a mark stack instead of a vector.
-    if (gc_type == kGcTypeSticky) {
-      dirty_cards.reserve(4 * KB);
-      for (Spaces::iterator it = spaces_.begin(); it != spaces_.end(); ++it) {
-        card_table_->GetDirtyCards(*it, dirty_cards);
-      }
-      timings.AddSplit("GetDirtyCards");
-    }
-
-    // Clear image space cards and keep track of cards we cleared in the mod-union table.
-    ClearCards(timings);
+      thread_list->ResumeAll();
     }
 
     if (verify_mod_union_table_) {
+      thread_list->SuspendAll();
       ReaderMutexLock reader_lock(self, *Locks::heap_bitmap_lock_);
       zygote_mod_union_table_->Update();
       zygote_mod_union_table_->Verify();
       mod_union_table_->Update();
       mod_union_table_->Verify();
+      thread_list->ResumeAll();
     }
 
-    // Allow mutators to go again, acquire share on mutator_lock_ to continue.
-    thread_list->ResumeAll();
     {
+      // Allow mutators to go again, acquire share on mutator_lock_ to continue.
       ReaderMutexLock reader_lock(self, *Locks::mutator_lock_);
-      root_end = NanoTime();
-      timings.AddSplit("RootEnd");
 
       // Mark the roots which we can do concurrently.
       WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
@@ -1649,7 +1635,8 @@ void Heap::CollectGarbageConcurrentMarkSweepPlan(Thread* self, GcType gc_type, G
         // Recursively mark all the non-image bits set in the mark bitmap.
         mark_sweep.RecursiveMark(gc_type == kGcTypePartial, timings);
       } else {
-        mark_sweep.RecursiveMarkCards(card_table_.get(), dirty_cards, timings);
+        mark_sweep.RecursiveMarkDirtyObjects(CardTable::kCardDirty - 1);
+        timings.AddSplit("RecursiveMarkCards");
       }
       mark_sweep.DisableFinger();
     }
@@ -1779,20 +1766,18 @@ void Heap::CollectGarbageConcurrentMarkSweepPlan(Thread* self, GcType gc_type, G
   timings.AddSplit("Finish");
 
   // If the GC was slow, then print timings in the log.
-  uint64_t pause_roots = (root_end - root_begin) / 1000 * 1000;
-  uint64_t pause_dirty = (dirty_end - dirty_begin) / 1000 * 1000;
+  uint64_t pause_time = (dirty_end - dirty_begin) / 1000 * 1000;
   uint64_t duration = (NanoTime() - gc_begin) / 1000 * 1000;
-  total_paused_time_ += pause_roots + pause_dirty;
-  if (pause_roots > MsToNs(5) || pause_dirty > MsToNs(5) ||
-      (gc_cause == kGcCauseForAlloc && duration > MsToNs(20))) {
+  total_paused_time_ += pause_time;
+  if (pause_time > MsToNs(5) || (gc_cause == kGcCauseForAlloc && duration > MsToNs(20))) {
     const size_t percent_free = GetPercentFree();
     const size_t current_heap_size = GetUsedMemorySize();
     const size_t total_memory = GetTotalMemory();
     LOG(INFO) << gc_cause << " " << gc_type_str.str()
              << "Concurrent GC freed " << PrettySize(bytes_freed) << ", " << percent_free
              << "% free, " << PrettySize(current_heap_size) << "/"
-             << PrettySize(total_memory) << ", " << "paused " << PrettyDuration(pause_roots)
-             << "+" << PrettyDuration(pause_dirty) << " total " << PrettyDuration(duration);
+             << PrettySize(total_memory) << ", " << "paused " << PrettyDuration(pause_time)
+             << " total " << PrettyDuration(duration);
     if (VLOG_IS_ON(heap)) {
       timings.Dump();
     }
@@ -1946,8 +1931,8 @@ Object* Heap::DequeuePendingReference(Object** list) {
   Object* head = (*list)->GetFieldObject<Object*>(reference_pendingNext_offset_, false);
   Object* ref;
 
-  // TODO: Remove this lock, use atomic stacks for storing references.
-  MutexLock mu(Thread::Current(), *reference_queue_lock_);
+  // Note: the following code is thread safe because it is only called from ProcessReferences,
+  // which is single threaded.
   if (*list == head) {
     ref = *list;
     *list = NULL;
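Taken together, the heap.cc changes give every card a small per-cycle state machine instead of the old dirty-card vectors. A sketch of the lifecycle, assuming the Dalvik-era value kCardDirty = 0x70 (the constant's definition is not shown in this diff):

    // Card value lifecycle across one GC under the aging scheme:
    //   mutator write barrier:    *card = kCardDirty             (0x70)
    //   ProcessCards at GC start: AgeCardVisitor: 0x70 -> 0x6F, anything else -> 0
    //   sticky-GC scan:           Scan(..., minimum_age = kCardDirty - 1)
    //                             visits cards valued 0x6F or 0x70, i.e. both
    //                             pre-GC dirty cards and cards dirtied while
    //                             the concurrent phase ran
    //   next ProcessCards:        0x6F -> 0, unless the card was re-dirtied
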
diff --git a/src/heap.h b/src/heap.h
index 6c4c38bd7a..584718ed49 100644
--- a/src/heap.h
+++ b/src/heap.h
@@ -59,6 +59,17 @@ class TimingLogger;
 typedef AtomicStack<Object*> ObjectStack;
 typedef std::vector<ContinuousSpace*> Spaces;
 
+class AgeCardVisitor {
+ public:
+  byte operator ()(byte card) const {
+    if (card == CardTable::kCardDirty) {
+      return card - 1;
+    } else {
+      return 0;
+    }
+  }
+};
+
 // The ordering of the enum matters, it is used to determine which GCs are run first.
 enum GcType {
   // No Gc
@@ -382,7 +393,7 @@ class Heap {
   void SwapStacks();
 
   // Clear cards and update the mod union table.
-  void ClearCards(TimingLogger& timings);
+  void ProcessCards(TimingLogger& timings);
 
   Spaces spaces_;
diff --git a/src/utils.h b/src/utils.h
index d33cc4bf37..b2b2de945d 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -342,10 +342,25 @@ bool IsValidZipFilename(const std::string& filename);
 bool IsValidDexFilename(const std::string& filename);
 bool IsValidOatFilename(const std::string& filename);
 
-class IdentityFunctor {
+class VoidFunctor {
  public:
-  template <typename T>
-  inline T operator () (T t) const { return t; }
+  template <typename A>
+  inline void operator () (A a) const {
+    UNUSED(a);
+  }
+
+  template <typename A, typename B>
+  inline void operator () (A a, B b) const {
+    UNUSED(a);
+    UNUSED(b);
+  }
+
+  template <typename A, typename B, typename C>
+  inline void operator () (A a, B b, C c) const {
+    UNUSED(a);
+    UNUSED(b);
+    UNUSED(c);
+  }
 };
 
 }  // namespace art
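IdentityFunctor returned its argument so it could serve as a do-nothing finger visitor, but every call site discarded the result; VoidFunctor makes the intent explicit and, through its one- to three-argument overloads, also fits the new modified(card, old_value, new_value) callback of ModifyCardsAtomic. A sketch of why an empty inline functor is a zero-cost default callback (ForEachChanged is illustrative, not from the source):

    #include <cstddef>

    typedef unsigned char byte;

    struct VoidFunctor {
      template <typename A>
      void operator()(A) const {}
      template <typename A, typename B, typename C>
      void operator()(A, B, C) const {}
    };

    template <typename Modified>
    void ForEachChanged(byte* cards, size_t n, const Modified& modified) {
      for (size_t i = 0; i < n; ++i) {
        if (cards[i] != 0) {
          modified(&cards[i], cards[i], static_cast<byte>(0));
        }
      }
    }
    // ForEachChanged(table, n, VoidFunctor()) instantiates the empty
    // operator(), which inlines away entirely -- no function-pointer or
    // virtual-call overhead for callers that don't care about the event.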