29 files changed, 1272 insertions(+), 318 deletions(-)
diff --git a/runtime/Android.bp b/runtime/Android.bp index a2af30cfe5..f8b89688d0 100644 --- a/runtime/Android.bp +++ b/runtime/Android.bp @@ -142,6 +142,7 @@ libart_cc_defaults { "gc/collector/garbage_collector.cc", "gc/collector/immune_region.cc", "gc/collector/immune_spaces.cc", + "gc/collector/mark_compact.cc", "gc/collector/mark_sweep.cc", "gc/collector/partial_mark_sweep.cc", "gc/collector/semi_space.cc", diff --git a/runtime/gc/accounting/bitmap.h b/runtime/gc/accounting/bitmap.h index cab8ecf015..b050442a3e 100644 --- a/runtime/gc/accounting/bitmap.h +++ b/runtime/gc/accounting/bitmap.h @@ -140,7 +140,7 @@ class MemoryRangeBitmap : public Bitmap { // End of the memory range that the bitmap covers. ALWAYS_INLINE uintptr_t CoverEnd() const { - return cover_end_; + return cover_begin_ + kAlignment * BitmapSize(); } // Return the address associated with a bit index. @@ -150,41 +150,53 @@ class MemoryRangeBitmap : public Bitmap { return addr; } + ALWAYS_INLINE uintptr_t BitIndexFromAddrUnchecked(uintptr_t addr) const { + return (addr - CoverBegin()) / kAlignment; + } + // Return the bit index associated with an address . ALWAYS_INLINE uintptr_t BitIndexFromAddr(uintptr_t addr) const { - DCHECK(HasAddress(addr)) << CoverBegin() << " <= " << addr << " < " << CoverEnd(); - return (addr - CoverBegin()) / kAlignment; + uintptr_t result = BitIndexFromAddrUnchecked(addr); + DCHECK(result < BitmapSize()) << CoverBegin() << " <= " << addr << " < " << CoverEnd(); + return result; } ALWAYS_INLINE bool HasAddress(const uintptr_t addr) const { - return cover_begin_ <= addr && addr < cover_end_; + // Don't use BitIndexFromAddr() here as the addr passed to this function + // could be outside the range. If addr < cover_begin_, then the result + // underflows to some very large value past the end of the bitmap. + // Therefore, all operations are unsigned here. + bool ret = (addr - CoverBegin()) / kAlignment < BitmapSize(); + if (ret) { + DCHECK(CoverBegin() <= addr && addr < CoverEnd()) + << CoverBegin() << " <= " << addr << " < " << CoverEnd(); + } + return ret; } ALWAYS_INLINE bool Set(uintptr_t addr) { return SetBit(BitIndexFromAddr(addr)); } - ALWAYS_INLINE bool Clear(size_t addr) { + ALWAYS_INLINE bool Clear(uintptr_t addr) { return ClearBit(BitIndexFromAddr(addr)); } - ALWAYS_INLINE bool Test(size_t addr) const { + ALWAYS_INLINE bool Test(uintptr_t addr) const { return TestBit(BitIndexFromAddr(addr)); } // Returns true if the object was previously set. 
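[Editor's aside, not part of the patch] The rewritten HasAddress() above folds both bounds checks into one comparison by exploiting unsigned wrap-around: if addr is below cover_begin_, the subtraction wraps to a huge value that fails the "< BitmapSize()" test. An illustrative, self-contained sketch of that idiom (the names and numbers below are invented, not the real bitmap fields):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    constexpr uintptr_t kAlignment = 8;  // stand-in for the bitmap's alignment

    // Mirrors the shape of MemoryRangeBitmap::HasAddress(): one unsigned
    // comparison rejects addresses below the range and at/after its end.
    bool InRange(uintptr_t cover_begin, size_t num_bits, uintptr_t addr) {
      return (addr - cover_begin) / kAlignment < num_bits;
    }

    int main() {
      uintptr_t begin = 0x1000;
      size_t bits = 512;  // covers [0x1000, 0x2000) at 8-byte granularity
      std::printf("%d %d %d\n",
                  InRange(begin, bits, 0x0ff8),   // below begin  -> 0 (wraps)
                  InRange(begin, bits, 0x1ff8),   // inside       -> 1
                  InRange(begin, bits, 0x2000));  // one past end -> 0
      return 0;
    }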
- ALWAYS_INLINE bool AtomicTestAndSet(size_t addr) { + ALWAYS_INLINE bool AtomicTestAndSet(uintptr_t addr) { return AtomicTestAndSetBit(BitIndexFromAddr(addr)); } private: MemoryRangeBitmap(MemMap&& mem_map, uintptr_t begin, size_t num_bits) : Bitmap(std::move(mem_map), num_bits), - cover_begin_(begin), - cover_end_(begin + kAlignment * num_bits) {} + cover_begin_(begin) {} uintptr_t const cover_begin_; - uintptr_t const cover_end_; DISALLOW_IMPLICIT_CONSTRUCTORS(MemoryRangeBitmap); }; diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc index b4026fc3f3..4a84799431 100644 --- a/runtime/gc/accounting/mod_union_table.cc +++ b/runtime/gc/accounting/mod_union_table.cc @@ -388,6 +388,11 @@ void ModUnionTableReferenceCache::Dump(std::ostream& os) { void ModUnionTableReferenceCache::VisitObjects(ObjectCallback callback, void* arg) { CardTable* const card_table = heap_->GetCardTable(); ContinuousSpaceBitmap* live_bitmap = space_->GetLiveBitmap(); + // Use an unordered_set for constant time search of card in the second loop. + // We don't want to change cleared_cards_ to unordered so that traversals are + // sequential in address order. + // TODO: Optimize this. + std::unordered_set<const uint8_t*> card_lookup_map; for (uint8_t* card : cleared_cards_) { uintptr_t start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card)); uintptr_t end = start + CardTable::kCardSize; @@ -396,10 +401,13 @@ void ModUnionTableReferenceCache::VisitObjects(ObjectCallback callback, void* ar [callback, arg](mirror::Object* obj) { callback(obj, arg); }); + card_lookup_map.insert(card); } - // This may visit the same card twice, TODO avoid this. for (const auto& pair : references_) { const uint8_t* card = pair.first; + if (card_lookup_map.find(card) != card_lookup_map.end()) { + continue; + } uintptr_t start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card)); uintptr_t end = start + CardTable::kCardSize; live_bitmap->VisitMarkedRange(start, diff --git a/runtime/gc/collector/mark_compact-inl.h b/runtime/gc/collector/mark_compact-inl.h index 1397fa6c51..9c46ed4f60 100644 --- a/runtime/gc/collector/mark_compact-inl.h +++ b/runtime/gc/collector/mark_compact-inl.h @@ -37,9 +37,10 @@ inline uintptr_t MarkCompact::LiveWordsBitmap<kAlignment>::SetLiveWords(uintptr_ // Bits that needs to be set in the first word, if it's not also the last word mask = ~(mask - 1); // loop over all the words, except the last one. - while (bm_address < end_bm_address) { + // TODO: optimize by using memset. Sometimes this function may get called with + // large ranges. + for (; bm_address < end_bm_address; bm_address++) { *bm_address |= mask; - bm_address++; // This needs to be set only once as we are setting all bits in the // subsequent iterations. Hopefully, the compiler will optimize it. mask = ~0; @@ -52,10 +53,15 @@ inline uintptr_t MarkCompact::LiveWordsBitmap<kAlignment>::SetLiveWords(uintptr_ template <size_t kAlignment> template <typename Visitor> inline void MarkCompact::LiveWordsBitmap<kAlignment>::VisitLiveStrides(uintptr_t begin_bit_idx, + uint8_t* end, const size_t bytes, Visitor&& visitor) const { - // TODO: we may require passing end addr/end_bit_offset to the function. - const uintptr_t end_bit_idx = MemRangeBitmap::BitIndexFromAddr(CoverEnd()); + DCHECK(IsAligned<kAlignment>(end)); + // We have to use the unchecked version of BitIndexFromAddr() as 'end' could + // be outside the range. Do explicit check here. 
+ DCHECK_LE(reinterpret_cast<uintptr_t>(end), MemRangeBitmap::CoverEnd()); + const uintptr_t end_bit_idx = + MemRangeBitmap::BitIndexFromAddrUnchecked(reinterpret_cast<uintptr_t>(end)); DCHECK_LT(begin_bit_idx, end_bit_idx); uintptr_t begin_word_idx = Bitmap::BitIndexToWordIndex(begin_bit_idx); const uintptr_t end_word_idx = Bitmap::BitIndexToWordIndex(end_bit_idx); @@ -138,10 +144,10 @@ inline void MarkCompact::LiveWordsBitmap<kAlignment>::VisitLiveStrides(uintptr_t template <size_t kAlignment> inline -uint32_t MarkCompact::LiveWordsBitmap<kAlignment>::FindNthLiveWordOffset(size_t offset_vec_idx, +uint32_t MarkCompact::LiveWordsBitmap<kAlignment>::FindNthLiveWordOffset(size_t chunk_idx, uint32_t n) const { DCHECK_LT(n, kBitsPerVectorWord); - const size_t index = offset_vec_idx * kBitmapWordsPerVectorWord; + const size_t index = chunk_idx * kBitmapWordsPerVectorWord; for (uint32_t i = 0; i < kBitmapWordsPerVectorWord; i++) { uintptr_t word = Bitmap::Begin()[index + i]; if (~word == 0) { @@ -183,12 +189,56 @@ inline void MarkCompact::UpdateRef(mirror::Object* obj, MemberOffset offset) { } } -inline void MarkCompact::UpdateRoot(mirror::CompressedReference<mirror::Object>* root) { +inline bool MarkCompact::VerifyRootSingleUpdate(void* root, + mirror::Object* old_ref, + const RootInfo& info) { + void* stack_end = stack_end_; + void* stack_addr = stack_addr_; + + if (!live_words_bitmap_->HasAddress(old_ref)) { + return false; + } + if (stack_end == nullptr) { + pthread_attr_t attr; + size_t stack_size; + pthread_getattr_np(pthread_self(), &attr); + pthread_attr_getstack(&attr, &stack_addr, &stack_size); + pthread_attr_destroy(&attr); + stack_end = reinterpret_cast<char*>(stack_addr) + stack_size; + } + if (root < stack_addr || root > stack_end) { + auto ret = updated_roots_.insert(root); + DCHECK(ret.second) << "root=" << root + << " old_ref=" << old_ref + << " stack_addr=" << stack_addr + << " stack_end=" << stack_end; + } + DCHECK(reinterpret_cast<uint8_t*>(old_ref) >= black_allocations_begin_ + || live_words_bitmap_->Test(old_ref)) + << "ref=" << old_ref + << " RootInfo=" << info; + return true; +} + +inline void MarkCompact::UpdateRoot(mirror::CompressedReference<mirror::Object>* root, + const RootInfo& info) { DCHECK(!root->IsNull()); mirror::Object* old_ref = root->AsMirrorPtr(); - mirror::Object* new_ref = PostCompactAddress(old_ref); - if (old_ref != new_ref) { - root->Assign(new_ref); + if (VerifyRootSingleUpdate(root, old_ref, info)) { + mirror::Object* new_ref = PostCompactAddress(old_ref); + if (old_ref != new_ref) { + root->Assign(new_ref); + } + } +} + +inline void MarkCompact::UpdateRoot(mirror::Object** root, const RootInfo& info) { + mirror::Object* old_ref = *root; + if (VerifyRootSingleUpdate(root, old_ref, info)) { + mirror::Object* new_ref = PostCompactAddress(old_ref); + if (old_ref != new_ref) { + *root = new_ref; + } } } @@ -197,8 +247,8 @@ inline size_t MarkCompact::LiveWordsBitmap<kAlignment>::CountLiveWordsUpto(size_ const size_t word_offset = Bitmap::BitIndexToWordIndex(bit_idx); uintptr_t word; size_t ret = 0; - // This is needed only if we decide to make offset_vector chunks 128-bit but - // still choose to use 64-bit word for bitmap. Ideally we should use 128-bit + // This is needed only if we decide to make chunks 128-bit but still + // choose to use 64-bit word for bitmap. Ideally we should use 128-bit // SIMD instructions to compute popcount. 
if (kBitmapWordsPerVectorWord > 1) { for (size_t i = RoundDown(word_offset, kBitmapWordsPerVectorWord); i < word_offset; i++) { @@ -208,26 +258,46 @@ inline size_t MarkCompact::LiveWordsBitmap<kAlignment>::CountLiveWordsUpto(size_ } word = Bitmap::Begin()[word_offset]; const uintptr_t mask = Bitmap::BitIndexToMask(bit_idx); - DCHECK_NE(word & mask, 0u); + DCHECK_NE(word & mask, 0u) + << " word_offset:" << word_offset + << " bit_idx:" << bit_idx + << " bit_idx_in_word:" << (bit_idx % Bitmap::kBitsPerBitmapWord) + << std::hex << " word: 0x" << word + << " mask: 0x" << mask << std::dec; ret += POPCOUNT(word & (mask - 1)); return ret; } +inline mirror::Object* MarkCompact::PostCompactBlackObjAddr(mirror::Object* old_ref) const { + return reinterpret_cast<mirror::Object*>(reinterpret_cast<uint8_t*>(old_ref) + - black_objs_slide_diff_); +} + +inline mirror::Object* MarkCompact::PostCompactOldObjAddr(mirror::Object* old_ref) const { + const uintptr_t begin = live_words_bitmap_->Begin(); + const uintptr_t addr_offset = reinterpret_cast<uintptr_t>(old_ref) - begin; + const size_t vec_idx = addr_offset / kOffsetChunkSize; + const size_t live_bytes_in_bitmap_word = + live_words_bitmap_->CountLiveWordsUpto(addr_offset / kAlignment) * kAlignment; + return reinterpret_cast<mirror::Object*>(begin + + chunk_info_vec_[vec_idx] + + live_bytes_in_bitmap_word); +} + +inline mirror::Object* MarkCompact::PostCompactAddressUnchecked(mirror::Object* old_ref) const { + if (reinterpret_cast<uint8_t*>(old_ref) >= black_allocations_begin_) { + return PostCompactBlackObjAddr(old_ref); + } + return PostCompactOldObjAddr(old_ref); +} + inline mirror::Object* MarkCompact::PostCompactAddress(mirror::Object* old_ref) const { // TODO: To further speedup the check, maybe we should consider caching heap // start/end in this object. if (LIKELY(live_words_bitmap_->HasAddress(old_ref))) { - const uintptr_t begin = live_words_bitmap_->Begin(); - const uintptr_t addr_offset = reinterpret_cast<uintptr_t>(old_ref) - begin; - const size_t vec_idx = addr_offset / kOffsetChunkSize; - const size_t live_bytes_in_bitmap_word = - live_words_bitmap_->CountLiveWordsUpto(addr_offset / kAlignment) * kAlignment; - return reinterpret_cast<mirror::Object*>(begin - + offset_vector_[vec_idx] - + live_bytes_in_bitmap_word); - } else { - return old_ref; + return PostCompactAddressUnchecked(old_ref); } + return old_ref; } } // namespace collector diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc index b4d903df59..0bad04424a 100644 --- a/runtime/gc/collector/mark_compact.cc +++ b/runtime/gc/collector/mark_compact.cc @@ -16,20 +16,26 @@ #include "mark_compact-inl.h" +#include "base/quasi_atomic.h" #include "base/systrace.h" #include "gc/accounting/mod_union_table-inl.h" #include "gc/reference_processor.h" #include "gc/space/bump_pointer_space.h" +#include "jit/jit_code_cache.h" #include "mirror/object-refvisitor-inl.h" #include "scoped_thread_state_change-inl.h" #include "thread_list.h" #include <numeric> +#include <sys/mman.h> namespace art { namespace gc { namespace collector { +#ifndef MREMAP_DONTUNMAP +#define MREMAP_DONTUNMAP 4 +#endif // Turn of kCheckLocks when profiling the GC as it slows down the GC // significantly. 
static constexpr bool kCheckLocks = kDebugLocking; @@ -47,7 +53,8 @@ MarkCompact::MarkCompact(Heap* heap) : GarbageCollector(heap, "concurrent mark compact"), gc_barrier_(0), mark_stack_lock_("mark compact mark stack lock", kMarkSweepMarkStackLock), - bump_pointer_space_(heap->GetBumpPointerSpace()) { + bump_pointer_space_(heap->GetBumpPointerSpace()), + compacting_(false) { // TODO: Depending on how the bump-pointer space move is implemented. If we // switch between two virtual memories each time, then we will have to // initialize live_words_bitmap_ accordingly. @@ -56,27 +63,27 @@ MarkCompact::MarkCompact(Heap* heap) reinterpret_cast<uintptr_t>(bump_pointer_space_->Limit()))); // Create one MemMap for all the data structures - size_t offset_vector_size = bump_pointer_space_->Capacity() / kOffsetChunkSize; + size_t chunk_info_vec_size = bump_pointer_space_->Capacity() / kOffsetChunkSize; size_t nr_moving_pages = bump_pointer_space_->Capacity() / kPageSize; size_t nr_non_moving_pages = heap->GetNonMovingSpace()->Capacity() / kPageSize; std::string err_msg; - info_map_.reset(MemMap::MapAnonymous("Concurrent mark-compact offset-vector", - offset_vector_size * sizeof(uint32_t) - + nr_non_moving_pages * sizeof(ObjReference) - + nr_moving_pages * sizeof(ObjReference) - + nr_moving_pages * sizeof(uint32_t), - PROT_READ | PROT_WRITE, - /*low_4gb=*/ false, - &err_msg)); - if (UNLIKELY(!info_map_->IsValid())) { - LOG(ERROR) << "Failed to allocate concurrent mark-compact offset-vector: " << err_msg; + info_map_ = MemMap::MapAnonymous("Concurrent mark-compact chunk-info vector", + chunk_info_vec_size * sizeof(uint32_t) + + nr_non_moving_pages * sizeof(ObjReference) + + nr_moving_pages * sizeof(ObjReference) + + nr_moving_pages * sizeof(uint32_t), + PROT_READ | PROT_WRITE, + /*low_4gb=*/ false, + &err_msg); + if (UNLIKELY(!info_map_.IsValid())) { + LOG(ERROR) << "Failed to allocate concurrent mark-compact chunk-info vector: " << err_msg; } else { - uint8_t* p = info_map_->Begin(); - offset_vector_ = reinterpret_cast<uint32_t*>(p); - vector_length_ = offset_vector_size; + uint8_t* p = info_map_.Begin(); + chunk_info_vec_ = reinterpret_cast<uint32_t*>(p); + vector_length_ = chunk_info_vec_size; - p += offset_vector_size * sizeof(uint32_t); + p += chunk_info_vec_size * sizeof(uint32_t); first_objs_non_moving_space_ = reinterpret_cast<ObjReference*>(p); p += nr_non_moving_pages * sizeof(ObjReference); @@ -85,6 +92,17 @@ MarkCompact::MarkCompact(Heap* heap) p += nr_moving_pages * sizeof(ObjReference); pre_compact_offset_moving_space_ = reinterpret_cast<uint32_t*>(p); } + + from_space_map_ = MemMap::MapAnonymous("Concurrent mark-compact from-space", + bump_pointer_space_->Capacity(), + PROT_NONE, + /*low_4gb=*/ kObjPtrPoisoning, + &err_msg); + if (UNLIKELY(!from_space_map_.IsValid())) { + LOG(ERROR) << "Failed to allocate concurrent mark-compact from-space" << err_msg; + } else { + from_space_begin_ = from_space_map_.Begin(); + } } void MarkCompact::BindAndResetBitmaps() { @@ -145,9 +163,10 @@ void MarkCompact::InitializePhase() { mark_stack_ = heap_->GetMarkStack(); CHECK(mark_stack_->IsEmpty()); immune_spaces_.Reset(); - compacting_ = false; moving_first_objs_count_ = 0; non_moving_first_objs_count_ = 0; + black_page_count_ = 0; + from_space_slide_diff_ = from_space_begin_ - bump_pointer_space_->Begin(); } void MarkCompact::RunPhases() { @@ -161,7 +180,7 @@ void MarkCompact::RunPhases() { } { ScopedPause pause(this); - PausePhase(); + MarkingPause(); if (kIsDebugBuild) { 
bump_pointer_space_->AssertAllThreadLocalBuffersAreRevoked(); } @@ -171,14 +190,15 @@ void MarkCompact::RunPhases() { ReclaimPhase(); PrepareForCompaction(); } - { - ScopedPause pause(this); - PreCompactionPhase(); - } + + compacting_ = true; + PreCompactionPhase(); if (kConcurrentCompaction) { ReaderMutexLock mu(self, *Locks::mutator_lock_); CompactionPhase(); } + compacting_ = false; + GetHeap()->PostGcVerification(this); FinishPhase(); thread_running_gc_ = nullptr; @@ -187,22 +207,28 @@ void MarkCompact::RunPhases() { void MarkCompact::InitMovingSpaceFirstObjects(const size_t vec_len) { // Find the first live word first. size_t to_space_page_idx = 0; - uint32_t offset_in_vec_word; + uint32_t offset_in_chunk_word; uint32_t offset; mirror::Object* obj; const uintptr_t heap_begin = current_space_bitmap_->HeapBegin(); - size_t offset_vec_idx; + size_t chunk_idx; // Find the first live word in the space - for (offset_vec_idx = 0; offset_vector_[offset_vec_idx] == 0; offset_vec_idx++) { - if (offset_vec_idx > vec_len) { + for (chunk_idx = 0; chunk_info_vec_[chunk_idx] == 0; chunk_idx++) { + if (chunk_idx > vec_len) { // We don't have any live data on the moving-space. return; } } // Use live-words bitmap to find the first word - offset_in_vec_word = live_words_bitmap_->FindNthLiveWordOffset(offset_vec_idx, /*n*/ 0); - offset = offset_vec_idx * kBitsPerVectorWord + offset_in_vec_word; + offset_in_chunk_word = live_words_bitmap_->FindNthLiveWordOffset(chunk_idx, /*n*/ 0); + offset = chunk_idx * kBitsPerVectorWord + offset_in_chunk_word; + DCHECK(live_words_bitmap_->Test(offset)) << "offset=" << offset + << " chunk_idx=" << chunk_idx + << " N=0" + << " offset_in_word=" << offset_in_chunk_word + << " word=" << std::hex + << live_words_bitmap_->GetWord(chunk_idx); // The first object doesn't require using FindPrecedingObject(). obj = reinterpret_cast<mirror::Object*>(heap_begin + offset * kAlignment); // TODO: add a check to validate the object. 
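[Editor's aside, not part of the patch] InitMovingSpaceFirstObjects() above accumulates chunk_info_vec_ entries until a page's worth of live bytes is covered, then backs up one chunk to locate where each compacted page begins. A simplified, self-contained model of that accumulation (the chunk sizes are made up; the real code additionally resolves the first object and its intra-page offset through the live-words bitmap, and handles page 0 separately):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      constexpr uint32_t kPageSize = 4096;
      // Hypothetical live-byte counts per chunk; each chunk covers at most one page.
      std::vector<uint32_t> chunk_live = {1024, 4096, 512, 0, 2048, 4096};
      uint32_t page_live_bytes = 0;
      size_t chunk_idx = 0;
      size_t page = 1;  // page 0's first object is located separately in the patch
      while (true) {
        // Accumulate chunks until strictly more than one page of live data is covered.
        for (; page_live_bytes <= kPageSize; chunk_idx++) {
          if (chunk_idx >= chunk_live.size()) {
            std::printf("ran out of live data after page %zu\n", page - 1);
            return 0;
          }
          page_live_bytes += chunk_live[chunk_idx];
        }
        chunk_idx--;                   // this chunk straddles the page boundary
        page_live_bytes -= kPageSize;  // its live bytes spilling into 'page'
        std::printf("compacted page %zu starts %u bytes into chunk %zu's live data\n",
                    page, chunk_live[chunk_idx] - page_live_bytes, chunk_idx);
        page++;
        chunk_idx++;
      }
    }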
@@ -213,24 +239,31 @@ void MarkCompact::InitMovingSpaceFirstObjects(const size_t vec_len) { uint32_t page_live_bytes = 0; while (true) { - for (; page_live_bytes <= kPageSize; offset_vec_idx++) { - if (offset_vec_idx > vec_len) { + for (; page_live_bytes <= kPageSize; chunk_idx++) { + if (chunk_idx > vec_len) { moving_first_objs_count_ = to_space_page_idx; return; } - page_live_bytes += offset_vector_[offset_vec_idx]; + page_live_bytes += chunk_info_vec_[chunk_idx]; } - offset_vec_idx--; + chunk_idx--; page_live_bytes -= kPageSize; DCHECK_LE(page_live_bytes, kOffsetChunkSize); - DCHECK_LE(page_live_bytes, offset_vector_[offset_vec_idx]) - << " offset_vec_idx=" << offset_vec_idx + DCHECK_LE(page_live_bytes, chunk_info_vec_[chunk_idx]) + << " chunk_idx=" << chunk_idx << " to_space_page_idx=" << to_space_page_idx << " vec_len=" << vec_len; - offset_in_vec_word = + DCHECK(IsAligned<kAlignment>(chunk_info_vec_[chunk_idx] - page_live_bytes)); + offset_in_chunk_word = live_words_bitmap_->FindNthLiveWordOffset( - offset_vec_idx, (offset_vector_[offset_vec_idx] - page_live_bytes) / kAlignment); - offset = offset_vec_idx * kBitsPerVectorWord + offset_in_vec_word; + chunk_idx, (chunk_info_vec_[chunk_idx] - page_live_bytes) / kAlignment); + offset = chunk_idx * kBitsPerVectorWord + offset_in_chunk_word; + DCHECK(live_words_bitmap_->Test(offset)) + << "offset=" << offset + << " chunk_idx=" << chunk_idx + << " N=" << ((chunk_info_vec_[chunk_idx] - page_live_bytes) / kAlignment) + << " offset_in_word=" << offset_in_chunk_word + << " word=" << std::hex << live_words_bitmap_->GetWord(chunk_idx); // TODO: Can we optimize this for large objects? If we are continuing a // large object that spans multiple pages, then we may be able to do without // calling FindPrecedingObject(). @@ -242,7 +275,7 @@ void MarkCompact::InitMovingSpaceFirstObjects(const size_t vec_len) { pre_compact_offset_moving_space_[to_space_page_idx] = offset; first_objs_moving_space_[to_space_page_idx].Assign(obj); to_space_page_idx++; - offset_vec_idx++; + chunk_idx++; } } @@ -279,7 +312,14 @@ void MarkCompact::InitNonMovingSpaceFirstObjects() { // Utilize, if any, large object that started in some preceding page, but // overlaps with this page as well. if (prev_obj != nullptr && prev_obj_end > begin) { + DCHECK_LT(prev_obj, reinterpret_cast<mirror::Object*>(begin)); first_objs_non_moving_space_[page_idx].Assign(prev_obj); + mirror::Class* klass = prev_obj->GetClass<kVerifyNone, kWithoutReadBarrier>(); + if (bump_pointer_space_->HasAddress(klass)) { + LOG(WARNING) << "found inter-page object " << prev_obj + << " in non-moving space with klass " << klass + << " in moving space"; + } } else { prev_obj_end = 0; // It's sufficient to only search for previous object in the preceding page. 
@@ -292,6 +332,12 @@ void MarkCompact::InitNonMovingSpaceFirstObjects() { + RoundUp(prev_obj->SizeOf<kDefaultVerifyFlags>(), kAlignment); } if (prev_obj_end > begin) { + mirror::Class* klass = prev_obj->GetClass<kVerifyNone, kWithoutReadBarrier>(); + if (bump_pointer_space_->HasAddress(klass)) { + LOG(WARNING) << "found inter-page object " << prev_obj + << " in non-moving space with klass " << klass + << " in moving space"; + } first_objs_non_moving_space_[page_idx].Assign(prev_obj); } else { // Find the first live object in this page @@ -316,7 +362,8 @@ void MarkCompact::PrepareForCompaction() { size_t vector_len = (black_allocations_begin_ - space_begin) / kOffsetChunkSize; DCHECK_LE(vector_len, vector_length_); for (size_t i = 0; i < vector_len; i++) { - DCHECK_LE(offset_vector_[i], kOffsetChunkSize); + DCHECK_LE(chunk_info_vec_[i], kOffsetChunkSize); + DCHECK_EQ(chunk_info_vec_[i], live_words_bitmap_->LiveBytesInBitmapWord(i)); } InitMovingSpaceFirstObjects(vector_len); InitNonMovingSpaceFirstObjects(); @@ -332,26 +379,38 @@ void MarkCompact::PrepareForCompaction() { // there and not try to compact it. // Furthermore, we might have some large objects and may not want to move such // objects. - // We can adjust, without too much effort, the values in the offset_vector_ such + // We can adjust, without too much effort, the values in the chunk_info_vec_ such // that the objects in the dense beginning area aren't moved. OTOH, large // objects, which could be anywhere in the heap, could also be kept from // moving by using a similar trick. The only issue is that by doing this we will // leave an unused hole in the middle of the heap which can't be used for // allocations until we do a *full* compaction. // - // At this point every element in the offset_vector_ contains the live-bytes + // At this point every element in the chunk_info_vec_ contains the live-bytes // of the corresponding chunk. For old-to-new address computation we need // every element to reflect total live-bytes till the corresponding chunk. - // + + // Live-bytes count is required to compute post_compact_end_ below. + uint32_t total; // Update the vector one past the heap usage as it is required for black // allocated objects' post-compact address computation. if (vector_len < vector_length_) { vector_len++; + total = 0; + } else { + // Fetch the value stored in the last element before it gets overwritten by + // std::exclusive_scan(). + total = chunk_info_vec_[vector_len - 1]; } - std::exclusive_scan(offset_vector_, offset_vector_ + vector_len, offset_vector_, 0); + std::exclusive_scan(chunk_info_vec_, chunk_info_vec_ + vector_len, chunk_info_vec_, 0); + total += chunk_info_vec_[vector_len - 1]; + for (size_t i = vector_len; i < vector_length_; i++) { - DCHECK_EQ(offset_vector_[i], 0u); + DCHECK_EQ(chunk_info_vec_[i], 0u); } + post_compact_end_ = AlignUp(space_begin + total, kPageSize); + CHECK_EQ(post_compact_end_, space_begin + moving_first_objs_count_ * kPageSize); + black_objs_slide_diff_ = black_allocations_begin_ - post_compact_end_; // How do we handle compaction of heap portion used for allocations after the // marking-pause? // All allocations after the marking-pause are considered black (reachable) @@ -360,8 +419,9 @@ void MarkCompact::PrepareForCompaction() { // where allocations took place before the marking-pause. And everything after // that will be slid with TLAB holes, and then TLAB info in TLS will be // appropriately updated in the pre-compaction pause. 
- // The offset-vector entries for the post marking-pause allocations will be + // The chunk-info vector entries for the post marking-pause allocations will be // also updated in the pre-compaction pause. + updated_roots_.reserve(1000000); } class MarkCompact::VerifyRootMarkedVisitor : public SingleRootVisitor { @@ -394,8 +454,8 @@ void MarkCompact::ReMarkRoots(Runtime* runtime) { } } -void MarkCompact::PausePhase() { - TimingLogger::ScopedTiming t("(Paused)PausePhase", GetTimings()); +void MarkCompact::MarkingPause() { + TimingLogger::ScopedTiming t("(Paused)MarkingPause", GetTimings()); Runtime* runtime = Runtime::Current(); Locks::mutator_lock_->AssertExclusiveHeld(thread_running_gc_); { @@ -425,7 +485,16 @@ void MarkCompact::PausePhase() { live_stack_freeze_size_ = heap_->GetLiveStack()->Size(); } } - heap_->PreSweepingGcVerification(this); + // TODO: For PreSweepingGcVerification(), find correct strategy to visit/walk + // objects in bump-pointer space when we have a mark-bitmap to indicate live + // objects. At the same time we also need to be able to visit black allocations, + // even though they are not marked in the bitmap. Without both of these we fail + // pre-sweeping verification. As well as we leave windows open wherein a + // VisitObjects/Walk on the space would either miss some objects or visit + // unreachable ones. These windows are when we are switching from shared + // mutator-lock to exclusive and vice-versa starting from here till compaction pause. + // heap_->PreSweepingGcVerification(this); + // Disallow new system weaks to prevent a race which occurs when someone adds // a new system weak before we sweep them. Since this new system weak may not // be marked, the GC may incorrectly sweep it. This also fixes a race where @@ -438,14 +507,19 @@ void MarkCompact::PausePhase() { // Capture 'end' of moving-space at this point. Every allocation beyond this // point will be considered as in to-space. - // TODO: We may need to align up/adjust end to page boundary - black_allocations_begin_ = bump_pointer_space_->End(); + // Align-up to page boundary so that black allocations happen from next page + // onwards. + black_allocations_begin_ = bump_pointer_space_->AlignEnd(thread_running_gc_, kPageSize); + DCHECK(IsAligned<kAlignment>(black_allocations_begin_)); + black_allocations_begin_ = AlignUp(black_allocations_begin_, kPageSize); } -void MarkCompact::SweepSystemWeaks(Thread* self) { - TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); +void MarkCompact::SweepSystemWeaks(Thread* self, Runtime* runtime, const bool paused) { + TimingLogger::ScopedTiming t(paused ? "(Paused)SweepSystemWeaks" : "SweepSystemWeaks", + GetTimings()); ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_); - Runtime::Current()->SweepSystemWeaks(this); + // Don't sweep JIT weaks with other. They are separately done. + runtime->SweepSystemWeaks(this, !paused); } void MarkCompact::ProcessReferences(Thread* self) { @@ -497,7 +571,9 @@ void MarkCompact::ReclaimPhase() { Runtime* const runtime = Runtime::Current(); // Process the references concurrently. ProcessReferences(thread_running_gc_); - SweepSystemWeaks(thread_running_gc_); + // TODO: Try to merge this system-weak sweeping with the one while updating + // references during the compaction pause. + SweepSystemWeaks(thread_running_gc_, runtime, /*paused*/ false); runtime->AllowNewSystemWeaks(); // Clean up class loaders after system weaks are swept since that is how we know if class // unloading occurred. 
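[Editor's aside, not part of the patch] In PrepareForCompaction() above, std::exclusive_scan() converts the per-chunk live-byte counts in chunk_info_vec_ into a prefix sum, so each entry becomes the compacted offset at which that chunk's survivors will land, and the grand total feeds post_compact_end_. A tiny sketch with invented counts:

    #include <cstdint>
    #include <cstdio>
    #include <numeric>
    #include <vector>

    int main() {
      // Hypothetical per-chunk live-byte counts gathered during marking.
      std::vector<uint32_t> chunk_info = {256, 1024, 0, 512};
      // Grab the total before the in-place scan overwrites the last element
      // (the patch does the equivalent by saving the last entry beforehand).
      uint32_t total = std::accumulate(chunk_info.begin(), chunk_info.end(), 0u);
      std::exclusive_scan(chunk_info.begin(), chunk_info.end(), chunk_info.begin(), 0u);
      for (size_t i = 0; i < chunk_info.size(); i++) {
        // new_addr = space_begin + chunk_info[i] + live bytes preceding the
        // object within chunk i (taken from the live-words bitmap).
        std::printf("chunk %zu compacts to offset %u\n", i, chunk_info[i]);
      }
      std::printf("total live bytes (before page-align): %u\n", total);
      return 0;
    }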
@@ -525,7 +601,10 @@ class MarkCompact::RefsUpdateVisitor { mirror::Object* obj, uint8_t* begin, uint8_t* end) - : collector_(collector), obj_(obj), begin_(begin), end_(end) {} + : collector_(collector), obj_(obj), begin_(begin), end_(end) { + DCHECK(!kCheckBegin || begin != nullptr); + DCHECK(!kCheckEnd || end != nullptr); + } void operator()(mirror::Object* old ATTRIBUTE_UNUSED, MemberOffset offset, bool /* is_static */) const ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) @@ -577,14 +656,22 @@ class MarkCompact::RefsUpdateVisitor { void MarkCompact::CompactPage(mirror::Object* obj, uint32_t offset, uint8_t* addr) { DCHECK(IsAligned<kPageSize>(addr)); obj = GetFromSpaceAddr(obj); - // TODO: assert that offset is within obj and obj is a valid object - DCHECK(live_words_bitmap_->Test(offset)); + DCHECK(current_space_bitmap_->Test(obj) + && live_words_bitmap_->Test(obj)); + DCHECK(live_words_bitmap_->Test(offset)) << "obj=" << obj + << " offset=" << offset + << " addr=" << static_cast<void*>(addr) + << " black_allocs_begin=" + << static_cast<void*>(black_allocations_begin_) + << " post_compact_addr=" + << static_cast<void*>(post_compact_end_); uint8_t* const start_addr = addr; // How many distinct live-strides do we have. size_t stride_count = 0; uint8_t* last_stride; uint32_t last_stride_begin = 0; live_words_bitmap_->VisitLiveStrides(offset, + black_allocations_begin_, kPageSize, [&] (uint32_t stride_begin, size_t stride_size, @@ -603,39 +690,27 @@ void MarkCompact::CompactPage(mirror::Object* obj, uint32_t offset, uint8_t* add }); DCHECK_LT(last_stride, start_addr + kPageSize); DCHECK_GT(stride_count, 0u); - size_t obj_size; - { - // First object - // TODO: We can further differentiate on the basis of offset == 0 to avoid - // checking beginnings when it's true. But it's not a very likely case. - uint32_t offset_within_obj = offset * kAlignment - - (reinterpret_cast<uint8_t*>(obj) - from_space_begin_); - const bool update_native_roots = offset_within_obj == 0; + size_t obj_size = 0; + uint32_t offset_within_obj = offset * kAlignment + - (reinterpret_cast<uint8_t*>(obj) - from_space_begin_); + // First object + if (offset_within_obj > 0) { mirror::Object* to_ref = reinterpret_cast<mirror::Object*>(start_addr - offset_within_obj); if (stride_count > 1) { RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/false> visitor(this, to_ref, start_addr, nullptr); - obj_size = update_native_roots - ? obj->VisitRefsForCompaction(visitor, - MemberOffset(offset_within_obj), - MemberOffset(-1)) - : obj->VisitRefsForCompaction</*kFetchObjSize*/true, /*kVisitNativeRoots*/false>( - visitor, MemberOffset(offset_within_obj), MemberOffset(-1)); + obj_size = obj->VisitRefsForCompaction</*kFetchObjSize*/true, /*kVisitNativeRoots*/false>( + visitor, MemberOffset(offset_within_obj), MemberOffset(-1)); } else { RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/true> visitor(this, to_ref, start_addr, start_addr + kPageSize); - obj_size = update_native_roots - ? 
obj->VisitRefsForCompaction(visitor, - MemberOffset(offset_within_obj), - MemberOffset(offset_within_obj + kPageSize)) - : obj->VisitRefsForCompaction</*kFetchObjSize*/true, /*kVisitNativeRoots*/false>( - visitor, - MemberOffset(offset_within_obj), - MemberOffset(offset_within_obj + kPageSize)); + obj_size = obj->VisitRefsForCompaction</*kFetchObjSize*/true, /*kVisitNativeRoots*/false>( + visitor, MemberOffset(offset_within_obj), MemberOffset(offset_within_obj + + kPageSize)); } obj_size = RoundUp(obj_size, kAlignment); DCHECK_GT(obj_size, offset_within_obj); @@ -647,7 +722,9 @@ void MarkCompact::CompactPage(mirror::Object* obj, uint32_t offset, uint8_t* add } } - uint8_t* const end_addr = start_addr + kPageSize; + // Except for the last page being compacted, the pages will have addr == + // start_addr + kPageSize. + uint8_t* const end_addr = addr; addr = start_addr + obj_size; // All strides except the last one can be updated without any boundary // checks. @@ -676,6 +753,179 @@ void MarkCompact::CompactPage(mirror::Object* obj, uint32_t offset, uint8_t* add addr += obj_size; from_addr += obj_size; } + // The last page that we compact may have some bytes left untouched in the + // end, we should zero them as the kernel copies at page granularity. + if (UNLIKELY(addr < start_addr + kPageSize)) { + std::memset(addr, 0x0, kPageSize - (addr - start_addr)); + } +} + +// We store the starting point (pre_compact_page - first_obj) and first-chunk's +// size. If more TLAB(s) started in this page, then those chunks are identified +// using mark bitmap. All this info is prepared in UpdateMovingSpaceBlackAllocations(). +// If we find a set bit in the bitmap, then we copy the remaining page and then +// use the bitmap to visit each object for updating references. +void MarkCompact::SlideBlackPage(mirror::Object* first_obj, + const size_t page_idx, + uint8_t* const pre_compact_page, + uint8_t* dest) { + DCHECK(IsAligned<kPageSize>(pre_compact_page)); + size_t bytes_copied; + const uint32_t first_chunk_size = black_alloc_pages_first_chunk_size_[page_idx]; + mirror::Object* next_page_first_obj = first_objs_moving_space_[page_idx + 1].AsMirrorPtr(); + uint8_t* src_addr = reinterpret_cast<uint8_t*>(GetFromSpaceAddr(first_obj)); + uint8_t* pre_compact_addr = reinterpret_cast<uint8_t*>(first_obj); + uint8_t* const pre_compact_page_end = pre_compact_page + kPageSize; + uint8_t* const dest_page_end = dest + kPageSize; + DCHECK(IsAligned<kPageSize>(dest_page_end)); + + // We have empty portion at the beginning of the page. Zero it. + if (pre_compact_addr > pre_compact_page) { + bytes_copied = pre_compact_addr - pre_compact_page; + DCHECK_LT(bytes_copied, kPageSize); + std::memset(dest, 0x0, bytes_copied); + dest += bytes_copied; + } else { + bytes_copied = 0; + size_t offset = pre_compact_page - pre_compact_addr; + pre_compact_addr = pre_compact_page; + src_addr += offset; + DCHECK(IsAligned<kPageSize>(src_addr)); + } + // Copy the first chunk of live words + std::memcpy(dest, src_addr, first_chunk_size); + // Update references in the first chunk. Use object size to find next object. + { + size_t bytes_to_visit = first_chunk_size; + size_t obj_size; + // The first object started in some previous page. So we need to check the + // beginning. 
+ DCHECK_LE(reinterpret_cast<uint8_t*>(first_obj), pre_compact_addr); + size_t offset = pre_compact_addr - reinterpret_cast<uint8_t*>(first_obj); + if (bytes_copied == 0 && offset > 0) { + mirror::Object* to_obj = reinterpret_cast<mirror::Object*>(dest - offset); + mirror::Object* from_obj = reinterpret_cast<mirror::Object*>(src_addr - offset); + // If the next page's first-obj is in this page or nullptr, then we don't + // need to check end boundary + if (next_page_first_obj == nullptr + || (first_obj != next_page_first_obj + && reinterpret_cast<uint8_t*>(next_page_first_obj) <= pre_compact_page_end)) { + RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/false> visitor(this, + to_obj, + dest, + nullptr); + obj_size = from_obj->VisitRefsForCompaction< + /*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(visitor, + MemberOffset(offset), + MemberOffset(-1)); + } else { + RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/true> visitor(this, + to_obj, + dest, + dest_page_end); + from_obj->VisitRefsForCompaction< + /*kFetchObjSize*/false, /*kVisitNativeRoots*/false>(visitor, + MemberOffset(offset), + MemberOffset(offset + + kPageSize)); + return; + } + obj_size = RoundUp(obj_size, kAlignment); + obj_size -= offset; + dest += obj_size; + bytes_to_visit -= obj_size; + } + bytes_copied += first_chunk_size; + // If the last object in this page is next_page_first_obj, then we need to check end boundary + bool check_last_obj = false; + if (next_page_first_obj != nullptr + && reinterpret_cast<uint8_t*>(next_page_first_obj) < pre_compact_page_end + && bytes_copied == kPageSize) { + bytes_to_visit -= pre_compact_page_end - reinterpret_cast<uint8_t*>(next_page_first_obj); + check_last_obj = true; + } + while (bytes_to_visit > 0) { + mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest); + RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false> visitor(this, + dest_obj, + nullptr, + nullptr); + obj_size = dest_obj->VisitRefsForCompaction(visitor, MemberOffset(0), MemberOffset(-1)); + obj_size = RoundUp(obj_size, kAlignment); + bytes_to_visit -= obj_size; + dest += obj_size; + } + DCHECK_EQ(bytes_to_visit, 0u); + if (check_last_obj) { + mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest); + RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true> visitor(this, + dest_obj, + nullptr, + dest_page_end); + mirror::Object* obj = GetFromSpaceAddr(next_page_first_obj); + obj->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor, + MemberOffset(0), + MemberOffset(dest_page_end - dest)); + return; + } + } + + // Probably a TLAB finished on this page and/or a new TLAB started as well. + if (bytes_copied < kPageSize) { + src_addr += first_chunk_size; + pre_compact_addr += first_chunk_size; + // Use mark-bitmap to identify where objects are. First call + // VisitMarkedRange for only the first marked bit. If found, zero all bytes + // until that object and then call memcpy on the rest of the page. + // Then call VisitMarkedRange for all marked bits *after* the one found in + // this invocation. This time to visit references. + uintptr_t start_visit = reinterpret_cast<uintptr_t>(pre_compact_addr); + uintptr_t page_end = reinterpret_cast<uintptr_t>(pre_compact_page_end); + mirror::Object* found_obj = nullptr; + current_space_bitmap_->VisitMarkedRange</*kVisitOnce*/true>(start_visit, + page_end, + [&found_obj](mirror::Object* obj) { + found_obj = obj; + }); + size_t remaining_bytes = kPageSize - bytes_copied; + if (found_obj == nullptr) { + // No more black objects in this page. 
Zero the remaining bytes and return. + std::memset(dest, 0x0, remaining_bytes); + return; + } + // Copy everything in this page, which includes any zeroed regions + // in-between. + std::memcpy(dest, src_addr, remaining_bytes); + DCHECK_LT(reinterpret_cast<uintptr_t>(found_obj), page_end); + current_space_bitmap_->VisitMarkedRange( + reinterpret_cast<uintptr_t>(found_obj) + mirror::kObjectHeaderSize, + page_end, + [&found_obj, pre_compact_addr, dest, this] (mirror::Object* obj) + REQUIRES_SHARED(Locks::mutator_lock_) { + ptrdiff_t diff = reinterpret_cast<uint8_t*>(found_obj) - pre_compact_addr; + mirror::Object* ref = reinterpret_cast<mirror::Object*>(dest + diff); + RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false> + visitor(this, ref, nullptr, nullptr); + ref->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor, + MemberOffset(0), + MemberOffset(-1)); + // Remember for next round. + found_obj = obj; + }); + // found_obj may have been updated in VisitMarkedRange. Visit the last found + // object. + DCHECK_GT(reinterpret_cast<uint8_t*>(found_obj), pre_compact_addr); + DCHECK_LT(reinterpret_cast<uintptr_t>(found_obj), page_end); + ptrdiff_t diff = reinterpret_cast<uint8_t*>(found_obj) - pre_compact_addr; + mirror::Object* ref = reinterpret_cast<mirror::Object*>(dest + diff); + RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true> visitor(this, + ref, + nullptr, + dest_page_end); + ref->VisitRefsForCompaction</*kFetchObjSize*/false>( + visitor, MemberOffset(0), MemberOffset(page_end - + reinterpret_cast<uintptr_t>(found_obj))); + } } void MarkCompact::CompactMovingSpace() { @@ -689,12 +939,32 @@ void MarkCompact::CompactMovingSpace() { // // TODO: Should we do this in reverse? If the probability of accessing an object // is inversely proportional to the object's age, then it may make sense. 
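[Editor's aside, not part of the patch] Objects allocated after the marking pause ("black" allocations) are not relocated individually: SlideBlackPage() above, together with PostCompactBlackObjAddr(), moves them down by the single constant black_objs_slide_diff_ = black_allocations_begin_ - post_compact_end_, and the same delta is later applied to TLAB pointers. A toy illustration with invented addresses:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Invented values standing in for black_allocations_begin_ and
      // post_compact_end_ (the page-aligned end of the compacted old objects).
      uintptr_t black_allocations_begin = 0x70000000;
      uintptr_t post_compact_end = 0x6fe00000;
      uintptr_t slide_diff = black_allocations_begin - post_compact_end;
      // Every black object slides down by the same amount, so no per-object
      // forwarding table is needed for this region.
      uintptr_t old_ref = 0x70001230;
      uintptr_t new_ref = old_ref - slide_diff;
      std::printf("slide=%#lx old=%#lx new=%#lx\n",
                  static_cast<unsigned long>(slide_diff),
                  static_cast<unsigned long>(old_ref),
                  static_cast<unsigned long>(new_ref));
      return 0;
    }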
+ TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); uint8_t* current_page = bump_pointer_space_->Begin(); - for (size_t i = 0; i < moving_first_objs_count_; i++) { - CompactPage(first_objs_moving_space_[i].AsMirrorPtr(), - pre_compact_offset_moving_space_[i], + size_t idx = 0; + while (idx < moving_first_objs_count_) { + CompactPage(first_objs_moving_space_[idx].AsMirrorPtr(), + pre_compact_offset_moving_space_[idx], current_page); + idx++; + current_page += kPageSize; + } + // Allocated-black pages + size_t count = moving_first_objs_count_ + black_page_count_; + uint8_t* pre_compact_page = black_allocations_begin_; + DCHECK(IsAligned<kPageSize>(pre_compact_page)); + while (idx < count) { + mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr(); + if (first_obj != nullptr) { + DCHECK_GT(black_alloc_pages_first_chunk_size_[idx], 0u); + SlideBlackPage(first_obj, + idx, + pre_compact_page, + current_page); + } + pre_compact_page += kPageSize; current_page += kPageSize; + idx++; } } @@ -736,8 +1006,8 @@ void MarkCompact::UpdateNonMovingPage(mirror::Object* first, uint8_t* page) { if (reinterpret_cast<uint8_t*>(curr_obj) < page) { RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/true> visitor(this, curr_obj, page, page + kPageSize); - curr_obj->VisitRefsForCompaction</*kFetchObjSize*/false>( - visitor, MemberOffset(page - reinterpret_cast<uint8_t*>(curr_obj)), end_offset); + curr_obj->VisitRefsForCompaction</*kFetchObjSize*/false, /*kVisitNativeRoots*/false>( + visitor, MemberOffset(page - reinterpret_cast<uint8_t*>(curr_obj)), end_offset); } else { RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true> visitor(this, curr_obj, page, page + kPageSize); @@ -746,6 +1016,7 @@ void MarkCompact::UpdateNonMovingPage(mirror::Object* first, uint8_t* page) { } void MarkCompact::UpdateNonMovingSpace() { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); uint8_t* page = non_moving_space_->Begin(); for (size_t i = 0; i < non_moving_first_objs_count_; i++) { mirror::Object* obj = first_objs_non_moving_space_[i].AsMirrorPtr(); @@ -757,21 +1028,358 @@ void MarkCompact::UpdateNonMovingSpace() { } } -void MarkCompact::PreCompactionPhase() { - TimingLogger::ScopedTiming t("(Paused)CompactionPhase", GetTimings()); +void MarkCompact::UpdateMovingSpaceBlackAllocations() { + // For sliding black pages, we need the first-object, which overlaps with the + // first byte of the page. Additionally, we compute the size of first chunk of + // black objects. This will suffice for most black pages. Unlike, compaction + // pages, here we don't need to pre-compute the offset within first-obj from + // where sliding has to start. That can be calculated using the pre-compact + // address of the page. Therefore, to save space, we store the first chunk's + // size in black_alloc_pages_first_chunk_size_ array. + // For the pages which may have holes after the first chunk, which could happen + // if a new TLAB starts in the middle of the page, we mark the objects in + // the mark-bitmap. So, if the first-chunk size is smaller than kPageSize, + // then we use the mark-bitmap for the remainder of the page. + uint8_t* const begin = bump_pointer_space_->Begin(); + uint8_t* black_allocs = black_allocations_begin_; + DCHECK_LE(begin, black_allocs); + size_t consumed_blocks_count = 0; + size_t first_block_size; + // Get the list of all blocks allocated in the bump-pointer space. 
+ std::vector<size_t>* block_sizes = bump_pointer_space_->GetBlockSizes(thread_running_gc_, + &first_block_size); + DCHECK_LE(first_block_size, (size_t)(black_allocs - begin)); + if (block_sizes != nullptr) { + size_t black_page_idx = moving_first_objs_count_; + uint8_t* block_end = begin + first_block_size; + uint32_t remaining_chunk_size = 0; + uint32_t first_chunk_size = 0; + mirror::Object* first_obj = nullptr; + for (size_t block_size : *block_sizes) { + block_end += block_size; + // Skip the blocks that are prior to the black allocations. These will be + // merged with the main-block later. + if (black_allocs >= block_end) { + consumed_blocks_count++; + continue; + } + mirror::Object* obj = reinterpret_cast<mirror::Object*>(black_allocs); + bool set_mark_bit = remaining_chunk_size > 0; + // We don't know how many objects are allocated in the current block. When we hit + // a null assume it's the end. This works as every block is expected to + // have objects allocated linearly using bump-pointer. + // BumpPointerSpace::Walk() also works similarly. + while (black_allocs < block_end + && obj->GetClass<kDefaultVerifyFlags, kWithoutReadBarrier>() != nullptr) { + if (first_obj == nullptr) { + first_obj = obj; + } + // We only need the mark-bitmap in the pages wherein a new TLAB starts in + // the middle of the page. + if (set_mark_bit) { + current_space_bitmap_->Set(obj); + } + size_t obj_size = RoundUp(obj->SizeOf(), kAlignment); + // Handle objects which cross page boundary, including objects larger + // than page size. + if (remaining_chunk_size + obj_size >= kPageSize) { + set_mark_bit = false; + first_chunk_size += kPageSize - remaining_chunk_size; + remaining_chunk_size += obj_size; + // We should not store first-object and remaining_chunk_size if there were + // unused bytes before this TLAB, in which case we must have already + // stored the values (below). + if (black_alloc_pages_first_chunk_size_[black_page_idx] == 0) { + black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size; + first_objs_moving_space_[black_page_idx].Assign(first_obj); + } + black_page_idx++; + remaining_chunk_size -= kPageSize; + // Consume an object larger than page size. + while (remaining_chunk_size >= kPageSize) { + black_alloc_pages_first_chunk_size_[black_page_idx] = kPageSize; + first_objs_moving_space_[black_page_idx].Assign(obj); + black_page_idx++; + remaining_chunk_size -= kPageSize; + } + first_obj = remaining_chunk_size > 0 ? obj : nullptr; + first_chunk_size = remaining_chunk_size; + } else { + DCHECK_LE(first_chunk_size, remaining_chunk_size); + first_chunk_size += obj_size; + remaining_chunk_size += obj_size; + } + black_allocs += obj_size; + obj = reinterpret_cast<mirror::Object*>(black_allocs); + } + DCHECK_LE(black_allocs, block_end); + DCHECK_LT(remaining_chunk_size, kPageSize); + // consume the unallocated portion of the block + if (black_allocs < block_end) { + // first-chunk of the current page ends here. Store it. 
+ if (first_chunk_size > 0) { + black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size; + first_objs_moving_space_[black_page_idx].Assign(first_obj); + first_chunk_size = 0; + } + first_obj = nullptr; + size_t page_remaining = kPageSize - remaining_chunk_size; + size_t block_remaining = block_end - black_allocs; + if (page_remaining <= block_remaining) { + block_remaining -= page_remaining; + // current page and the subsequent empty pages in the block + black_page_idx += 1 + block_remaining / kPageSize; + remaining_chunk_size = block_remaining % kPageSize; + } else { + remaining_chunk_size += block_remaining; + } + black_allocs = block_end; + } + } + black_page_count_ = black_page_idx - moving_first_objs_count_; + delete block_sizes; + } + // Update bump-pointer space by consuming all the pre-black blocks into the + // main one. + bump_pointer_space_->SetBlockSizes(thread_running_gc_, + post_compact_end_ - begin, + consumed_blocks_count); +} + +void MarkCompact::UpdateNonMovingSpaceBlackAllocations() { + accounting::ObjectStack* stack = heap_->GetAllocationStack(); + const StackReference<mirror::Object>* limit = stack->End(); + uint8_t* const space_begin = non_moving_space_->Begin(); + for (StackReference<mirror::Object>* it = stack->Begin(); it != limit; ++it) { + mirror::Object* obj = it->AsMirrorPtr(); + if (obj != nullptr && non_moving_space_bitmap_->HasAddress(obj)) { + non_moving_space_bitmap_->Set(obj); + // Clear so that we don't try to set the bit again in the next GC-cycle. + it->Clear(); + size_t idx = (reinterpret_cast<uint8_t*>(obj) - space_begin) / kPageSize; + uint8_t* page_begin = AlignDown(reinterpret_cast<uint8_t*>(obj), kPageSize); + mirror::Object* first_obj = first_objs_non_moving_space_[idx].AsMirrorPtr(); + if (first_obj == nullptr + || (obj < first_obj && reinterpret_cast<uint8_t*>(first_obj) > page_begin)) { + first_objs_non_moving_space_[idx].Assign(obj); + } + mirror::Object* next_page_first_obj = first_objs_non_moving_space_[++idx].AsMirrorPtr(); + uint8_t* next_page_begin = page_begin + kPageSize; + if (next_page_first_obj == nullptr + || reinterpret_cast<uint8_t*>(next_page_first_obj) > next_page_begin) { + size_t obj_size = RoundUp(obj->SizeOf<kDefaultVerifyFlags>(), kAlignment); + uint8_t* obj_end = reinterpret_cast<uint8_t*>(obj) + obj_size; + while (next_page_begin < obj_end) { + first_objs_non_moving_space_[idx++].Assign(obj); + next_page_begin += kPageSize; + } + } + // update first_objs count in case we went past non_moving_first_objs_count_ + non_moving_first_objs_count_ = std::max(non_moving_first_objs_count_, idx); + } + } +} + +class MarkCompact::ImmuneSpaceUpdateObjVisitor { + public: + explicit ImmuneSpaceUpdateObjVisitor(MarkCompact* collector) : collector_(collector) {} + + ALWAYS_INLINE void operator()(mirror::Object* obj) const REQUIRES(Locks::mutator_lock_) { + RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false> visitor(collector_, + obj, + /*begin_*/nullptr, + /*end_*/nullptr); + obj->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor, + MemberOffset(0), + MemberOffset(-1)); + } + + static void Callback(mirror::Object* obj, void* arg) REQUIRES(Locks::mutator_lock_) { + reinterpret_cast<ImmuneSpaceUpdateObjVisitor*>(arg)->operator()(obj); + } + + private: + MarkCompact* const collector_; +}; + +class MarkCompact::StackRefsUpdateVisitor : public Closure { + public: + explicit StackRefsUpdateVisitor(MarkCompact* collector, size_t bytes) + : collector_(collector), adjust_bytes_(bytes) {} + + void Run(Thread* thread) 
override REQUIRES_SHARED(Locks::mutator_lock_) { + // Note: self is not necessarily equal to thread since thread may be suspended. + Thread* self = Thread::Current(); + CHECK(thread == self + || thread->IsSuspended() + || thread->GetState() == ThreadState::kWaitingPerformingGc) + << thread->GetState() << " thread " << thread << " self " << self; + thread->VisitRoots(collector_, kVisitRootFlagAllRoots); + // Subtract adjust_bytes_ from TLAB pointers (top, end etc.) to align it + // with the black-page slide that is performed during compaction. + thread->AdjustTlab(adjust_bytes_); + // TODO: update the TLAB pointers. + collector_->GetBarrier().Pass(self); + } + + private: + MarkCompact* const collector_; + const size_t adjust_bytes_; +}; + +class MarkCompact::CompactionPauseCallback : public Closure { + public: + explicit CompactionPauseCallback(MarkCompact* collector) : collector_(collector) {} + + void Run(Thread* thread) override REQUIRES(Locks::mutator_lock_) { + DCHECK_EQ(thread, collector_->thread_running_gc_); + { + pthread_attr_t attr; + size_t stack_size; + void* stack_addr; + pthread_getattr_np(pthread_self(), &attr); + pthread_attr_getstack(&attr, &stack_addr, &stack_size); + pthread_attr_destroy(&attr); + collector_->stack_addr_ = stack_addr; + collector_->stack_end_ = reinterpret_cast<char*>(stack_addr) + stack_size; + } + collector_->CompactionPause(); + + collector_->stack_end_ = nullptr; + } + + private: + MarkCompact* const collector_; +}; + +void MarkCompact::CompactionPause() { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); + Runtime* runtime = Runtime::Current(); non_moving_space_bitmap_ = non_moving_space_->GetLiveBitmap(); - // TODO: Refresh data-structures to catch-up on allocations that may have - // happened since PrepareForCompaction(). - compacting_ = true; + { + TimingLogger::ScopedTiming t2("(Paused)UpdateCompactionDataStructures", GetTimings()); + ReaderMutexLock rmu(thread_running_gc_, *Locks::heap_bitmap_lock_); + // Refresh data-structures to catch-up on allocations that may have + // happened since marking-phase pause. + // There could be several TLABs that got allocated since marking pause. We + // don't want to compact them and instead update the TLAB info in TLS and + // let mutators continue to use the TLABs. + // We need to set all the bits in live-words bitmap corresponding to allocated + // objects. Also, we need to find the objects that are overlapping with + // page-begin boundaries. Unlike objects allocated before + // black_allocations_begin_, which can be identified via mark-bitmap, we can get + // this info only via walking the space past black_allocations_begin_, which + // involves fetching object size. + // TODO: We can reduce the time spent on this in a pause by performing one + // round of this concurrently prior to the pause. + UpdateMovingSpaceBlackAllocations(); + // Iterate over the allocation_stack_, for every object in the non-moving + // space: + // 1. Mark the object in live bitmap + // 2. Erase the object from allocation stack + // 3. In the corresponding page, if the first-object vector needs updating + // then do so. + UpdateNonMovingSpaceBlackAllocations(); + + heap_->GetReferenceProcessor()->UpdateRoots(this); + } + if (runtime->GetJit() != nullptr) { + runtime->GetJit()->GetCodeCache()->SweepRootTables(this); + } + + { + // TODO: Calculate freed objects and update that as well. 
+ int32_t freed_bytes = black_objs_slide_diff_; + bump_pointer_space_->RecordFree(0, freed_bytes); + RecordFree(ObjectBytePair(0, freed_bytes)); + } + + { + TimingLogger::ScopedTiming t2("(Paused)Mremap", GetTimings()); + // TODO: Create mapping's at 2MB aligned addresses to benefit from optimized + // mremap. + size_t size = bump_pointer_space_->Capacity(); + void* ret = mremap(bump_pointer_space_->Begin(), + size, + size, + MREMAP_MAYMOVE | MREMAP_FIXED | MREMAP_DONTUNMAP, + from_space_begin_); + DCHECK_EQ(mprotect(from_space_begin_, size, PROT_READ), 0) + << "mprotect failed errno: " << errno; + CHECK_EQ(ret, static_cast<void*>(from_space_begin_)) + << "mremap to move pages from moving space to from-space failed with errno: " + << errno; + } + if (!kConcurrentCompaction) { + // We need to perform the heap compaction *before* root updation (below) so + // that assumptions that objects have already been compacted and laid down + // are not broken. UpdateNonMovingSpace(); CompactMovingSpace(); } + + { + // TODO: Immune space updation has to happen either before or after + // remapping pre-compact pages to from-space. And depending on when it's + // done, we have to invoke VisitRefsForCompaction() with or without + // read-barrier. + TimingLogger::ScopedTiming t2("(Paused)UpdateImmuneSpaces", GetTimings()); + accounting::CardTable* const card_table = heap_->GetCardTable(); + for (auto& space : immune_spaces_.GetSpaces()) { + DCHECK(space->IsImageSpace() || space->IsZygoteSpace()); + accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap(); + accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space); + ImmuneSpaceUpdateObjVisitor visitor(this); + if (table != nullptr) { + table->ProcessCards(); + table->VisitObjects(ImmuneSpaceUpdateObjVisitor::Callback, &visitor); + } else { + WriterMutexLock wmu(thread_running_gc_, *Locks::heap_bitmap_lock_); + card_table->Scan<false>( + live_bitmap, + space->Begin(), + space->Limit(), + visitor, + accounting::CardTable::kCardDirty - 1); + } + } + } + { + TimingLogger::ScopedTiming t2("(Paused)UpdateConcurrentRoots", GetTimings()); + runtime->VisitConcurrentRoots(this, kVisitRootFlagAllRoots); + } + { + // TODO: don't visit the transaction roots if it's not active. + TimingLogger::ScopedTiming t2("(Paused)UpdateNonThreadRoots", GetTimings()); + runtime->VisitNonThreadRoots(this); + } + { + TimingLogger::ScopedTiming t2("(Paused)UpdateSystemWeaks", GetTimings()); + SweepSystemWeaks(thread_running_gc_, runtime, /*paused*/true); + } +} + +void MarkCompact::PreCompactionPhase() { + TimingLogger::ScopedTiming split(__FUNCTION__, GetTimings()); + DCHECK_EQ(Thread::Current(), thread_running_gc_); + Locks::mutator_lock_->AssertNotHeld(thread_running_gc_); + gc_barrier_.Init(thread_running_gc_, 0); + StackRefsUpdateVisitor thread_visitor(this, black_objs_slide_diff_); + CompactionPauseCallback callback(this); + + size_t barrier_count = Runtime::Current()->GetThreadList()->FlipThreadRoots( + &thread_visitor, &callback, this, GetHeap()->GetGcPauseListener()); + + { + ScopedThreadStateChange tsc(thread_running_gc_, ThreadState::kWaitingForCheckPointsToRun); + gc_barrier_.Increment(thread_running_gc_, barrier_count); + } } void MarkCompact::CompactionPhase() { // TODO: This is the concurrent compaction phase. 
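[Editor's aside, not part of the patch] The pause above hands the moving space's pages over to the pre-reserved from-space with a single mremap(MREMAP_DONTUNMAP) call: the page contents become readable at from_space_begin_, while the original range stays mapped and (for anonymous private mappings on Linux 5.7+, absent userfaultfd) reads back as zero-filled pages. A standalone sketch of that call sequence, with error handling trimmed:

    #include <sys/mman.h>
    #include <cstdio>
    #include <cstring>

    #ifndef MREMAP_DONTUNMAP
    #define MREMAP_DONTUNMAP 4  // same fallback the patch defines for older headers
    #endif

    int main() {
      size_t size = 16 * 4096;
      char* space = static_cast<char*>(mmap(nullptr, size, PROT_READ | PROT_WRITE,
                                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
      char* from_space = static_cast<char*>(mmap(nullptr, size, PROT_NONE,
                                                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
      std::memset(space, 0xab, size);
      // Move the physical pages (and their contents) to from_space; the old
      // range stays mapped and repopulates with zero pages when touched.
      void* ret = mremap(space, size, size,
                         MREMAP_MAYMOVE | MREMAP_FIXED | MREMAP_DONTUNMAP, from_space);
      if (ret == MAP_FAILED) { std::perror("mremap"); return 1; }
      mprotect(from_space, size, PROT_READ);
      std::printf("from_space[0]=%#x space[0]=%#x\n",
                  from_space[0] & 0xff, space[0] & 0xff);  // 0xab and 0
      return 0;
    }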
- TimingLogger::ScopedTiming t("CompactionPhase", GetTimings()); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); UNIMPLEMENTED(FATAL) << "Unreachable"; } @@ -1146,22 +1754,33 @@ class MarkCompact::RefFieldsVisitor { MarkCompact* const mark_compact_; }; +template <size_t kAlignment> +size_t MarkCompact::LiveWordsBitmap<kAlignment>::LiveBytesInBitmapWord(size_t chunk_idx) const { + const size_t index = chunk_idx * kBitmapWordsPerVectorWord; + size_t words = 0; + for (uint32_t i = 0; i < kBitmapWordsPerVectorWord; i++) { + words += POPCOUNT(Bitmap::Begin()[index + i]); + } + return words * kAlignment; +} + void MarkCompact::UpdateLivenessInfo(mirror::Object* obj) { DCHECK(obj != nullptr); uintptr_t obj_begin = reinterpret_cast<uintptr_t>(obj); size_t size = RoundUp(obj->SizeOf<kDefaultVerifyFlags>(), kAlignment); uintptr_t bit_index = live_words_bitmap_->SetLiveWords(obj_begin, size); - size_t vec_idx = (obj_begin - live_words_bitmap_->Begin()) / kOffsetChunkSize; - // Compute the bit-index within the offset-vector word. + size_t chunk_idx = (obj_begin - live_words_bitmap_->Begin()) / kOffsetChunkSize; + // Compute the bit-index within the chunk-info vector word. bit_index %= kBitsPerVectorWord; size_t first_chunk_portion = std::min(size, (kBitsPerVectorWord - bit_index) * kAlignment); - offset_vector_[vec_idx++] += first_chunk_portion; + chunk_info_vec_[chunk_idx++] += first_chunk_portion; DCHECK_LE(first_chunk_portion, size); for (size -= first_chunk_portion; size > kOffsetChunkSize; size -= kOffsetChunkSize) { - offset_vector_[vec_idx++] = kOffsetChunkSize; + DCHECK_EQ(chunk_info_vec_[chunk_idx], 0u); + chunk_info_vec_[chunk_idx++] = kOffsetChunkSize; } - offset_vector_[vec_idx] += size; + chunk_info_vec_[chunk_idx] += size; } template <bool kUpdateLiveWords> @@ -1266,23 +1885,45 @@ void MarkCompact::MarkHeapReference(mirror::HeapReference<mirror::Object>* obj, void MarkCompact::VisitRoots(mirror::Object*** roots, size_t count, - const RootInfo& info ATTRIBUTE_UNUSED) { - for (size_t i = 0; i < count; ++i) { - MarkObjectNonNull(*roots[i]); + const RootInfo& info) { + if (compacting_) { + for (size_t i = 0; i < count; ++i) { + UpdateRoot(roots[i], info); + } + } else { + for (size_t i = 0; i < count; ++i) { + MarkObjectNonNull(*roots[i]); + } } } void MarkCompact::VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count, - const RootInfo& info ATTRIBUTE_UNUSED) { - for (size_t i = 0; i < count; ++i) { - MarkObjectNonNull(roots[i]->AsMirrorPtr()); + const RootInfo& info) { + // TODO: do we need to check if the root is null or not? + if (compacting_) { + for (size_t i = 0; i < count; ++i) { + UpdateRoot(roots[i], info); + } + } else { + for (size_t i = 0; i < count; ++i) { + MarkObjectNonNull(roots[i]->AsMirrorPtr()); + } } } mirror::Object* MarkCompact::IsMarked(mirror::Object* obj) { CHECK(obj != nullptr); if (current_space_bitmap_->HasAddress(obj)) { + if (compacting_) { + if (reinterpret_cast<uint8_t*>(obj) > black_allocations_begin_) { + return PostCompactBlackObjAddr(obj); + } else if (live_words_bitmap_->Test(obj)) { + return PostCompactOldObjAddr(obj); + } else { + return nullptr; + } + } return current_space_bitmap_->Test(obj) ? obj : nullptr; } else if (non_moving_space_bitmap_->HasAddress(obj)) { return non_moving_space_bitmap_->Test(obj) ? obj : nullptr; @@ -1324,11 +1965,12 @@ void MarkCompact::DelayReferenceReferent(ObjPtr<mirror::Class> klass, } void MarkCompact::FinishPhase() { - // TODO: unmap/madv_dontneed from-space mappings. 
- info_map_->MadviseDontNeedAndZero(); + info_map_.MadviseDontNeedAndZero(); live_words_bitmap_->ClearBitmap(); + from_space_map_.MadviseDontNeedAndZero(); CHECK(mark_stack_->IsEmpty()); // Ensure that the mark stack is empty. mark_stack_->Reset(); + updated_roots_.clear(); DCHECK_EQ(thread_running_gc_, Thread::Current()); ReaderMutexLock mu(thread_running_gc_, *Locks::mutator_lock_); WriterMutexLock mu2(thread_running_gc_, *Locks::heap_bitmap_lock_); diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h index 54fe7dbb55..e354ed88db 100644 --- a/runtime/gc/collector/mark_compact.h +++ b/runtime/gc/collector/mark_compact.h @@ -18,6 +18,7 @@ #define ART_RUNTIME_GC_COLLECTOR_MARK_COMPACT_H_ #include <memory> +#include <unordered_set> #include "base/atomic.h" #include "barrier.h" @@ -51,6 +52,14 @@ class MarkCompact : public GarbageCollector { void RunPhases() override REQUIRES(!Locks::mutator_lock_); + // Updated before (or in) pre-compaction pause and is accessed only in the + // pause or during concurrent compaction. The flag is reset after compaction + // is completed and never accessed by mutators. Therefore, safe to update + // without any memory ordering. + bool IsCompacting() const { + return compacting_; + } + GcType GetGcType() const override { return kGcTypeFull; } @@ -97,12 +106,25 @@ class MarkCompact : public GarbageCollector { mirror::Object* IsMarked(mirror::Object* obj) override REQUIRES_SHARED(Locks::mutator_lock_, Locks::heap_bitmap_lock_); + // Perform GC-root updates and heap protection so that during the concurrent + // compaction phase we can receive faults and compact the corresponding pages + // on the fly. This is performed in a STW pause. + void CompactionPause() REQUIRES(Locks::mutator_lock_, !Locks::heap_bitmap_lock_); + + mirror::Object* GetFromSpaceAddrFromBarrier(mirror::Object* old_ref) { + CHECK(compacting_); + if (live_words_bitmap_->HasAddress(old_ref)) { + return GetFromSpaceAddr(old_ref); + } + return old_ref; + } + private: using ObjReference = mirror::ObjectReference</*kPoisonReferences*/ false, mirror::Object>; - // Number of bits (live-words) covered by a single offset-vector (below) + // Number of bits (live-words) covered by a single chunk-info (below) // entry/word. // TODO: Since popcount is performed using SIMD instructions, we should - consider using 128-bit in order to halve the offset-vector size. + consider using 128-bit in order to halve the chunk-info size. static constexpr uint32_t kBitsPerVectorWord = kBitsPerIntPtrT; static constexpr uint32_t kOffsetChunkSize = kBitsPerVectorWord * kAlignment; static_assert(kOffsetChunkSize < kPageSize); @@ -124,37 +146,50 @@ class MarkCompact : public GarbageCollector { static_assert(IsPowerOfTwo(kBitmapWordsPerVectorWord)); static LiveWordsBitmap* Create(uintptr_t begin, uintptr_t end); - // Return offset (within the offset-vector chunk) of the nth live word. - uint32_t FindNthLiveWordOffset(size_t offset_vec_idx, uint32_t n) const; + // Return offset (within the indexed chunk-info) of the nth live word. + uint32_t FindNthLiveWordOffset(size_t chunk_idx, uint32_t n) const; // Sets all bits in the bitmap corresponding to the given range. Also // returns the bit-index of the first word. ALWAYS_INLINE uintptr_t SetLiveWords(uintptr_t begin, size_t size); // Count number of live words upto the given bit-index. This is to be used // to compute the post-compact address of an old reference.
ALWAYS_INLINE size_t CountLiveWordsUpto(size_t bit_idx) const; - // Call visitor for every stride of contiguous marked bits in the live-word - // bitmap. Passes to the visitor index of the first marked bit in the - // stride, stride-size and whether it's the last stride in the given range - // or not. + // Call 'visitor' for every stride of contiguous marked bits in the live-words + // bitmap, starting from begin_bit_idx. Only visit 'bytes' live bytes or + // until 'end', whichever comes first. + // Visitor is called with index of the first marked bit in the stride, + // stride size and whether it's the last stride in the given range or not. template <typename Visitor> ALWAYS_INLINE void VisitLiveStrides(uintptr_t begin_bit_idx, + uint8_t* end, const size_t bytes, Visitor&& visitor) const; + // Count the number of live bytes in the given vector entry. + size_t LiveBytesInBitmapWord(size_t chunk_idx) const; void ClearBitmap() { Bitmap::Clear(); } ALWAYS_INLINE uintptr_t Begin() const { return MemRangeBitmap::CoverBegin(); } ALWAYS_INLINE bool HasAddress(mirror::Object* obj) const { return MemRangeBitmap::HasAddress(reinterpret_cast<uintptr_t>(obj)); } - bool Test(uintptr_t bit_index) { + ALWAYS_INLINE bool Test(uintptr_t bit_index) const { return Bitmap::TestBit(bit_index); } + ALWAYS_INLINE bool Test(mirror::Object* obj) const { + return MemRangeBitmap::Test(reinterpret_cast<uintptr_t>(obj)); + } + ALWAYS_INLINE uintptr_t GetWord(size_t index) const { + static_assert(kBitmapWordsPerVectorWord == 1); + return Bitmap::Begin()[index * kBitmapWordsPerVectorWord]; + } }; - // For a given pre-compact object, return its from-space address. + // For a given object address in pre-compact space, return the corresponding + // address in the from-space, where heap pages are relocated in the compaction + // pause. mirror::Object* GetFromSpaceAddr(mirror::Object* obj) const { - DCHECK(live_words_bitmap_->HasAddress(obj) && live_words_bitmap_->Test(obj)); - uintptr_t offset = reinterpret_cast<uintptr_t>(obj) - live_words_bitmap_->Begin(); - return reinterpret_cast<mirror::Object*>(from_space_begin_ + offset); + DCHECK(live_words_bitmap_->HasAddress(obj)) << " obj=" << obj; + return reinterpret_cast<mirror::Object*>(reinterpret_cast<uintptr_t>(obj) + + from_space_slide_diff_); } void InitializePhase(); @@ -162,34 +197,55 @@ class MarkCompact : public GarbageCollector { void MarkingPhase() REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Locks::heap_bitmap_lock_); void CompactionPhase() REQUIRES_SHARED(Locks::mutator_lock_); - void SweepSystemWeaks(Thread* self, const bool paused) + void SweepSystemWeaks(Thread* self, Runtime* runtime, const bool paused) REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Locks::heap_bitmap_lock_); // Update the reference at given offset in the given object with post-compact // address. ALWAYS_INLINE void UpdateRef(mirror::Object* obj, MemberOffset offset) REQUIRES_SHARED(Locks::mutator_lock_); + + // Verify that the gc-root is updated only once. Returns false if the update + // shouldn't be done. + ALWAYS_INLINE bool VerifyRootSingleUpdate(void* root, + mirror::Object* old_ref, + const RootInfo& info) + REQUIRES_SHARED(Locks::mutator_lock_); // Update the given root with post-compact address. 
- ALWAYS_INLINE void UpdateRoot(mirror::CompressedReference<mirror::Object>* root) + ALWAYS_INLINE void UpdateRoot(mirror::CompressedReference<mirror::Object>* root, + const RootInfo& info = RootInfo(RootType::kRootUnknown)) + REQUIRES_SHARED(Locks::mutator_lock_); + ALWAYS_INLINE void UpdateRoot(mirror::Object** root, + const RootInfo& info = RootInfo(RootType::kRootUnknown)) REQUIRES_SHARED(Locks::mutator_lock_); // Given the pre-compact address, the function returns the post-compact // address of the given object. ALWAYS_INLINE mirror::Object* PostCompactAddress(mirror::Object* old_ref) const REQUIRES_SHARED(Locks::mutator_lock_); + // Compute post-compact address of an object in moving space. This function + // assumes that old_ref is in moving space. + ALWAYS_INLINE mirror::Object* PostCompactAddressUnchecked(mirror::Object* old_ref) const + REQUIRES_SHARED(Locks::mutator_lock_); + // Compute the new address for an object which was allocated prior to starting + // this GC cycle. + ALWAYS_INLINE mirror::Object* PostCompactOldObjAddr(mirror::Object* old_ref) const + REQUIRES_SHARED(Locks::mutator_lock_); + // Compute the new address for an object which was black-allocated during this + // GC cycle. + ALWAYS_INLINE mirror::Object* PostCompactBlackObjAddr(mirror::Object* old_ref) const + REQUIRES_SHARED(Locks::mutator_lock_); // Identify immune spaces and reset card-table, mod-union-table, and mark // bitmaps. void BindAndResetBitmaps() REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_); // Perform one last round of marking, identifying roots from dirty cards - during a stop-the-world (STW) pause - void PausePhase() REQUIRES(Locks::mutator_lock_, !Locks::heap_bitmap_lock_); - // Perform GC-root updation and heap protection so that during the concurrent - compaction phase we can receive faults and compact the corresponding pages - on the fly. This is performed in a STW pause. - void PreCompactionPhase() REQUIRES(Locks::mutator_lock_); - // Compute offset-vector and other data structures required during concurrent - compaction + // during a stop-the-world (STW) pause. + void MarkingPause() REQUIRES(Locks::mutator_lock_, !Locks::heap_bitmap_lock_); + // Perform the stop-the-world pause prior to concurrent compaction. + void PreCompactionPhase() REQUIRES(!Locks::mutator_lock_); + // Compute offsets (in chunk_info_vec_) and other data structures required + // during concurrent compaction. void PrepareForCompaction() REQUIRES_SHARED(Locks::mutator_lock_); // Copy kPageSize live bytes starting from 'offset' (within the moving space), @@ -218,6 +274,24 @@ class MarkCompact : public GarbageCollector { // and pre_compact_offset_moving_space_ respectively. void InitMovingSpaceFirstObjects(const size_t vec_len) REQUIRES_SHARED(Locks::mutator_lock_); + // Gather the info related to black allocations from bump-pointer space to + // enable concurrent sliding of these pages. + void UpdateMovingSpaceBlackAllocations() REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_); + // Update first-object info from allocation-stack for non-moving space black + // allocations. + void UpdateNonMovingSpaceBlackAllocations() REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_); + + // Slides (retaining the empty holes, which are usually part of some in-use TLAB) + // a black page in the moving space. 'first_obj' is the object that overlaps with + // the first byte of the page being slid. pre_compact_page is the pre-compact + // address of the page being slid.
'page_idx' is used to fetch the first + allocated chunk's size and next page's first_obj. 'dest' is the kPageSize + sized memory where the contents would be copied. + void SlideBlackPage(mirror::Object* first_obj, + const size_t page_idx, + uint8_t* const pre_compact_page, + uint8_t* dest) + REQUIRES_SHARED(Locks::mutator_lock_); // Perform reference-processing and the likes before sweeping the non-movable // spaces. @@ -264,7 +338,7 @@ class MarkCompact : public GarbageCollector { REQUIRES(Locks::heap_bitmap_lock_); // Scan object for references. If kUpdateLivewords is true then set bits in - the live-words bitmap and add size to the offset vector. + the live-words bitmap and add size to chunk-info. template <bool kUpdateLiveWords> void ScanObject(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_); @@ -274,7 +348,7 @@ class MarkCompact : public GarbageCollector { REQUIRES(Locks::heap_bitmap_lock_); // Update the live-words bitmap as well as add the object size to the - offset_vector. Both are required for computation of post-compact addresses. + chunk-info vector. Both are required for computation of post-compact addresses. void UpdateLivenessInfo(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_); void ProcessReferences(Thread* self) @@ -318,34 +392,67 @@ class MarkCompact : public GarbageCollector { // which we will clear in the end. This would help in limiting the number of // VMAs that get created in the kernel. std::unique_ptr<LiveWordsBitmap<kAlignment>> live_words_bitmap_; + // Track GC-roots updated so far in a GC-cycle. This is to confirm that no + // GC-root is updated twice. + // TODO: Must be replaced with an efficient mechanism eventually. Or ensure + // that double updates don't happen in the first place. + std::unordered_set<void*> updated_roots_; + MemMap from_space_map_; // An array of live-bytes in logical chunks of kOffsetChunkSize size // in the 'to-be-compacted' space. - std::unique_ptr<MemMap> info_map_; - uint32_t* offset_vector_; + MemMap info_map_; // The main space bitmap accounting::ContinuousSpaceBitmap* current_space_bitmap_; accounting::ContinuousSpaceBitmap* non_moving_space_bitmap_; + space::ContinuousSpace* non_moving_space_; + space::BumpPointerSpace* const bump_pointer_space_; + Thread* thread_running_gc_; + size_t vector_length_; + size_t live_stack_freeze_size_; // For every page in the to-space (post-compact heap) we need to know the // first object from which we must compact and/or update references. This is // for both non-moving and moving space. Additionally, for the moving-space, // we also need the offset within the object from where we need to start // copying. - uint32_t* pre_compact_offset_moving_space_; + // chunk_info_vec_ holds live bytes for chunks during marking phase. After + // marking we perform an exclusive scan to compute offset for every chunk. + uint32_t* chunk_info_vec_; + // Array holding offset within first-object from where contents should be + // copied for a given post-compact page. + // For black-alloc pages, the array stores first-chunk-size. + union { + uint32_t* pre_compact_offset_moving_space_; + uint32_t* black_alloc_pages_first_chunk_size_; + }; // first_objs_moving_space_[i] is the address of the first object for the ith page ObjReference* first_objs_moving_space_; ObjReference* first_objs_non_moving_space_; size_t non_moving_first_objs_count_; // Number of first-objects in the moving space.
size_t moving_first_objs_count_; + // Number of pages consumed by black objects. + size_t black_page_count_; uint8_t* from_space_begin_; + // moving-space's end pointer at the marking pause. All allocations beyond + // this will be considered black in the current GC cycle. Aligned up to page + // size. uint8_t* black_allocations_begin_; - size_t vector_length_; - size_t live_stack_freeze_size_; - space::ContinuousSpace* non_moving_space_; - const space::BumpPointerSpace* bump_pointer_space_; - Thread* thread_running_gc_; + // End of compacted space. Used for computing post-compact addr of black + // allocated objects. Aligned up to page size. + uint8_t* post_compact_end_; + // Cache (black_allocations_begin_ - post_compact_end_) for post-compact + // address computations. + ptrdiff_t black_objs_slide_diff_; + // Cache (from_space_begin_ - bump_pointer_space_->Begin()) so that we can + // compute from-space address of a given pre-compact addr efficiently. + ptrdiff_t from_space_slide_diff_; + + // TODO: Remove once an efficient mechanism to deal with double root updates + // is incorporated. + void* stack_addr_; + void* stack_end_; // Set to true when compacting starts. bool compacting_; @@ -356,6 +463,9 @@ class MarkCompact : public GarbageCollector { class CardModifiedVisitor; class RefFieldsVisitor; template <bool kCheckBegin, bool kCheckEnd> class RefsUpdateVisitor; + class StackRefsUpdateVisitor; + class CompactionPauseCallback; + class ImmuneSpaceUpdateObjVisitor; DISALLOW_IMPLICIT_CONSTRUCTORS(MarkCompact); }; diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index 8407ba4376..466ee70ed6 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -61,6 +61,7 @@ #include "gc/accounting/remembered_set.h" #include "gc/accounting/space_bitmap-inl.h" #include "gc/collector/concurrent_copying.h" +#include "gc/collector/mark_compact.h" #include "gc/collector/mark_sweep.h" #include "gc/collector/partial_mark_sweep.h" #include "gc/collector/semi_space.h" @@ -448,7 +449,8 @@ Heap::Heap(size_t initial_size, mark_bitmap_.reset(new accounting::HeapBitmap(this)); // We don't have hspace compaction enabled with CC or CMC. - if (foreground_collector_type_ == kCollectorTypeCC) { + if (foreground_collector_type_ == kCollectorTypeCC + || foreground_collector_type_ == kCollectorTypeCMC) { use_homogeneous_space_compaction_for_oom_ = false; } bool support_homogeneous_space_compaction = @@ -629,10 +631,14 @@ Heap::Heap(size_t initial_size, std::move(main_mem_map_1)); CHECK(bump_pointer_space_ != nullptr) << "Failed to create bump pointer space"; AddSpace(bump_pointer_space_); - temp_space_ = space::BumpPointerSpace::CreateFromMemMap("Bump pointer space 2", - std::move(main_mem_map_2)); - CHECK(temp_space_ != nullptr) << "Failed to create bump pointer space"; - AddSpace(temp_space_); + // For Concurrent Mark-compact GC we don't need the temp space to be in + lower 4GB. So the GC will create its temp space itself.
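The two cached differences introduced above, black_objs_slide_diff_ and from_space_slide_diff_, reduce address translation to a single add or subtract. Below is a sketch with made-up addresses; the exact use of black_objs_slide_diff_ in PostCompactBlackObjAddr() is inferred from the field comments, so treat this as an illustration rather than the patch's definition.

#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
  // Made-up addresses standing in for the real space boundaries.
  uintptr_t moving_space_begin = 0x12c00000;  // bump_pointer_space_->Begin()
  uintptr_t from_space_begin   = 0x7f000000;  // where pages were mremap'd to
  uintptr_t black_allocs_begin = 0x12c80000;  // moving-space end at marking pause
  uintptr_t post_compact_end   = 0x12c50000;  // end of the compacted objects

  // Computed once, like from_space_slide_diff_ and black_objs_slide_diff_.
  ptrdiff_t from_space_slide_diff = from_space_begin - moving_space_begin;
  ptrdiff_t black_objs_slide_diff = black_allocs_begin - post_compact_end;

  uintptr_t pre_compact_obj = 0x12c40010;  // object marked before the pause
  uintptr_t black_obj       = 0x12c90020;  // object allocated after the pause

  // GetFromSpaceAddr(): same offset, but within the relocated from-space copy.
  printf("from-space copy:        %#zx\n",
         static_cast<size_t>(pre_compact_obj + from_space_slide_diff));
  // Black objects slide down as one contiguous batch by a fixed amount.
  printf("post-compact black obj: %#zx\n",
         static_cast<size_t>(black_obj - black_objs_slide_diff));
  return 0;
}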
+ if (foreground_collector_type_ != kCollectorTypeCMC) { + temp_space_ = space::BumpPointerSpace::CreateFromMemMap("Bump pointer space 2", + std::move(main_mem_map_2)); + CHECK(temp_space_ != nullptr) << "Failed to create bump pointer space"; + AddSpace(temp_space_); + } CHECK(separate_non_moving_space); } else { CreateMainMallocSpace(std::move(main_mem_map_1), initial_size, growth_limit_, capacity_); @@ -758,6 +764,10 @@ Heap::Heap(size_t initial_size, semi_space_collector_ = new collector::SemiSpace(this); garbage_collectors_.push_back(semi_space_collector_); } + if (MayUseCollector(kCollectorTypeCMC)) { + mark_compact_ = new collector::MarkCompact(this); + garbage_collectors_.push_back(mark_compact_); + } if (MayUseCollector(kCollectorTypeCC)) { concurrent_copying_collector_ = new collector::ConcurrentCopying(this, /*young_gen=*/false, @@ -963,7 +973,6 @@ void Heap::DecrementDisableMovingGC(Thread* self) { void Heap::IncrementDisableThreadFlip(Thread* self) { // Supposed to be called by mutators. If thread_flip_running_ is true, block. Otherwise, go ahead. - CHECK(kUseReadBarrier); bool is_nested = self->GetDisableThreadFlipCount() > 0; self->IncrementDisableThreadFlipCount(); if (is_nested) { @@ -997,7 +1006,6 @@ void Heap::IncrementDisableThreadFlip(Thread* self) { void Heap::DecrementDisableThreadFlip(Thread* self) { // Supposed to be called by mutators. Decrement disable_thread_flip_count_ and potentially wake up // the GC waiting before doing a thread flip. - CHECK(kUseReadBarrier); self->DecrementDisableThreadFlipCount(); bool is_outermost = self->GetDisableThreadFlipCount() == 0; if (!is_outermost) { @@ -1017,7 +1025,6 @@ void Heap::DecrementDisableThreadFlip(Thread* self) { void Heap::ThreadFlipBegin(Thread* self) { // Supposed to be called by GC. Set thread_flip_running_ to be true. If disable_thread_flip_count_ // > 0, block. Otherwise, go ahead. - CHECK(kUseReadBarrier); ScopedThreadStateChange tsc(self, ThreadState::kWaitingForGcThreadFlip); MutexLock mu(self, *thread_flip_lock_); thread_flip_cond_->CheckSafeToWait(self); @@ -1043,7 +1050,6 @@ void Heap::ThreadFlipBegin(Thread* self) { void Heap::ThreadFlipEnd(Thread* self) { // Supposed to be called by GC. Set thread_flip_running_ to false and potentially wake up mutators // waiting before doing a JNI critical. - CHECK(kUseReadBarrier); MutexLock mu(self, *thread_flip_lock_); CHECK(thread_flip_running_); thread_flip_running_ = false; @@ -2199,6 +2205,15 @@ void Heap::ChangeCollector(CollectorType collector_type) { } break; } + case kCollectorTypeCMC: { + gc_plan_.push_back(collector::kGcTypeFull); + if (use_tlab_) { + ChangeAllocator(kAllocatorTypeTLAB); + } else { + ChangeAllocator(kAllocatorTypeBumpPointer); + } + break; + } case kCollectorTypeSS: { gc_plan_.push_back(collector::kGcTypeFull); if (use_tlab_) { @@ -2710,6 +2725,9 @@ collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, semi_space_collector_->SetSwapSemiSpaces(true); collector = semi_space_collector_; break; + case kCollectorTypeCMC: + collector = mark_compact_; + break; case kCollectorTypeCC: collector::ConcurrentCopying* active_cc_collector; if (use_generational_cc_) { @@ -2728,7 +2746,9 @@ collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, default: LOG(FATAL) << "Invalid collector type " << static_cast<size_t>(collector_type_); } - if (collector != active_concurrent_copying_collector_.load(std::memory_order_relaxed)) { + // temp_space_ will be null for kCollectorTypeCMC. 
+ if (temp_space_ != nullptr + && collector != active_concurrent_copying_collector_.load(std::memory_order_relaxed)) { temp_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE); if (kIsDebugBuild) { // Try to read each page of the memory map in case mprotect didn't work properly b/19894268. @@ -4494,8 +4514,13 @@ mirror::Object* Heap::AllocWithNewTLAB(Thread* self, DCHECK_LE(alloc_size, self->TlabSize()); } else if (allocator_type == kAllocatorTypeTLAB) { DCHECK(bump_pointer_space_ != nullptr); + // Try to allocate a page-aligned TLAB (not necessary though). + // TODO: for large allocations, which are rare, maybe we should allocate + // that object and return. There is no need to revoke the current TLAB, + // particularly if it's mostly unutilized. + size_t def_pr_tlab_size = RoundDown(alloc_size + kDefaultTLABSize, kPageSize) - alloc_size; size_t next_tlab_size = JHPCalculateNextTlabSize(self, - kDefaultTLABSize, + def_pr_tlab_size, alloc_size, &take_sample, &bytes_until_sample); diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h index 609a3c5957..13640acc35 100644 --- a/runtime/gc/heap.h +++ b/runtime/gc/heap.h @@ -665,6 +665,10 @@ class Heap { return live_stack_.get(); } + accounting::ObjectStack* GetAllocationStack() REQUIRES_SHARED(Locks::heap_bitmap_lock_) { + return allocation_stack_.get(); + } + void PreZygoteFork() NO_THREAD_SAFETY_ANALYSIS; // Mark and empty stack. @@ -816,6 +820,10 @@ class Heap { return active_collector; } + collector::MarkCompact* MarkCompactCollector() { + return mark_compact_; + } + CollectorType CurrentCollectorType() { return collector_type_; } @@ -1036,6 +1044,7 @@ class Heap { return collector_type == kCollectorTypeCC || collector_type == kCollectorTypeSS || + collector_type == kCollectorTypeCMC || collector_type == kCollectorTypeCCBackground || collector_type == kCollectorTypeHomogeneousSpaceCompact; } @@ -1227,6 +1236,7 @@ class Heap { // sweep GC, false for other GC types. bool IsGcConcurrent() const ALWAYS_INLINE { return collector_type_ == kCollectorTypeCC || + collector_type_ == kCollectorTypeCMC || collector_type_ == kCollectorTypeCMS || collector_type_ == kCollectorTypeCCBackground; } @@ -1592,6 +1602,7 @@ class Heap { std::vector<collector::GarbageCollector*> garbage_collectors_; collector::SemiSpace* semi_space_collector_; + collector::MarkCompact* mark_compact_; Atomic<collector::ConcurrentCopying*> active_concurrent_copying_collector_; collector::ConcurrentCopying* young_concurrent_copying_collector_; collector::ConcurrentCopying* concurrent_copying_collector_; diff --git a/runtime/gc/space/bump_pointer_space-inl.h b/runtime/gc/space/bump_pointer_space-inl.h index 20f7a93eb1..41f2f7da6a 100644 --- a/runtime/gc/space/bump_pointer_space-inl.h +++ b/runtime/gc/space/bump_pointer_space-inl.h @@ -20,6 +20,7 @@ #include "bump_pointer_space.h" #include "base/bit_utils.h" +#include "mirror/object-inl.h" namespace art { namespace gc { @@ -44,6 +45,14 @@ inline mirror::Object* BumpPointerSpace::AllocThreadUnsafe(Thread* self, size_t size_t* bytes_allocated, size_t* usable_size, size_t* bytes_tl_bulk_allocated) { + { + // We don't create blocks for these allocations. So confirm that we are still + // operating on the main-block. + // TODO: If the assertion fails, then start associating + // each allocation here to a new block. 
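The AllocWithNewTLAB() change above sizes the new TLAB so that the triggering allocation plus the TLAB spans a whole number of pages (useful once the space's end has been page-aligned, which the new AlignEnd() helper arranges). A quick numeric check of that expression, assuming a 4 KiB page and an illustrative 32 KiB default TLAB size:

#include <cstdio>

constexpr size_t kPageSize = 4096;              // assumed page size
constexpr size_t kDefaultTLABSize = 32 * 1024;  // illustrative default

constexpr size_t RoundDown(size_t x, size_t n) { return x - (x % n); }

int main() {
  const size_t alloc_sizes[] = {16, 200, 5000};
  for (size_t alloc_size : alloc_sizes) {
    // Same expression as the patch: shrink the default TLAB so that the
    // allocation plus the TLAB spans a whole number of pages.
    size_t tlab_size = RoundDown(alloc_size + kDefaultTLABSize, kPageSize) - alloc_size;
    size_t total = alloc_size + tlab_size;
    printf("alloc=%zu tlab=%zu total=%zu page-multiple=%s\n",
           alloc_size, tlab_size, total, total % kPageSize == 0 ? "yes" : "no");
  }
  return 0;
}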
+ MutexLock mu(self, block_lock_); + CHECK(block_sizes_.empty()); + } Locks::mutator_lock_->AssertExclusiveHeld(self); num_bytes = RoundUp(num_bytes, kAlignment); uint8_t* end = end_.load(std::memory_order_relaxed); @@ -89,6 +98,11 @@ inline mirror::Object* BumpPointerSpace::AllocNonvirtual(size_t num_bytes) { return ret; } +inline mirror::Object* BumpPointerSpace::GetNextObject(mirror::Object* obj) { + const uintptr_t position = reinterpret_cast<uintptr_t>(obj) + obj->SizeOf(); + return reinterpret_cast<mirror::Object*>(RoundUp(position, kAlignment)); +} + } // namespace space } // namespace gc } // namespace art diff --git a/runtime/gc/space/bump_pointer_space-walk-inl.h b/runtime/gc/space/bump_pointer_space-walk-inl.h index 5d05ea2d65..a978f62c61 100644 --- a/runtime/gc/space/bump_pointer_space-walk-inl.h +++ b/runtime/gc/space/bump_pointer_space-walk-inl.h @@ -17,12 +17,14 @@ #ifndef ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_WALK_INL_H_ #define ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_WALK_INL_H_ -#include "bump_pointer_space.h" +#include "bump_pointer_space-inl.h" #include "base/bit_utils.h" #include "mirror/object-inl.h" #include "thread-current-inl.h" +#include <memory> + namespace art { namespace gc { namespace space { @@ -32,6 +34,7 @@ inline void BumpPointerSpace::Walk(Visitor&& visitor) { uint8_t* pos = Begin(); uint8_t* end = End(); uint8_t* main_end = pos; + std::unique_ptr<std::vector<size_t>> block_sizes_copy; // Internal indirection w/ NO_THREAD_SAFETY_ANALYSIS. Optimally, we'd like to have an annotation // like // REQUIRES_AS(visitor.operator(mirror::Object*)) @@ -49,15 +52,17 @@ inline void BumpPointerSpace::Walk(Visitor&& visitor) { MutexLock mu(Thread::Current(), block_lock_); // If we have 0 blocks then we need to update the main header since we have bump pointer style // allocation into an unbounded region (actually bounded by Capacity()). - if (num_blocks_ == 0) { + if (block_sizes_.empty()) { UpdateMainBlock(); } main_end = Begin() + main_block_size_; - if (num_blocks_ == 0) { + if (block_sizes_.empty()) { // We don't have any other blocks, this means someone else may be allocating into the main // block. In this case, we don't want to try and visit the other blocks after the main block // since these could actually be part of the main block. end = main_end; + } else { + block_sizes_copy.reset(new std::vector<size_t>(block_sizes_.begin(), block_sizes_.end())); } } // Walk all of the objects in the main block first. @@ -66,31 +71,33 @@ inline void BumpPointerSpace::Walk(Visitor&& visitor) { // No read barrier because obj may not be a valid object. if (obj->GetClass<kDefaultVerifyFlags, kWithoutReadBarrier>() == nullptr) { // There is a race condition where a thread has just allocated an object but not set the - // class. We can't know the size of this object, so we don't visit it and exit the function - // since there is guaranteed to be not other blocks. - return; + // class. We can't know the size of this object, so we don't visit it and break the loop + pos = main_end; + break; } else { no_thread_safety_analysis_visit(obj); pos = reinterpret_cast<uint8_t*>(GetNextObject(obj)); } } // Walk the other blocks (currently only TLABs). 
- while (pos < end) { - BlockHeader* header = reinterpret_cast<BlockHeader*>(pos); - size_t block_size = header->size_; - pos += sizeof(BlockHeader); // Skip the header so that we know where the objects - mirror::Object* obj = reinterpret_cast<mirror::Object*>(pos); - const mirror::Object* end_obj = reinterpret_cast<const mirror::Object*>(pos + block_size); - CHECK_LE(reinterpret_cast<const uint8_t*>(end_obj), End()); - // We don't know how many objects are allocated in the current block. When we hit a null class - // assume its the end. TODO: Have a thread update the header when it flushes the block? - // No read barrier because obj may not be a valid object. - while (obj < end_obj && obj->GetClass<kDefaultVerifyFlags, kWithoutReadBarrier>() != nullptr) { - no_thread_safety_analysis_visit(obj); - obj = GetNextObject(obj); + if (block_sizes_copy != nullptr) { + for (size_t block_size : *block_sizes_copy) { + mirror::Object* obj = reinterpret_cast<mirror::Object*>(pos); + const mirror::Object* end_obj = reinterpret_cast<const mirror::Object*>(pos + block_size); + CHECK_LE(reinterpret_cast<const uint8_t*>(end_obj), End()); + // We don't know how many objects are allocated in the current block. When we hit a null class + // assume it's the end. TODO: Have a thread update the header when it flushes the block? + // No read barrier because obj may not be a valid object. + while (obj < end_obj && obj->GetClass<kDefaultVerifyFlags, kWithoutReadBarrier>() != nullptr) { + no_thread_safety_analysis_visit(obj); + obj = GetNextObject(obj); + } + pos += block_size; } - pos += block_size; + } else { + CHECK_EQ(end, main_end); } + CHECK_EQ(pos, end); } } // namespace space diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc index fc41dbaa55..7753f73ca4 100644 --- a/runtime/gc/space/bump_pointer_space.cc +++ b/runtime/gc/space/bump_pointer_space.cc @@ -54,8 +54,7 @@ BumpPointerSpace::BumpPointerSpace(const std::string& name, uint8_t* begin, uint growth_end_(limit), objects_allocated_(0), bytes_allocated_(0), block_lock_("Block lock"), - main_block_size_(0), - num_blocks_(0) { + main_block_size_(0) { // This constructor gets called only from Heap::PreZygoteFork(), which // doesn't require a mark_bitmap. 
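The rewritten Walk() above no longer parses an in-heap BlockHeader; the extent of every block after the main one comes from a copy of the off-heap block_sizes_ list taken under block_lock_. A toy model of that traversal with made-up sizes (plain offsets stand in for real objects), just to show how the main block and the recorded sizes partition the space:

#include <cstdio>
#include <deque>

int main() {
  // Made-up layout: a main block walked bump-pointer style, followed by two
  // TLAB blocks whose extents are known only from the off-heap size list.
  const size_t kObjSize = 16;  // pretend every object is 16 bytes
  const size_t main_block_size = 48;
  std::deque<size_t> block_sizes = {32, 16};

  size_t pos = 0;
  for (; pos < main_block_size; pos += kObjSize) {
    printf("main-block object at offset %zu\n", pos);
  }
  for (size_t block_size : block_sizes) {
    // Each block is walked object-by-object, bounded by its recorded size
    // rather than by an in-heap header.
    for (size_t off = 0; off < block_size; off += kObjSize) {
      printf("TLAB object at offset %zu\n", pos + off);
    }
    pos += block_size;
  }
  printf("walked %zu bytes total\n", pos);
  return 0;
}

Keeping the sizes off-heap is also what later lets SetBlockSizes() fold fully compacted blocks into the main block without touching the heap pages themselves.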
} @@ -70,8 +69,7 @@ BumpPointerSpace::BumpPointerSpace(const std::string& name, MemMap&& mem_map) growth_end_(mem_map_.End()), objects_allocated_(0), bytes_allocated_(0), block_lock_("Block lock", kBumpPointerSpaceBlockLock), - main_block_size_(0), - num_blocks_(0) { + main_block_size_(0) { mark_bitmap_ = accounting::ContinuousSpaceBitmap::Create("bump-pointer space live bitmap", Begin(), @@ -92,7 +90,7 @@ void BumpPointerSpace::Clear() { growth_end_ = Limit(); { MutexLock mu(Thread::Current(), block_lock_); - num_blocks_ = 0; + block_sizes_.clear(); main_block_size_ = 0; } } @@ -103,11 +101,6 @@ void BumpPointerSpace::Dump(std::ostream& os) const { << reinterpret_cast<void*>(Limit()); } -mirror::Object* BumpPointerSpace::GetNextObject(mirror::Object* obj) { - const uintptr_t position = reinterpret_cast<uintptr_t>(obj) + obj->SizeOf(); - return reinterpret_cast<mirror::Object*>(RoundUp(position, kAlignment)); -} - size_t BumpPointerSpace::RevokeThreadLocalBuffers(Thread* thread) { MutexLock mu(Thread::Current(), block_lock_); RevokeThreadLocalBuffersLocked(thread); @@ -147,23 +140,19 @@ void BumpPointerSpace::AssertAllThreadLocalBuffersAreRevoked() { } void BumpPointerSpace::UpdateMainBlock() { - DCHECK_EQ(num_blocks_, 0U); + DCHECK(block_sizes_.empty()); main_block_size_ = Size(); } // Returns the start of the storage. uint8_t* BumpPointerSpace::AllocBlock(size_t bytes) { bytes = RoundUp(bytes, kAlignment); - if (!num_blocks_) { + if (block_sizes_.empty()) { UpdateMainBlock(); } - uint8_t* storage = reinterpret_cast<uint8_t*>( - AllocNonvirtualWithoutAccounting(bytes + sizeof(BlockHeader))); + uint8_t* storage = reinterpret_cast<uint8_t*>(AllocNonvirtualWithoutAccounting(bytes)); if (LIKELY(storage != nullptr)) { - BlockHeader* header = reinterpret_cast<BlockHeader*>(storage); - header->size_ = bytes; // Write out the block header. - storage += sizeof(BlockHeader); - ++num_blocks_; + block_sizes_.push_back(bytes); } return storage; } @@ -183,7 +172,7 @@ uint64_t BumpPointerSpace::GetBytesAllocated() { MutexLock mu3(Thread::Current(), block_lock_); // If we don't have any blocks, we don't have any thread local buffers. This check is required // since there can exist multiple bump pointer spaces which exist at the same time. - if (num_blocks_ > 0) { + if (!block_sizes_.empty()) { for (Thread* thread : thread_list) { total += thread->GetThreadLocalBytesAllocated(); } @@ -201,7 +190,7 @@ uint64_t BumpPointerSpace::GetObjectsAllocated() { MutexLock mu3(Thread::Current(), block_lock_); // If we don't have any blocks, we don't have any thread local buffers. This check is required // since there can exist multiple bump pointer spaces which exist at the same time. - if (num_blocks_ > 0) { + if (!block_sizes_.empty()) { for (Thread* thread : thread_list) { total += thread->GetThreadLocalObjectsAllocated(); } @@ -246,6 +235,52 @@ size_t BumpPointerSpace::AllocationSizeNonvirtual(mirror::Object* obj, size_t* u return num_bytes; } +uint8_t* BumpPointerSpace::AlignEnd(Thread* self, size_t alignment) { + Locks::mutator_lock_->AssertExclusiveHeld(self); + DCHECK(IsAligned<kAlignment>(alignment)); + uint8_t* end = end_.load(std::memory_order_relaxed); + uint8_t* aligned_end = AlignUp(end, alignment); + ptrdiff_t diff = aligned_end - end; + if (diff > 0) { + end_.store(aligned_end, std::memory_order_relaxed); + // If there are blocks after the main one, then just add the diff to the last + // block.
+ MutexLock mu(self, block_lock_); + if (!block_sizes_.empty()) { + block_sizes_.back() += diff; + } + } + return end; +} + +std::vector<size_t>* BumpPointerSpace::GetBlockSizes(Thread* self, size_t* main_block_size) { + std::vector<size_t>* block_sizes = nullptr; + MutexLock mu(self, block_lock_); + if (!block_sizes_.empty()) { + block_sizes = new std::vector<size_t>(block_sizes_.begin(), block_sizes_.end()); + } else { + UpdateMainBlock(); + } + *main_block_size = main_block_size_; + return block_sizes; +} + +void BumpPointerSpace::SetBlockSizes(Thread* self, + const size_t main_block_size, + const size_t first_valid_idx) { + MutexLock mu(self, block_lock_); + main_block_size_ = main_block_size; + if (!block_sizes_.empty()) { + block_sizes_.erase(block_sizes_.begin(), block_sizes_.begin() + first_valid_idx); + } + size_t size = main_block_size; + for (size_t block_size : block_sizes_) { + size += block_size; + } + DCHECK(IsAligned<kAlignment>(size)); + end_.store(Begin() + size, std::memory_order_relaxed); +} + } // namespace space } // namespace gc } // namespace art diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h index a400bf39b9..230673d360 100644 --- a/runtime/gc/space/bump_pointer_space.h +++ b/runtime/gc/space/bump_pointer_space.h @@ -17,9 +17,10 @@ #ifndef ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_H_ #define ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_H_ +#include "base/mutex.h" #include "space.h" -#include "base/mutex.h" +#include <deque> namespace art { @@ -30,6 +31,7 @@ class Object; namespace gc { namespace collector { +class MarkCompact; class MarkSweep; } // namespace collector @@ -57,7 +59,7 @@ class BumpPointerSpace final : public ContinuousMemMapAllocSpace { // Thread-unsafe allocation for when mutators are suspended, used by the semispace collector. mirror::Object* AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated, size_t* usable_size, size_t* bytes_tl_bulk_allocated) - override REQUIRES(Locks::mutator_lock_); + override REQUIRES(Locks::mutator_lock_, !block_lock_); mirror::Object* AllocNonvirtual(size_t num_bytes); mirror::Object* AllocNonvirtualWithoutAccounting(size_t num_bytes); @@ -124,18 +126,9 @@ class BumpPointerSpace final : public ContinuousMemMapAllocSpace { return true; } - bool Contains(const mirror::Object* obj) const override { - const uint8_t* byte_obj = reinterpret_cast<const uint8_t*>(obj); - return byte_obj >= Begin() && byte_obj < End(); - } - // TODO: Change this? Mainly used for compacting to a particular region of memory. BumpPointerSpace(const std::string& name, uint8_t* begin, uint8_t* limit); - // Return the object which comes after obj, while ensuring alignment. - static mirror::Object* GetNextObject(mirror::Object* obj) - REQUIRES_SHARED(Locks::mutator_lock_); - // Allocate a new TLAB, returns false if the allocation failed. bool AllocNewTlab(Thread* self, size_t bytes) REQUIRES(!block_lock_); @@ -179,23 +172,40 @@ class BumpPointerSpace final : public ContinuousMemMapAllocSpace { AtomicInteger objects_allocated_; // Accumulated from revoked thread local regions. AtomicInteger bytes_allocated_; // Accumulated from revoked thread local regions. Mutex block_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; - // The objects at the start of the space are stored in the main block. The main block doesn't - // have a header, this lets us walk empty spaces which are mprotected. + // The objects at the start of the space are stored in the main block. 
size_t main_block_size_ GUARDED_BY(block_lock_); - // The number of blocks in the space, if it is 0 then the space has one long continuous block - // which doesn't have an updated header. - size_t num_blocks_ GUARDED_BY(block_lock_); + // List of block sizes (in bytes) after the main-block. Needed for Walk(). + // If empty then the space has only one long continuous block. Each TLAB + // allocation has one entry in this deque. + // Keeping block-sizes off-heap simplifies sliding compaction algorithms. + // The compaction algorithm should ideally compact all objects into the main + // block, thereby enabling erasing corresponding entries from here. + std::deque<size_t> block_sizes_ GUARDED_BY(block_lock_); private: - struct BlockHeader { - size_t size_; // Size of the block in bytes, does not include the header. - size_t unused_; // Ensures alignment of kAlignment. - }; + // Return the object which comes after obj, while ensuring alignment. + static mirror::Object* GetNextObject(mirror::Object* obj) + REQUIRES_SHARED(Locks::mutator_lock_); + + // Return a vector of block sizes on the space. Required by MarkCompact GC for + // walking black objects allocated after marking phase. + std::vector<size_t>* GetBlockSizes(Thread* self, size_t* main_block_size) REQUIRES(!block_lock_); + + // Once the MarkCompact decides the post-compact layout of the space in the + // pre-compaction pause, it calls this function to update the block sizes. It is + // done by passing the new main-block size, which consumes a bunch of blocks + // into itself, and the index of first unconsumed block. This works as all the + // block sizes are ordered. Also updates 'end_' to reflect the change. + void SetBlockSizes(Thread* self, const size_t main_block_size, const size_t first_valid_idx) + REQUIRES(!block_lock_, Locks::mutator_lock_); - static_assert(sizeof(BlockHeader) % kAlignment == 0, - "continuous block must be kAlignment aligned"); + // Align end to the given alignment. This is done in MarkCompact GC when + // mutators are suspended so that upcoming TLAB allocations start with a new + // page. Returns the pre-alignment end. + uint8_t* AlignEnd(Thread* self, size_t alignment) REQUIRES(Locks::mutator_lock_); friend class collector::MarkSweep; + friend class collector::MarkCompact; DISALLOW_COPY_AND_ASSIGN(BumpPointerSpace); }; diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc index 3eb1701080..6c83741df0 100644 --- a/runtime/jit/jit_code_cache.cc +++ b/runtime/jit/jit_code_cache.cc @@ -422,7 +422,6 @@ void JitCodeCache::SweepRootTables(IsMarkedVisitor* visitor) { // TODO: Do not use IsMarked for j.l.Class, and adjust once we move this method // out of the weak access/creation pause. b/32167580 if (new_object != nullptr && new_object != object) { - DCHECK(new_object->IsString()); roots[i] = GcRoot<mirror::Object>(new_object); } } else { diff --git a/runtime/jni/jni_internal.cc b/runtime/jni/jni_internal.cc index 9d8c4f33f5..8325bc3720 100644 --- a/runtime/jni/jni_internal.cc +++ b/runtime/jni/jni_internal.cc @@ -2178,13 +2178,9 @@ class JNI { if (heap->IsMovableObject(s)) { StackHandleScope<1> hs(soa.Self()); HandleWrapperObjPtr<mirror::String> h(hs.NewHandleWrapper(&s)); - if (!kUseReadBarrier) { - heap->IncrementDisableMovingGC(soa.Self()); - } else { - // For the CC collector, we only need to wait for the thread flip rather - // than the whole GC to occur thanks to the to-space invariant. 
- heap->IncrementDisableThreadFlip(soa.Self()); - } + // For the CC and CMC collector, we only need to wait for the thread flip rather + // than the whole GC to occur thanks to the to-space invariant. + heap->IncrementDisableThreadFlip(soa.Self()); } if (is_copy != nullptr) { *is_copy = JNI_FALSE; @@ -2201,11 +2197,7 @@ class JNI { gc::Heap* heap = Runtime::Current()->GetHeap(); ObjPtr<mirror::String> s = soa.Decode<mirror::String>(java_string); if (!s->IsCompressed() && heap->IsMovableObject(s)) { - if (!kUseReadBarrier) { - heap->DecrementDisableMovingGC(soa.Self()); - } else { - heap->DecrementDisableThreadFlip(soa.Self()); - } + heap->DecrementDisableThreadFlip(soa.Self()); } // TODO: For uncompressed strings GetStringCritical() always returns `s->GetValue()`. // Should we report an error if the user passes a different `chars`? @@ -2368,13 +2360,9 @@ class JNI { } gc::Heap* heap = Runtime::Current()->GetHeap(); if (heap->IsMovableObject(array)) { - if (!kUseReadBarrier) { - heap->IncrementDisableMovingGC(soa.Self()); - } else { - // For the CC collector, we only need to wait for the thread flip rather than the whole GC - // to occur thanks to the to-space invariant. - heap->IncrementDisableThreadFlip(soa.Self()); - } + // For the CC and CMC collector, we only need to wait for the thread flip rather + // than the whole GC to occur thanks to the to-space invariant. + heap->IncrementDisableThreadFlip(soa.Self()); // Re-decode in case the object moved since IncrementDisableGC waits for GC to complete. array = soa.Decode<mirror::Array>(java_array); } @@ -2969,11 +2957,7 @@ class JNI { delete[] reinterpret_cast<uint64_t*>(elements); } else if (heap->IsMovableObject(array)) { // Non copy to a movable object must means that we had disabled the moving GC. - if (!kUseReadBarrier) { - heap->DecrementDisableMovingGC(soa.Self()); - } else { - heap->DecrementDisableThreadFlip(soa.Self()); - } + heap->DecrementDisableThreadFlip(soa.Self()); } } } diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h index b7a8a8ce49..a7faa376cd 100644 --- a/runtime/mirror/array-inl.h +++ b/runtime/mirror/array-inl.h @@ -36,12 +36,11 @@ inline uint32_t Array::ClassSize(PointerSize pointer_size) { return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 0, 0, pointer_size); } -template<VerifyObjectFlags kVerifyFlags> +template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption> inline size_t Array::SizeOf() { - // No read barrier is needed for reading a constant primitive field through - // constant reference field chain. See ReadBarrierOption. size_t component_size_shift = - GetClass<kVerifyFlags, kWithoutReadBarrier>()->GetComponentSizeShift(); + GetClass<kVerifyFlags, kReadBarrierOption>() + ->template GetComponentSizeShift<kReadBarrierOption>(); // Don't need to check this since we already check this in GetClass. 
int32_t component_count = GetLength<static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis)>(); diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h index 4bf9deebfe..dfe7d475c1 100644 --- a/runtime/mirror/array.h +++ b/runtime/mirror/array.h @@ -58,7 +58,8 @@ class MANAGED Array : public Object { REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_); - template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags> + template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, + ReadBarrierOption kReadBarrierOption = kWithoutReadBarrier> size_t SizeOf() REQUIRES_SHARED(Locks::mutator_lock_); template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags> ALWAYS_INLINE int32_t GetLength() REQUIRES_SHARED(Locks::mutator_lock_) { diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h index b6bd22eb76..77f78c5156 100644 --- a/runtime/mirror/class-inl.h +++ b/runtime/mirror/class-inl.h @@ -1077,10 +1077,9 @@ inline size_t Class::GetComponentSize() { return 1U << GetComponentSizeShift(); } +template <ReadBarrierOption kReadBarrierOption> inline size_t Class::GetComponentSizeShift() { - // No read barrier is needed for reading a constant primitive field through - // constant reference field. See ReadBarrierOption. - return GetComponentType<kDefaultVerifyFlags, kWithoutReadBarrier>()->GetPrimitiveTypeSizeShift(); + return GetComponentType<kDefaultVerifyFlags, kReadBarrierOption>()->GetPrimitiveTypeSizeShift(); } inline bool Class::IsObjectClass() { @@ -1106,11 +1105,9 @@ inline bool Class::IsArrayClass() { return GetComponentType<kVerifyFlags, kWithoutReadBarrier>() != nullptr; } -template<VerifyObjectFlags kVerifyFlags> +template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption> inline bool Class::IsObjectArrayClass() { - // We do not need a read barrier here as the primitive type is constant, - // both from-space and to-space component type classes shall yield the same result. - const ObjPtr<Class> component_type = GetComponentType<kVerifyFlags, kWithoutReadBarrier>(); + const ObjPtr<Class> component_type = GetComponentType<kVerifyFlags, kReadBarrierOption>(); constexpr VerifyObjectFlags kNewFlags = RemoveThisFlags(kVerifyFlags); return component_type != nullptr && !component_type->IsPrimitive<kNewFlags>(); } diff --git a/runtime/mirror/class-refvisitor-inl.h b/runtime/mirror/class-refvisitor-inl.h index 8c85387d6a..1dd7c107d4 100644 --- a/runtime/mirror/class-refvisitor-inl.h +++ b/runtime/mirror/class-refvisitor-inl.h @@ -30,6 +30,11 @@ template <bool kVisitNativeRoots, ReadBarrierOption kReadBarrierOption, typename Visitor> inline void Class::VisitReferences(ObjPtr<Class> klass, const Visitor& visitor) { + if (kVisitNativeRoots) { + // Since this class is reachable, we must also visit the associated roots when we scan it. + VisitNativeRoots<kReadBarrierOption>( + visitor, Runtime::Current()->GetClassLinker()->GetImagePointerSize()); + } VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass.Ptr(), visitor); // Right after a class is allocated, but not yet loaded // (ClassStatus::kNotReady, see ClassLinker::LoadClass()), GC may find it @@ -44,18 +49,15 @@ inline void Class::VisitReferences(ObjPtr<Class> klass, const Visitor& visitor) // linked yet. VisitStaticFieldsReferences<kVerifyFlags, kReadBarrierOption>(this, visitor); } - if (kVisitNativeRoots) { - // Since this class is reachable, we must also visit the associated roots when we scan it. 
- VisitNativeRoots<kReadBarrierOption>( - visitor, Runtime::Current()->GetClassLinker()->GetImagePointerSize()); - } } template<ReadBarrierOption kReadBarrierOption, class Visitor> void Class::VisitNativeRoots(Visitor& visitor, PointerSize pointer_size) { VisitFields<kReadBarrierOption>([&](ArtField* field) REQUIRES_SHARED(art::Locks::mutator_lock_) { field->VisitRoots(visitor); - if (kIsDebugBuild && IsResolved()) { + // TODO: Once concurrent mark-compact GC is made concurrent and stops using + // kVisitNativeRoots, remove the following condition + if (kIsDebugBuild && !kUseUserfaultfd && IsResolved()) { CHECK_EQ(field->GetDeclaringClass<kReadBarrierOption>(), this) << GetStatus() << field->GetDeclaringClass()->PrettyClass() << " != " << PrettyClass(); } diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h index 0b0ef488de..ecf7856642 100644 --- a/runtime/mirror/class.h +++ b/runtime/mirror/class.h @@ -486,6 +486,7 @@ class MANAGED Class final : public Object { size_t GetComponentSize() REQUIRES_SHARED(Locks::mutator_lock_); + template<ReadBarrierOption kReadBarrierOption = kWithoutReadBarrier> size_t GetComponentSizeShift() REQUIRES_SHARED(Locks::mutator_lock_); bool IsObjectClass() REQUIRES_SHARED(Locks::mutator_lock_); @@ -495,7 +496,8 @@ class MANAGED Class final : public Object { template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags> bool IsInstantiable() REQUIRES_SHARED(Locks::mutator_lock_); - template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags> + template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, + ReadBarrierOption kReadBarrierOption = kWithoutReadBarrier> ALWAYS_INLINE bool IsObjectArrayClass() REQUIRES_SHARED(Locks::mutator_lock_); template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags> diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h index c679fded32..0ac71bfa2c 100644 --- a/runtime/mirror/object-inl.h +++ b/runtime/mirror/object-inl.h @@ -880,7 +880,7 @@ inline void Object::VisitFieldsReferences(uint32_t ref_offsets, const Visitor& v // inheritance hierarchy and find reference offsets the hard way. In the static case, just // consider this class. for (ObjPtr<Class> klass = kIsStatic - ? AsClass<kVerifyFlags>() + ? ObjPtr<Class>::DownCast(this) : GetClass<kVerifyFlags, kReadBarrierOption>(); klass != nullptr; klass = kIsStatic ? nullptr : klass->GetSuperClass<kVerifyFlags, kReadBarrierOption>()) { diff --git a/runtime/mirror/object-refvisitor-inl.h b/runtime/mirror/object-refvisitor-inl.h index 2361d79f3f..0931266891 100644 --- a/runtime/mirror/object-refvisitor-inl.h +++ b/runtime/mirror/object-refvisitor-inl.h @@ -104,7 +104,7 @@ inline size_t Object::VisitRefsForCompaction(const Visitor& visitor, size_t size; // We want to continue using pre-compact klass to avoid cascading faults. 
ObjPtr<Class> klass = GetClass<kVerifyFlags, kReadBarrierOption>(); - visitor(this, ClassOffset(), /* is_static= */ false); + DCHECK(klass != nullptr) << "obj=" << this; const uint32_t class_flags = klass->GetClassFlags<kVerifyNone>(); if (LIKELY(class_flags == kClassFlagNormal)) { DCHECK((!klass->IsVariableSize<kVerifyFlags>())); @@ -119,48 +119,49 @@ inline size_t Object::VisitRefsForCompaction(const Visitor& visitor, DCHECK(!klass->IsStringClass<kVerifyFlags>()); if (class_flags == kClassFlagClass) { DCHECK((klass->IsClassClass<kVerifyFlags>())); - ObjPtr<Class> as_klass = AsClass<kVerifyNone>(); + ObjPtr<Class> as_klass = ObjPtr<Class>::DownCast(this); as_klass->VisitReferences<kVisitNativeRoots, kVerifyFlags, kReadBarrierOption>(klass, visitor); - return kFetchObjSize ? as_klass->SizeOf<kSizeOfFlags>() : 0; + size = kFetchObjSize ? as_klass->SizeOf<kSizeOfFlags>() : 0; } else if (class_flags == kClassFlagObjectArray) { - DCHECK((klass->IsObjectArrayClass<kVerifyFlags>())); - ObjPtr<ObjectArray<mirror::Object>> obj_arr = AsObjectArray<mirror::Object, kVerifyNone>(); + DCHECK((klass->IsObjectArrayClass<kVerifyFlags, kReadBarrierOption>())); + ObjPtr<ObjectArray<Object>> obj_arr = ObjPtr<ObjectArray<Object>>::DownCast(this); obj_arr->VisitReferences(visitor, begin, end); - return kFetchObjSize ? obj_arr->SizeOf<kSizeOfFlags>() : 0; + size = kFetchObjSize ? obj_arr->SizeOf<kSizeOfFlags, kReadBarrierOption>() : 0; } else if ((class_flags & kClassFlagReference) != 0) { VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass, visitor); // Visit referent also as this is about updating the reference only. // There is no reference processing happening here. visitor(this, mirror::Reference::ReferentOffset(), /* is_static= */ false); - if ((class_flags & kClassFlagFinalizerReference) != 0) { - visitor(this, mirror::FinalizerReference::ZombieOffset(), /* is_static= */ false); - } + size = kFetchObjSize ? klass->GetObjectSize<kSizeOfFlags>() : 0; } else if (class_flags == kClassFlagDexCache) { - ObjPtr<mirror::DexCache> const dex_cache = AsDexCache<kVerifyFlags, kReadBarrierOption>(); + ObjPtr<DexCache> const dex_cache = ObjPtr<DexCache>::DownCast(this); dex_cache->VisitReferences<kVisitNativeRoots, kVerifyFlags, kReadBarrierOption>(klass, visitor); + size = kFetchObjSize ? klass->GetObjectSize<kSizeOfFlags>() : 0; } else { - ObjPtr<mirror::ClassLoader> const class_loader = - AsClassLoader<kVerifyFlags, kReadBarrierOption>(); + ObjPtr<ClassLoader> const class_loader = ObjPtr<ClassLoader>::DownCast(this); class_loader->VisitReferences<kVisitNativeRoots, kVerifyFlags, kReadBarrierOption>(klass, visitor); + size = kFetchObjSize ? klass->GetObjectSize<kSizeOfFlags>() : 0; } - size = kFetchObjSize ? klass->GetObjectSize<kSizeOfFlags>() : 0; } else { DCHECK((!klass->IsClassClass<kVerifyFlags>())); DCHECK((!klass->IsObjectArrayClass<kVerifyFlags>())); - if (class_flags == kClassFlagString) { - size = kFetchObjSize ? AsString<kSizeOfFlags>()->template SizeOf<kSizeOfFlags>() : 0; + if ((class_flags & kClassFlagString) != 0) { + size = kFetchObjSize ? static_cast<String*>(this)->SizeOf<kSizeOfFlags>() : 0; } else if (klass->IsArrayClass<kVerifyFlags>()) { // TODO: We can optimize this by implementing a SizeOf() version which takes // component-size-shift as an argument, thereby avoiding multiple loads of // component_type. - size = kFetchObjSize ? AsArray<kSizeOfFlags>()->template SizeOf<kSizeOfFlags>() : 0; + size = kFetchObjSize + ? 
static_cast<Array*>(this)->SizeOf<kSizeOfFlags, kReadBarrierOption>() + : 0; } else { - DCHECK_NE(class_flags & kClassFlagNormal, 0u); + DCHECK_EQ(class_flags, kClassFlagNoReferenceFields) + << "class_flags: " << std::hex << class_flags; // Only possibility left is of a normal klass instance with no references. size = kFetchObjSize ? klass->GetObjectSize<kSizeOfFlags>() : 0; } @@ -183,6 +184,7 @@ inline size_t Object::VisitRefsForCompaction(const Visitor& visitor, } } } + visitor(this, ClassOffset(), /* is_static= */ false); return size; } diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h index d83f3d6b00..97fc9385d4 100644 --- a/runtime/mirror/object.h +++ b/runtime/mirror/object.h @@ -653,7 +653,7 @@ class MANAGED LOCKABLE Object { template <bool kFetchObjSize = true, bool kVisitNativeRoots = true, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, - ReadBarrierOption kReadBarrierOption = kWithReadBarrier, + ReadBarrierOption kReadBarrierOption = kWithFromSpaceBarrier, typename Visitor> size_t VisitRefsForCompaction(const Visitor& visitor, MemberOffset begin, diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h index 6bc8f0b979..f028473e83 100644 --- a/runtime/read_barrier-inl.h +++ b/runtime/read_barrier-inl.h @@ -21,6 +21,7 @@ #include "gc/accounting/read_barrier_table.h" #include "gc/collector/concurrent_copying-inl.h" +#include "gc/collector/mark_compact.h" #include "gc/heap.h" #include "mirror/object-readbarrier-inl.h" #include "mirror/object_reference.h" @@ -91,10 +92,12 @@ inline MirrorType* ReadBarrier::Barrier( LOG(FATAL) << "Unexpected read barrier type"; UNREACHABLE(); } - } else if (with_read_barrier) { - // TODO: invoke MarkCompact's function which translates a pre-compact - // address to from-space address, if we are in the compaction phase. - return ref_addr->template AsMirrorPtr<kIsVolatile>(); + } else if (kReadBarrierOption == kWithFromSpaceBarrier) { + CHECK(kUseUserfaultfd); + MirrorType* old = ref_addr->template AsMirrorPtr<kIsVolatile>(); + mirror::Object* ref = + Runtime::Current()->GetHeap()->MarkCompactCollector()->GetFromSpaceAddrFromBarrier(old); + return reinterpret_cast<MirrorType*>(ref); } else { // No read barrier. return ref_addr->template AsMirrorPtr<kIsVolatile>(); diff --git a/runtime/read_barrier_config.h b/runtime/read_barrier_config.h index d505bedec5..3c5afad25f 100644 --- a/runtime/read_barrier_config.h +++ b/runtime/read_barrier_config.h @@ -63,6 +63,7 @@ static constexpr bool kUseTableLookupReadBarrier = false; #endif static constexpr bool kUseReadBarrier = kUseBakerReadBarrier || kUseTableLookupReadBarrier; +static constexpr bool kUseUserfaultfd = !kUseReadBarrier; // Debugging flag that forces the generation of read barriers, but // does not trigger the use of the concurrent copying GC. diff --git a/runtime/read_barrier_option.h b/runtime/read_barrier_option.h index d918d466c6..36fc2d27b8 100644 --- a/runtime/read_barrier_option.h +++ b/runtime/read_barrier_option.h @@ -84,6 +84,7 @@ namespace art { enum ReadBarrierOption { kWithReadBarrier, // Perform a read barrier. kWithoutReadBarrier, // Don't perform a read barrier. + kWithFromSpaceBarrier, // Get the from-space address for the given to-space address. 
Used by CMC }; } // namespace art diff --git a/runtime/runtime.cc b/runtime/runtime.cc index 6194b816d6..e5ce8e5869 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -790,12 +790,12 @@ void Runtime::CallExitHook(jint status) { } } -void Runtime::SweepSystemWeaks(IsMarkedVisitor* visitor) { +void Runtime::SweepSystemWeaks(IsMarkedVisitor* visitor, const bool sweep_jit) { GetInternTable()->SweepInternTableWeaks(visitor); GetMonitorList()->SweepMonitorList(visitor); GetJavaVM()->SweepJniWeakGlobals(visitor); GetHeap()->SweepAllocationRecords(visitor); - if (GetJit() != nullptr) { + if (sweep_jit && GetJit() != nullptr) { // Visit JIT literal tables. Objects in these tables are classes and strings // and only classes can be affected by class unloading. The strings always // stay alive as they are strongly interned. diff --git a/runtime/runtime.h b/runtime/runtime.h index 133db0893c..7f00d77922 100644 --- a/runtime/runtime.h +++ b/runtime/runtime.h @@ -450,7 +450,7 @@ class Runtime { // Sweep system weaks, the system weak is deleted if the visitor return null. Otherwise, the // system weak is updated to be the visitor's returned value. - void SweepSystemWeaks(IsMarkedVisitor* visitor) + void SweepSystemWeaks(IsMarkedVisitor* visitor, bool sweep_jit = true) REQUIRES_SHARED(Locks::mutator_lock_); // Walk all reflective objects and visit their targets as well as any method/fields held by the diff --git a/runtime/thread.cc b/runtime/thread.cc index db43afe33f..2de60e4527 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -4016,7 +4016,11 @@ class ReferenceMapVisitor : public StackVisitor { // We are visiting the references in compiled frames, so we do not need // to know the inlined frames. : StackVisitor(thread, context, StackVisitor::StackWalkKind::kSkipInlinedFrames), - visitor_(visitor) {} + visitor_(visitor) { + gc::Heap* const heap = Runtime::Current()->GetHeap(); + visit_declaring_class_ = heap->CurrentCollectorType() != gc::CollectorType::kCollectorTypeCMC + || !heap->MarkCompactCollector()->IsCompacting(); + } bool VisitFrame() override REQUIRES_SHARED(Locks::mutator_lock_) { if (false) { @@ -4061,6 +4065,9 @@ class ReferenceMapVisitor : public StackVisitor { void VisitDeclaringClass(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_) NO_THREAD_SAFETY_ANALYSIS { + if (!visit_declaring_class_) { + return; + } ObjPtr<mirror::Class> klass = method->GetDeclaringClassUnchecked<kWithoutReadBarrier>(); // klass can be null for runtime methods. if (klass != nullptr) { @@ -4354,6 +4361,7 @@ class ReferenceMapVisitor : public StackVisitor { // Visitor for when we visit a root. 
RootVisitor& visitor_; + bool visit_declaring_class_; }; class RootCallbackVisitor { @@ -4590,6 +4598,15 @@ bool Thread::HasTlab() const { return has_tlab; } +void Thread::AdjustTlab(size_t slide_bytes) { + if (HasTlab()) { + tlsPtr_.thread_local_start -= slide_bytes; + tlsPtr_.thread_local_pos -= slide_bytes; + tlsPtr_.thread_local_end -= slide_bytes; + tlsPtr_.thread_local_limit -= slide_bytes; + } +} + std::ostream& operator<<(std::ostream& os, const Thread& thread) { thread.ShortDump(os); return os; diff --git a/runtime/thread.h b/runtime/thread.h index fd978293fd..f40f03f371 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -1065,17 +1065,14 @@ class Thread { } uint32_t GetDisableThreadFlipCount() const { - CHECK(kUseReadBarrier); return tls32_.disable_thread_flip_count; } void IncrementDisableThreadFlipCount() { - CHECK(kUseReadBarrier); ++tls32_.disable_thread_flip_count; } void DecrementDisableThreadFlipCount() { - CHECK(kUseReadBarrier); DCHECK_GT(tls32_.disable_thread_flip_count, 0U); --tls32_.disable_thread_flip_count; } @@ -1244,6 +1241,10 @@ class Thread { DCHECK_LE(tlsPtr_.thread_local_end, tlsPtr_.thread_local_limit); } + // Called from Concurrent mark-compact GC to slide the TLAB pointers backwards + // to adjust to post-compact addresses. + void AdjustTlab(size_t slide_bytes); + // Doesn't check that there is room. mirror::Object* AllocTlab(size_t bytes); void SetTlab(uint8_t* start, uint8_t* end, uint8_t* limit); |