Diffstat (limited to 'runtime/gc')
59 files changed, 6879 insertions, 978 deletions
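The first hunk below adds a bulk-reservation API, AtomicStack::BumpBack(), which lets a caller reserve several slots in one step and fill them in place instead of calling PushBack() per element. A minimal usage sketch, not part of the patch; the ObjectStack alias and StackReference::Assign() are assumed from the existing stack code:

    // Sketch only: batch-push a set of references onto an AtomicStack<mirror::Object>.
    // 'mark_stack' is assumed to be an ObjectStack (accounting::AtomicStack<mirror::Object>).
    StackReference<mirror::Object>* start;
    StackReference<mirror::Object>* end;
    if (mark_stack->BumpBack(refs.size(), &start, &end)) {
      // BumpBack() reserved [start, end); write the new elements there.
      for (mirror::Object* ref : refs) {
        (start++)->Assign(ref);
      }
      DCHECK_EQ(start, end);
    } else {
      // Reservation would overflow the stack; fall back to expanding the stack and
      // pushing one element at a time, as existing callers do when IsFull() is hit.
    }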
diff --git a/runtime/gc/accounting/atomic_stack.h b/runtime/gc/accounting/atomic_stack.h index 5e6bd88d73..a90a31963b 100644 --- a/runtime/gc/accounting/atomic_stack.h +++ b/runtime/gc/accounting/atomic_stack.h @@ -130,6 +130,35 @@ class AtomicStack { } } + // Bump the back index by the given number of slots. Returns false if this + // operation will overflow the stack. New elements should be written + // to [*start_address, *end_address). + bool BumpBack(size_t num_slots, + StackReference<T>** start_address, + StackReference<T>** end_address) + REQUIRES_SHARED(Locks::mutator_lock_) { + if (kIsDebugBuild) { + debug_is_sorted_ = false; + } + const int32_t index = back_index_.load(std::memory_order_relaxed); + const int32_t new_index = index + num_slots; + if (UNLIKELY(static_cast<size_t>(new_index) >= growth_limit_)) { + // Stack overflow. + return false; + } + back_index_.store(new_index, std::memory_order_relaxed); + *start_address = begin_ + index; + *end_address = begin_ + new_index; + if (kIsDebugBuild) { + // Check the memory is zero. + for (int32_t i = index; i < new_index; i++) { + DCHECK_EQ(begin_[i].AsMirrorPtr(), static_cast<T*>(nullptr)) + << "i=" << i << " index=" << index << " new_index=" << new_index; + } + } + return true; + } + void PushBack(T* value) REQUIRES_SHARED(Locks::mutator_lock_) { if (kIsDebugBuild) { debug_is_sorted_ = false; @@ -144,8 +173,16 @@ class AtomicStack { DCHECK_GT(back_index_.load(std::memory_order_relaxed), front_index_.load(std::memory_order_relaxed)); // Decrement the back index non atomically. - back_index_.store(back_index_.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed); - return begin_[back_index_.load(std::memory_order_relaxed)].AsMirrorPtr(); + const int32_t index = back_index_.load(std::memory_order_relaxed) - 1; + back_index_.store(index, std::memory_order_relaxed); + T* ret = begin_[index].AsMirrorPtr(); + // In debug builds we expect the stack elements to be null, which may not + // always be the case if the stack is being reused without resetting it + // in-between. + if (kIsDebugBuild) { + begin_[index].Clear(); + } + return ret; } // Take an item from the front of the stack. diff --git a/runtime/gc/accounting/bitmap.cc b/runtime/gc/accounting/bitmap.cc index 37646b3728..bd10958496 100644 --- a/runtime/gc/accounting/bitmap.cc +++ b/runtime/gc/accounting/bitmap.cc @@ -21,6 +21,7 @@ #include "base/bit_utils.h" #include "base/mem_map.h" #include "card_table.h" +#include "gc/collector/mark_compact.h" #include "jit/jit_memory_region.h" namespace art { @@ -98,6 +99,7 @@ MemoryRangeBitmap<kAlignment>* MemoryRangeBitmap<kAlignment>::CreateFromMemMap( template class MemoryRangeBitmap<CardTable::kCardSize>; template class MemoryRangeBitmap<jit::kJitCodeAccountingBytes>; +template class MemoryRangeBitmap<collector::MarkCompact::kAlignment>; } // namespace accounting } // namespace gc diff --git a/runtime/gc/accounting/bitmap.h b/runtime/gc/accounting/bitmap.h index 68f2d049d0..06398d6b10 100644 --- a/runtime/gc/accounting/bitmap.h +++ b/runtime/gc/accounting/bitmap.h @@ -81,7 +81,7 @@ class Bitmap { void CopyFrom(Bitmap* source_bitmap); // Starting address of our internal storage. 
- uintptr_t* Begin() { + uintptr_t* Begin() const { return bitmap_begin_; } @@ -98,7 +98,7 @@ class Bitmap { std::string Dump() const; protected: - static constexpr size_t kBitsPerBitmapWord = sizeof(uintptr_t) * kBitsPerByte; + static constexpr size_t kBitsPerBitmapWord = kBitsPerIntPtrT; Bitmap(MemMap&& mem_map, size_t bitmap_size); ~Bitmap(); @@ -109,7 +109,9 @@ class Bitmap { template<bool kSetBit> ALWAYS_INLINE bool ModifyBit(uintptr_t bit_index); - // Backing storage for bitmap. + // Backing storage for bitmap. This is interpreted as an array of + // kBitsPerBitmapWord-sized integers, with bits assigned in each word little + // endian first. MemMap mem_map_; // This bitmap itself, word sized for efficiency in scanning. @@ -122,7 +124,7 @@ class Bitmap { DISALLOW_IMPLICIT_CONSTRUCTORS(Bitmap); }; -// One bit per kAlignment in range (start, end] +// One bit per kAlignment in range [start, end) template<size_t kAlignment> class MemoryRangeBitmap : public Bitmap { public: @@ -138,7 +140,7 @@ class MemoryRangeBitmap : public Bitmap { // End of the memory range that the bitmap covers. ALWAYS_INLINE uintptr_t CoverEnd() const { - return cover_end_; + return cover_begin_ + kAlignment * BitmapSize(); } // Return the address associated with a bit index. @@ -150,39 +152,47 @@ class MemoryRangeBitmap : public Bitmap { // Return the bit index associated with an address . ALWAYS_INLINE uintptr_t BitIndexFromAddr(uintptr_t addr) const { - DCHECK(HasAddress(addr)) << CoverBegin() << " <= " << addr << " < " << CoverEnd(); - return (addr - CoverBegin()) / kAlignment; + uintptr_t result = (addr - CoverBegin()) / kAlignment; + DCHECK(result < BitmapSize()) << CoverBegin() << " <= " << addr << " < " << CoverEnd(); + return result; } ALWAYS_INLINE bool HasAddress(const uintptr_t addr) const { - return cover_begin_ <= addr && addr < cover_end_; + // Don't use BitIndexFromAddr() here as the addr passed to this function + // could be outside the range. If addr < cover_begin_, then the result + // underflows to some very large value past the end of the bitmap. + // Therefore, all operations are unsigned here. + bool ret = (addr - CoverBegin()) / kAlignment < BitmapSize(); + if (ret) { + DCHECK(CoverBegin() <= addr && addr < CoverEnd()) + << CoverBegin() << " <= " << addr << " < " << CoverEnd(); + } + return ret; } ALWAYS_INLINE bool Set(uintptr_t addr) { return SetBit(BitIndexFromAddr(addr)); } - ALWAYS_INLINE bool Clear(size_t addr) { + ALWAYS_INLINE bool Clear(uintptr_t addr) { return ClearBit(BitIndexFromAddr(addr)); } - ALWAYS_INLINE bool Test(size_t addr) const { + ALWAYS_INLINE bool Test(uintptr_t addr) const { return TestBit(BitIndexFromAddr(addr)); } // Returns true if the object was previously set. 
- ALWAYS_INLINE bool AtomicTestAndSet(size_t addr) { + ALWAYS_INLINE bool AtomicTestAndSet(uintptr_t addr) { return AtomicTestAndSetBit(BitIndexFromAddr(addr)); } private: MemoryRangeBitmap(MemMap&& mem_map, uintptr_t begin, size_t num_bits) : Bitmap(std::move(mem_map), num_bits), - cover_begin_(begin), - cover_end_(begin + kAlignment * num_bits) {} + cover_begin_(begin) {} uintptr_t const cover_begin_; - uintptr_t const cover_end_; DISALLOW_IMPLICIT_CONSTRUCTORS(MemoryRangeBitmap); }; diff --git a/runtime/gc/accounting/card_table.cc b/runtime/gc/accounting/card_table.cc index fdf1615f5e..b8b328c795 100644 --- a/runtime/gc/accounting/card_table.cc +++ b/runtime/gc/accounting/card_table.cc @@ -31,11 +31,6 @@ namespace art { namespace gc { namespace accounting { -constexpr size_t CardTable::kCardShift; -constexpr size_t CardTable::kCardSize; -constexpr uint8_t CardTable::kCardClean; -constexpr uint8_t CardTable::kCardDirty; - /* * Maintain a card table from the write barrier. All writes of * non-null values to heap addresses should go through an entry in diff --git a/runtime/gc/accounting/card_table_test.cc b/runtime/gc/accounting/card_table_test.cc index 12baaa4b4e..b34a883f52 100644 --- a/runtime/gc/accounting/card_table_test.cc +++ b/runtime/gc/accounting/card_table_test.cc @@ -19,8 +19,8 @@ #include <string> #include "base/atomic.h" +#include "base/common_art_test.h" #include "base/utils.h" -#include "common_runtime_test.h" #include "handle_scope-inl.h" #include "mirror/class-inl.h" #include "mirror/string-inl.h" // Strings are easiest to allocate @@ -36,7 +36,7 @@ class Object; namespace gc { namespace accounting { -class CardTableTest : public CommonRuntimeTest { +class CardTableTest : public CommonArtTest { public: std::unique_ptr<CardTable> card_table_; diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc index b4026fc3f3..4a84799431 100644 --- a/runtime/gc/accounting/mod_union_table.cc +++ b/runtime/gc/accounting/mod_union_table.cc @@ -388,6 +388,11 @@ void ModUnionTableReferenceCache::Dump(std::ostream& os) { void ModUnionTableReferenceCache::VisitObjects(ObjectCallback callback, void* arg) { CardTable* const card_table = heap_->GetCardTable(); ContinuousSpaceBitmap* live_bitmap = space_->GetLiveBitmap(); + // Use an unordered_set for constant time search of card in the second loop. + // We don't want to change cleared_cards_ to unordered so that traversals are + // sequential in address order. + // TODO: Optimize this. + std::unordered_set<const uint8_t*> card_lookup_map; for (uint8_t* card : cleared_cards_) { uintptr_t start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card)); uintptr_t end = start + CardTable::kCardSize; @@ -396,10 +401,13 @@ void ModUnionTableReferenceCache::VisitObjects(ObjectCallback callback, void* ar [callback, arg](mirror::Object* obj) { callback(obj, arg); }); + card_lookup_map.insert(card); } - // This may visit the same card twice, TODO avoid this. 
for (const auto& pair : references_) { const uint8_t* card = pair.first; + if (card_lookup_map.find(card) != card_lookup_map.end()) { + continue; + } uintptr_t start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card)); uintptr_t end = start + CardTable::kCardSize; live_bitmap->VisitMarkedRange(start, diff --git a/runtime/gc/accounting/mod_union_table_test.cc b/runtime/gc/accounting/mod_union_table_test.cc index e42682a112..3f38f5069e 100644 --- a/runtime/gc/accounting/mod_union_table_test.cc +++ b/runtime/gc/accounting/mod_union_table_test.cc @@ -46,6 +46,7 @@ class ModUnionTableFactory { class ModUnionTableTest : public CommonRuntimeTest { public: ModUnionTableTest() : java_lang_object_array_(nullptr) { + use_boot_image_ = true; // Make the Runtime creation cheaper. } mirror::ObjectArray<mirror::Object>* AllocObjectArray( Thread* self, space::ContinuousMemMapAllocSpace* space, size_t component_count) diff --git a/runtime/gc/accounting/space_bitmap-inl.h b/runtime/gc/accounting/space_bitmap-inl.h index d460e00075..e7825e6953 100644 --- a/runtime/gc/accounting/space_bitmap-inl.h +++ b/runtime/gc/accounting/space_bitmap-inl.h @@ -64,7 +64,44 @@ inline bool SpaceBitmap<kAlignment>::Test(const mirror::Object* obj) const { } template<size_t kAlignment> -template<typename Visitor> +inline mirror::Object* SpaceBitmap<kAlignment>::FindPrecedingObject(uintptr_t visit_begin, + uintptr_t visit_end) const { + // Covers [visit_end, visit_begin]. + visit_end = std::max(heap_begin_, visit_end); + DCHECK_LE(visit_end, visit_begin); + DCHECK_LT(visit_begin, HeapLimit()); + + const uintptr_t offset_start = visit_begin - heap_begin_; + const uintptr_t offset_end = visit_end - heap_begin_; + uintptr_t index_start = OffsetToIndex(offset_start); + const uintptr_t index_end = OffsetToIndex(offset_end); + + // Start with the right edge + uintptr_t word = bitmap_begin_[index_start].load(std::memory_order_relaxed); + // visit_begin could be the first word of the object we are looking for. 
+ const uintptr_t right_edge_mask = OffsetToMask(offset_start); + word &= right_edge_mask | (right_edge_mask - 1); + while (index_start > index_end) { + if (word != 0) { + const uintptr_t ptr_base = IndexToOffset(index_start) + heap_begin_; + size_t pos_leading_set_bit = kBitsPerIntPtrT - CLZ(word) - 1; + return reinterpret_cast<mirror::Object*>(ptr_base + pos_leading_set_bit * kAlignment); + } + word = bitmap_begin_[--index_start].load(std::memory_order_relaxed); + } + + word &= ~(OffsetToMask(offset_end) - 1); + if (word != 0) { + const uintptr_t ptr_base = IndexToOffset(index_end) + heap_begin_; + size_t pos_leading_set_bit = kBitsPerIntPtrT - CLZ(word) - 1; + return reinterpret_cast<mirror::Object*>(ptr_base + pos_leading_set_bit * kAlignment); + } else { + return nullptr; + } +} + +template<size_t kAlignment> +template<bool kVisitOnce, typename Visitor> inline void SpaceBitmap<kAlignment>::VisitMarkedRange(uintptr_t visit_begin, uintptr_t visit_end, Visitor&& visitor) const { @@ -114,6 +151,9 @@ inline void SpaceBitmap<kAlignment>::VisitMarkedRange(uintptr_t visit_begin, const size_t shift = CTZ(left_edge); mirror::Object* obj = reinterpret_cast<mirror::Object*>(ptr_base + shift * kAlignment); visitor(obj); + if (kVisitOnce) { + return; + } left_edge ^= (static_cast<uintptr_t>(1)) << shift; } while (left_edge != 0); } @@ -128,6 +168,9 @@ inline void SpaceBitmap<kAlignment>::VisitMarkedRange(uintptr_t visit_begin, const size_t shift = CTZ(w); mirror::Object* obj = reinterpret_cast<mirror::Object*>(ptr_base + shift * kAlignment); visitor(obj); + if (kVisitOnce) { + return; + } w ^= (static_cast<uintptr_t>(1)) << shift; } while (w != 0); } @@ -155,6 +198,9 @@ inline void SpaceBitmap<kAlignment>::VisitMarkedRange(uintptr_t visit_begin, const size_t shift = CTZ(right_edge); mirror::Object* obj = reinterpret_cast<mirror::Object*>(ptr_base + shift * kAlignment); visitor(obj); + if (kVisitOnce) { + return; + } right_edge ^= (static_cast<uintptr_t>(1)) << shift; } while (right_edge != 0); } diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc index 3c5688d5bd..a0458d2ae1 100644 --- a/runtime/gc/accounting/space_bitmap.cc +++ b/runtime/gc/accounting/space_bitmap.cc @@ -16,6 +16,9 @@ #include "space_bitmap-inl.h" +#include <iomanip> +#include <sstream> + #include "android-base/stringprintf.h" #include "art_field-inl.h" @@ -113,6 +116,37 @@ std::string SpaceBitmap<kAlignment>::Dump() const { reinterpret_cast<void*>(HeapLimit())); } +template <size_t kAlignment> +std::string SpaceBitmap<kAlignment>::DumpMemAround(mirror::Object* obj) const { + uintptr_t addr = reinterpret_cast<uintptr_t>(obj); + DCHECK_GE(addr, heap_begin_); + DCHECK(HasAddress(obj)) << obj; + const uintptr_t offset = addr - heap_begin_; + const size_t index = OffsetToIndex(offset); + const uintptr_t mask = OffsetToMask(offset); + size_t num_entries = bitmap_size_ / sizeof(uintptr_t); + DCHECK_LT(index, num_entries) << " bitmap_size_ = " << bitmap_size_; + Atomic<uintptr_t>* atomic_entry = &bitmap_begin_[index]; + uintptr_t prev = 0; + uintptr_t next = 0; + if (index > 0) { + prev = (atomic_entry - 1)->load(std::memory_order_relaxed); + } + uintptr_t curr = atomic_entry->load(std::memory_order_relaxed); + if (index < num_entries - 1) { + next = (atomic_entry + 1)->load(std::memory_order_relaxed); + } + std::ostringstream oss; + oss << " offset: " << offset + << " index: " << index + << " mask: " << std::hex << std::setfill('0') << std::setw(16) << mask + << " words {" << std::hex << 
std::setfill('0') << std::setw(16) << prev + << ", " << std::hex << std::setfill('0') << std::setw(16) << curr + << ", " << std::hex <<std::setfill('0') << std::setw(16) << next + << "}"; + return oss.str(); +} + template<size_t kAlignment> void SpaceBitmap<kAlignment>::Clear() { if (bitmap_begin_ != nullptr) { diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h index 0d8ffa0d67..e3189331c4 100644 --- a/runtime/gc/accounting/space_bitmap.h +++ b/runtime/gc/accounting/space_bitmap.h @@ -40,8 +40,8 @@ namespace accounting { template<size_t kAlignment> class SpaceBitmap { public: - typedef void ScanCallback(mirror::Object* obj, void* finger, void* arg); - typedef void SweepCallback(size_t ptr_count, mirror::Object** ptrs, void* arg); + using ScanCallback = void(mirror::Object* obj, void* finger, void* arg); + using SweepCallback = void(size_t ptr_count, mirror::Object** ptrs, void* arg); // Initialize a space bitmap so that it points to a bitmap large enough to cover a heap at // heap_begin of heap_capacity bytes, where objects are guaranteed to be kAlignment-aligned. @@ -131,10 +131,15 @@ class SpaceBitmap { } } - // Visit the live objects in the range [visit_begin, visit_end). + // Find first object while scanning bitmap backwards from visit_begin -> visit_end. + // Covers [visit_end, visit_begin] range. + mirror::Object* FindPrecedingObject(uintptr_t visit_begin, uintptr_t visit_end = 0) const; + + // Visit the live objects in the range [visit_begin, visit_end). If kVisitOnce + // is true, then only the first live object will be visited. // TODO: Use lock annotations when clang is fixed. // REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_); - template <typename Visitor> + template <bool kVisitOnce = false, typename Visitor> void VisitMarkedRange(uintptr_t visit_begin, uintptr_t visit_end, Visitor&& visitor) const NO_THREAD_SAFETY_ANALYSIS; @@ -159,7 +164,7 @@ class SpaceBitmap { void CopyFrom(SpaceBitmap* source_bitmap); // Starting address of our internal storage. - Atomic<uintptr_t>* Begin() { + Atomic<uintptr_t>* Begin() const { return bitmap_begin_; } @@ -202,6 +207,9 @@ class SpaceBitmap { std::string Dump() const; + // Dump three bitmap words around obj. + std::string DumpMemAround(mirror::Object* obj) const; + // Helper function for computing bitmap size based on a 64 bit capacity. 
static size_t ComputeBitmapSize(uint64_t capacity); static size_t ComputeHeapSize(uint64_t bitmap_bytes); diff --git a/runtime/gc/accounting/space_bitmap_test.cc b/runtime/gc/accounting/space_bitmap_test.cc index 3a69865267..8fcf102406 100644 --- a/runtime/gc/accounting/space_bitmap_test.cc +++ b/runtime/gc/accounting/space_bitmap_test.cc @@ -19,8 +19,8 @@ #include <stdint.h> #include <memory> +#include "base/common_art_test.h" #include "base/mutex.h" -#include "common_runtime_test.h" #include "runtime_globals.h" #include "space_bitmap-inl.h" @@ -28,7 +28,7 @@ namespace art { namespace gc { namespace accounting { -class SpaceBitmapTest : public CommonRuntimeTest {}; +class SpaceBitmapTest : public CommonArtTest {}; TEST_F(SpaceBitmapTest, Init) { uint8_t* heap_begin = reinterpret_cast<uint8_t*>(0x10000000); diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc index 7bcf375b16..f0d379fde6 100644 --- a/runtime/gc/allocation_record.cc +++ b/runtime/gc/allocation_record.cc @@ -59,6 +59,11 @@ AllocRecordObjectMap::~AllocRecordObjectMap() { } void AllocRecordObjectMap::VisitRoots(RootVisitor* visitor) { + // When we are compacting in userfaultfd GC, the class GC-roots are already + // updated in SweepAllocationRecords()->SweepClassObject(). + if (Runtime::Current()->GetHeap()->IsPerformingUffdCompaction()) { + return; + } CHECK_LE(recent_record_max_, alloc_record_max_); BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(visitor, RootInfo(kRootDebugger)); size_t count = recent_record_max_; @@ -92,7 +97,10 @@ static inline void SweepClassObject(AllocRecord* record, IsMarkedVisitor* visito mirror::Object* new_object = visitor->IsMarked(old_object); DCHECK(new_object != nullptr); if (UNLIKELY(old_object != new_object)) { - klass = GcRoot<mirror::Class>(new_object->AsClass()); + // We can't use AsClass() as it uses IsClass in a DCHECK, which expects + // the class' contents to be there. This is not the case in userfaultfd + // GC. + klass = GcRoot<mirror::Class>(ObjPtr<mirror::Class>::DownCast(new_object)); } } } @@ -131,13 +139,13 @@ void AllocRecordObjectMap::SweepAllocationRecords(IsMarkedVisitor* visitor) { } void AllocRecordObjectMap::AllowNewAllocationRecords() { - CHECK(!kUseReadBarrier); + CHECK(!gUseReadBarrier); allow_new_record_ = true; new_record_condition_.Broadcast(Thread::Current()); } void AllocRecordObjectMap::DisallowNewAllocationRecords() { - CHECK(!kUseReadBarrier); + CHECK(!gUseReadBarrier); allow_new_record_ = false; } @@ -230,8 +238,8 @@ void AllocRecordObjectMap::RecordAllocation(Thread* self, // Since nobody seemed to really notice or care it might not be worth the trouble. // Wait for GC's sweeping to complete and allow new records. - while (UNLIKELY((!kUseReadBarrier && !allow_new_record_) || - (kUseReadBarrier && !self->GetWeakRefAccessEnabled()))) { + while (UNLIKELY((!gUseReadBarrier && !allow_new_record_) || + (gUseReadBarrier && !self->GetWeakRefAccessEnabled()))) { // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the // presence of threads blocking for weak ref access. self->CheckEmptyCheckpointFromWeakRefAccess(Locks::alloc_tracker_lock_); diff --git a/runtime/gc/allocator/dlmalloc.cc b/runtime/gc/allocator/art-dlmalloc.cc index 79d4fbfb5a..de0c85a407 100644 --- a/runtime/gc/allocator/dlmalloc.cc +++ b/runtime/gc/allocator/art-dlmalloc.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "dlmalloc.h" +#include "art-dlmalloc.h" #include <android-base/logging.h> @@ -39,8 +39,8 @@ static void art_heap_usage_error(const char* function, void* p); #pragma GCC diagnostic ignored "-Wstrict-aliasing" #pragma GCC diagnostic ignored "-Wnull-pointer-arithmetic" #pragma GCC diagnostic ignored "-Wexpansion-to-defined" -#include "../../../external/dlmalloc/malloc.c" -// Note: malloc.c uses a DEBUG define to drive debug code. This interferes with the DEBUG severity +#include "dlmalloc.c" // NOLINT +// Note: dlmalloc.c uses a DEBUG define to drive debug code. This interferes with the DEBUG severity // of libbase, so undefine it now. #undef DEBUG #pragma GCC diagnostic pop diff --git a/runtime/gc/allocator/dlmalloc.h b/runtime/gc/allocator/art-dlmalloc.h index b12691ad0e..296de72c70 100644 --- a/runtime/gc/allocator/dlmalloc.h +++ b/runtime/gc/allocator/art-dlmalloc.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef ART_RUNTIME_GC_ALLOCATOR_DLMALLOC_H_ -#define ART_RUNTIME_GC_ALLOCATOR_DLMALLOC_H_ +#ifndef ART_RUNTIME_GC_ALLOCATOR_ART_DLMALLOC_H_ +#define ART_RUNTIME_GC_ALLOCATOR_ART_DLMALLOC_H_ #include <cstdint> @@ -33,7 +33,7 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wredundant-decls" #pragma GCC diagnostic ignored "-Wnull-pointer-arithmetic" -#include "../../external/dlmalloc/malloc.h" +#include "dlmalloc.h" #pragma GCC diagnostic pop // Callback for dlmalloc_inspect_all or mspace_inspect_all that will madvise(2) unused @@ -58,4 +58,4 @@ void* ArtDlMallocMoreCore(void* mspace, intptr_t increment); } // namespace gc } // namespace art -#endif // ART_RUNTIME_GC_ALLOCATOR_DLMALLOC_H_ +#endif // ART_RUNTIME_GC_ALLOCATOR_ART_DLMALLOC_H_ diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index 0de62fef47..1f123aaff5 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -160,17 +160,31 @@ ConcurrentCopying::ConcurrentCopying(Heap* heap, if (young_gen_) { gc_time_histogram_ = metrics->YoungGcCollectionTime(); metrics_gc_count_ = metrics->YoungGcCount(); + metrics_gc_count_delta_ = metrics->YoungGcCountDelta(); gc_throughput_histogram_ = metrics->YoungGcThroughput(); gc_tracing_throughput_hist_ = metrics->YoungGcTracingThroughput(); gc_throughput_avg_ = metrics->YoungGcThroughputAvg(); gc_tracing_throughput_avg_ = metrics->YoungGcTracingThroughputAvg(); + gc_scanned_bytes_ = metrics->YoungGcScannedBytes(); + gc_scanned_bytes_delta_ = metrics->YoungGcScannedBytesDelta(); + gc_freed_bytes_ = metrics->YoungGcFreedBytes(); + gc_freed_bytes_delta_ = metrics->YoungGcFreedBytesDelta(); + gc_duration_ = metrics->YoungGcDuration(); + gc_duration_delta_ = metrics->YoungGcDurationDelta(); } else { gc_time_histogram_ = metrics->FullGcCollectionTime(); metrics_gc_count_ = metrics->FullGcCount(); + metrics_gc_count_delta_ = metrics->FullGcCountDelta(); gc_throughput_histogram_ = metrics->FullGcThroughput(); gc_tracing_throughput_hist_ = metrics->FullGcTracingThroughput(); gc_throughput_avg_ = metrics->FullGcThroughputAvg(); gc_tracing_throughput_avg_ = metrics->FullGcTracingThroughputAvg(); + gc_scanned_bytes_ = metrics->FullGcScannedBytes(); + gc_scanned_bytes_delta_ = metrics->FullGcScannedBytesDelta(); + gc_freed_bytes_ = metrics->FullGcFreedBytes(); + gc_freed_bytes_delta_ = metrics->FullGcFreedBytesDelta(); + gc_duration_ = metrics->FullGcDuration(); + gc_duration_delta_ = metrics->FullGcDurationDelta(); } } @@ -575,10 +589,11 @@ class 
ConcurrentCopying::FlipCallback : public Closure { if (kIsDebugBuild && !cc->use_generational_cc_) { cc->region_space_->AssertAllRegionLiveBytesZeroOrCleared(); } - if (UNLIKELY(Runtime::Current()->IsActiveTransaction())) { - CHECK(Runtime::Current()->IsAotCompiler()); + Runtime* runtime = Runtime::Current(); + if (UNLIKELY(runtime->IsActiveTransaction())) { + CHECK(runtime->IsAotCompiler()); TimingLogger::ScopedTiming split3("(Paused)VisitTransactionRoots", cc->GetTimings()); - Runtime::Current()->VisitTransactionRoots(cc); + runtime->VisitTransactionRoots(cc); } if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) { cc->GrayAllNewlyDirtyImmuneObjects(); @@ -587,15 +602,10 @@ class ConcurrentCopying::FlipCallback : public Closure { cc->VerifyGrayImmuneObjects(); } } - // May be null during runtime creation, in this case leave java_lang_Object null. - // This is safe since single threaded behavior should mean FillWithFakeObject does not - // happen when java_lang_Object_ is null. - if (WellKnownClasses::java_lang_Object != nullptr) { - cc->java_lang_Object_ = down_cast<mirror::Class*>(cc->Mark(thread, - WellKnownClasses::ToClass(WellKnownClasses::java_lang_Object).Ptr())); - } else { - cc->java_lang_Object_ = nullptr; - } + ObjPtr<mirror::Class> java_lang_Object = + GetClassRoot<mirror::Object, kWithoutReadBarrier>(runtime->GetClassLinker()); + DCHECK(java_lang_Object != nullptr); + cc->java_lang_Object_ = down_cast<mirror::Class*>(cc->Mark(thread, java_lang_Object.Ptr())); } private: @@ -1692,8 +1702,6 @@ void ConcurrentCopying::CopyingPhase() { if (kVerboseMode) { LOG(INFO) << "SweepSystemWeaks done"; } - // Free data for class loaders that we unloaded. - Runtime::Current()->GetClassLinker()->CleanupClassLoaders(); // Marking is done. Disable marking. DisableMarking(); CheckEmptyMarkStack(); @@ -1739,6 +1747,10 @@ class ConcurrentCopying::DisableMarkingCheckpoint : public Closure { thread->IsSuspended() || thread->GetState() == ThreadState::kWaitingPerformingGc) << thread->GetState() << " thread " << thread << " self " << self; + // We sweep interpreter caches here so that it can be done after all + // reachable objects are marked and the mutators can sweep their caches + // without synchronization. + thread->SweepInterpreterCache(concurrent_copying_); // Disable the thread-local is_gc_marking flag. // Note a thread that has just started right before this checkpoint may have already this flag // set to false, which is ok. @@ -1887,7 +1899,10 @@ void ConcurrentCopying::PushOntoMarkStack(Thread* const self, mirror::Object* to << " cc->is_marking=" << is_marking_; CHECK(self == thread_running_gc_) << "Only GC-running thread should access the mark stack " - << "in the GC exclusive mark stack mode"; + << "in the GC exclusive mark stack mode. " + << "ref=" << to_ref + << " self->gc_marking=" << self->GetIsGcMarking() + << " cc->is_marking=" << is_marking_; // Access the GC mark stack without a lock. if (UNLIKELY(gc_mark_stack_->IsFull())) { ExpandGcMarkStack(); @@ -2716,6 +2731,11 @@ void ConcurrentCopying::ReclaimPhase() { } Thread* self = Thread::Current(); + // Free data for class loaders that we unloaded. This includes removing + // dead methods from JIT's internal maps. This must be done before + // reclaiming the memory of the dead methods' declaring classes. + Runtime::Current()->GetClassLinker()->CleanupClassLoaders(); + { // Double-check that the mark stack is empty. // Note: need to set this after VerifyNoFromSpaceRef(). 
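The space_bitmap changes earlier in this diff add two lookup helpers that the new mark-compact collector relies on: FindPrecedingObject(), which scans the mark bitmap backwards, and a kVisitOnce template flag for VisitMarkedRange() that stops after the first marked object. A hedged usage sketch; the helper names and the accounting::ContinuousSpaceBitmap alias are assumptions, not part of this patch:

    // Closest marked object starting at or before 'addr'; nullptr if no bit is set
    // in the covered range [space_begin, addr].
    mirror::Object* PrecedingObject(accounting::ContinuousSpaceBitmap* bitmap,
                                    uintptr_t addr,
                                    uintptr_t space_begin) {
      return bitmap->FindPrecedingObject(addr, space_begin);
    }

    // First marked object in [page_begin, page_end); kVisitOnce makes the walk
    // return after the first visited object.
    mirror::Object* FirstMarkedObject(accounting::ContinuousSpaceBitmap* bitmap,
                                      uintptr_t page_begin,
                                      uintptr_t page_end) {
      mirror::Object* first = nullptr;
      bitmap->VisitMarkedRange</*kVisitOnce=*/true>(
          page_begin, page_end, [&first](mirror::Object* obj) { first = obj; });
      return first;
    }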
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h index c274fed23b..888c38aa95 100644 --- a/runtime/gc/collector/concurrent_copying.h +++ b/runtime/gc/collector/concurrent_copying.h @@ -161,6 +161,10 @@ class ConcurrentCopying : public GarbageCollector { REQUIRES_SHARED(Locks::mutator_lock_); void AssertNoThreadMarkStackMapping(Thread* thread) REQUIRES(!mark_stack_lock_); + // Dump information about reference `ref` and return it as a string. + // Use `ref_name` to name the reference in messages. Each message is prefixed with `indent`. + std::string DumpReferenceInfo(mirror::Object* ref, const char* ref_name, const char* indent = "") + REQUIRES_SHARED(Locks::mutator_lock_); private: void PushOntoMarkStack(Thread* const self, mirror::Object* obj) @@ -282,10 +286,6 @@ class ConcurrentCopying : public GarbageCollector { void ComputeUnevacFromSpaceLiveRatio(); void LogFromSpaceRefHolder(mirror::Object* obj, MemberOffset offset) REQUIRES_SHARED(Locks::mutator_lock_); - // Dump information about reference `ref` and return it as a string. - // Use `ref_name` to name the reference in messages. Each message is prefixed with `indent`. - std::string DumpReferenceInfo(mirror::Object* ref, const char* ref_name, const char* indent = "") - REQUIRES_SHARED(Locks::mutator_lock_); // Dump information about heap reference `ref`, referenced from object `obj` at offset `offset`, // and return it as a string. std::string DumpHeapReference(mirror::Object* obj, MemberOffset offset, mirror::Object* ref) diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc index 80b39824ec..03a432dbf4 100644 --- a/runtime/gc/collector/garbage_collector.cc +++ b/runtime/gc/collector/garbage_collector.cc @@ -72,10 +72,17 @@ GarbageCollector::GarbageCollector(Heap* heap, const std::string& name) freed_bytes_histogram_((name_ + " freed-bytes").c_str(), kMemBucketSize, kMemBucketCount), gc_time_histogram_(nullptr), metrics_gc_count_(nullptr), + metrics_gc_count_delta_(nullptr), gc_throughput_histogram_(nullptr), gc_tracing_throughput_hist_(nullptr), gc_throughput_avg_(nullptr), gc_tracing_throughput_avg_(nullptr), + gc_scanned_bytes_(nullptr), + gc_scanned_bytes_delta_(nullptr), + gc_freed_bytes_(nullptr), + gc_freed_bytes_delta_(nullptr), + gc_duration_(nullptr), + gc_duration_delta_(nullptr), cumulative_timings_(name), pause_histogram_lock_("pause histogram lock", kDefaultMutexLevel, true), is_transaction_active_(false), @@ -189,19 +196,26 @@ void GarbageCollector::Run(GcCause gc_cause, bool clear_soft_references) { RegisterPause(duration_ns); } total_time_ns_ += duration_ns; - uint64_t total_pause_time = 0; + uint64_t total_pause_time_ns = 0; for (uint64_t pause_time : current_iteration->GetPauseTimes()) { MutexLock mu(self, pause_histogram_lock_); pause_histogram_.AdjustAndAddValue(pause_time); - total_pause_time += pause_time; + total_pause_time_ns += pause_time; } metrics::ArtMetrics* metrics = runtime->GetMetrics(); // Report STW pause time in microseconds. - metrics->WorldStopTimeDuringGCAvg()->Add(total_pause_time / 1'000); + const uint64_t total_pause_time_us = total_pause_time_ns / 1'000; + metrics->WorldStopTimeDuringGCAvg()->Add(total_pause_time_us); + metrics->GcWorldStopTime()->Add(total_pause_time_us); + metrics->GcWorldStopTimeDelta()->Add(total_pause_time_us); + metrics->GcWorldStopCount()->AddOne(); + metrics->GcWorldStopCountDelta()->AddOne(); // Report total collection time of all GCs put together. 
metrics->TotalGcCollectionTime()->Add(NsToMs(duration_ns)); + metrics->TotalGcCollectionTimeDelta()->Add(NsToMs(duration_ns)); if (are_metrics_initialized_) { metrics_gc_count_->Add(1); + metrics_gc_count_delta_->Add(1); // Report GC time in milliseconds. gc_time_histogram_->Add(NsToMs(duration_ns)); // Throughput in bytes/s. Add 1us to prevent possible division by 0. @@ -216,6 +230,13 @@ void GarbageCollector::Run(GcCause gc_cause, bool clear_soft_references) { throughput = current_iteration->GetEstimatedThroughput() / MB; gc_throughput_histogram_->Add(throughput); gc_throughput_avg_->Add(throughput); + + gc_scanned_bytes_->Add(current_iteration->GetScannedBytes()); + gc_scanned_bytes_delta_->Add(current_iteration->GetScannedBytes()); + gc_freed_bytes_->Add(current_iteration->GetFreedBytes()); + gc_freed_bytes_delta_->Add(current_iteration->GetFreedBytes()); + gc_duration_->Add(NsToMs(current_iteration->GetDurationNs())); + gc_duration_delta_->Add(NsToMs(current_iteration->GetDurationNs())); } is_transaction_active_ = false; } diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h index d439914621..948a868bd2 100644 --- a/runtime/gc/collector/garbage_collector.h +++ b/runtime/gc/collector/garbage_collector.h @@ -162,10 +162,17 @@ class GarbageCollector : public RootVisitor, public IsMarkedVisitor, public Mark Histogram<size_t> freed_bytes_histogram_; metrics::MetricsBase<int64_t>* gc_time_histogram_; metrics::MetricsBase<uint64_t>* metrics_gc_count_; + metrics::MetricsBase<uint64_t>* metrics_gc_count_delta_; metrics::MetricsBase<int64_t>* gc_throughput_histogram_; metrics::MetricsBase<int64_t>* gc_tracing_throughput_hist_; metrics::MetricsBase<uint64_t>* gc_throughput_avg_; metrics::MetricsBase<uint64_t>* gc_tracing_throughput_avg_; + metrics::MetricsBase<uint64_t>* gc_scanned_bytes_; + metrics::MetricsBase<uint64_t>* gc_scanned_bytes_delta_; + metrics::MetricsBase<uint64_t>* gc_freed_bytes_; + metrics::MetricsBase<uint64_t>* gc_freed_bytes_delta_; + metrics::MetricsBase<uint64_t>* gc_duration_; + metrics::MetricsBase<uint64_t>* gc_duration_delta_; uint64_t total_thread_cpu_time_ns_; uint64_t total_time_ns_; uint64_t total_freed_objects_; diff --git a/runtime/gc/collector/immune_spaces_test.cc b/runtime/gc/collector/immune_spaces_test.cc index a0ea60d4c5..caa8106228 100644 --- a/runtime/gc/collector/immune_spaces_test.cc +++ b/runtime/gc/collector/immune_spaces_test.cc @@ -16,7 +16,8 @@ #include <sys/mman.h> -#include "common_runtime_test.h" +#include "base/common_art_test.h" +#include "base/utils.h" #include "gc/collector/immune_spaces.h" #include "gc/space/image_space.h" #include "gc/space/space-inl.h" @@ -46,7 +47,7 @@ class FakeImageSpace : public space::ImageSpace { MemMap&& oat_map) : ImageSpace("FakeImageSpace", /*image_location=*/"", - /*profile_file=*/{}, + /*profile_files=*/{}, std::move(map), std::move(live_bitmap), map.End()), @@ -59,7 +60,7 @@ class FakeImageSpace : public space::ImageSpace { MemMap oat_map_; }; -class ImmuneSpacesTest : public CommonRuntimeTest { +class ImmuneSpacesTest : public CommonArtTest { static constexpr size_t kMaxBitmaps = 10; public: diff --git a/runtime/gc/collector/mark_compact-inl.h b/runtime/gc/collector/mark_compact-inl.h new file mode 100644 index 0000000000..c9b792e8f6 --- /dev/null +++ b/runtime/gc/collector/mark_compact-inl.h @@ -0,0 +1,394 @@ +/* + * Copyright 2021 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file 
except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_RUNTIME_GC_COLLECTOR_MARK_COMPACT_INL_H_ +#define ART_RUNTIME_GC_COLLECTOR_MARK_COMPACT_INL_H_ + +#include "gc/space/bump_pointer_space.h" +#include "mark_compact.h" +#include "mirror/object-inl.h" + +namespace art { +namespace gc { +namespace collector { + +inline void MarkCompact::UpdateClassAfterObjectMap(mirror::Object* obj) { + mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>(); + // Track a class if it needs walking super-classes for visiting references or + // if it's higher in address order than its objects and is in moving space. + if (UNLIKELY( + (std::less<mirror::Object*>{}(obj, klass) && bump_pointer_space_->HasAddress(klass)) || + (klass->GetReferenceInstanceOffsets<kVerifyNone>() == mirror::Class::kClassWalkSuper && + walk_super_class_cache_ != klass))) { + // Since this function gets invoked in the compaction pause as well, it is + // preferable to store such super class separately rather than updating key + // as the latter would require traversing the hierarchy for every object of 'klass'. + auto ret1 = class_after_obj_hash_map_.try_emplace(ObjReference::FromMirrorPtr(klass), + ObjReference::FromMirrorPtr(obj)); + if (ret1.second) { + if (klass->GetReferenceInstanceOffsets<kVerifyNone>() == mirror::Class::kClassWalkSuper) { + // In this case we require traversing through the super class hierarchy + // and find the super class at the highest address order. + mirror::Class* highest_klass = bump_pointer_space_->HasAddress(klass) ? klass : nullptr; + for (ObjPtr<mirror::Class> k = klass->GetSuperClass<kVerifyNone, kWithoutReadBarrier>(); + k != nullptr; + k = k->GetSuperClass<kVerifyNone, kWithoutReadBarrier>()) { + // TODO: Can we break once we encounter a super class outside the moving space? 
+ if (bump_pointer_space_->HasAddress(k.Ptr())) { + highest_klass = std::max(highest_klass, k.Ptr(), std::less<mirror::Class*>()); + } + } + if (highest_klass != nullptr && highest_klass != klass) { + auto ret2 = super_class_after_class_hash_map_.try_emplace( + ObjReference::FromMirrorPtr(klass), ObjReference::FromMirrorPtr(highest_klass)); + DCHECK(ret2.second); + } else { + walk_super_class_cache_ = klass; + } + } + } else if (std::less<mirror::Object*>{}(obj, ret1.first->second.AsMirrorPtr())) { + ret1.first->second = ObjReference::FromMirrorPtr(obj); + } + } +} + +template <size_t kAlignment> +inline uintptr_t MarkCompact::LiveWordsBitmap<kAlignment>::SetLiveWords(uintptr_t begin, + size_t size) { + const uintptr_t begin_bit_idx = MemRangeBitmap::BitIndexFromAddr(begin); + DCHECK(!Bitmap::TestBit(begin_bit_idx)); + // Range to set bit: [begin, end] + uintptr_t end = begin + size - kAlignment; + const uintptr_t end_bit_idx = MemRangeBitmap::BitIndexFromAddr(end); + uintptr_t* begin_bm_address = Bitmap::Begin() + Bitmap::BitIndexToWordIndex(begin_bit_idx); + uintptr_t* end_bm_address = Bitmap::Begin() + Bitmap::BitIndexToWordIndex(end_bit_idx); + ptrdiff_t diff = end_bm_address - begin_bm_address; + uintptr_t mask = Bitmap::BitIndexToMask(begin_bit_idx); + // Bits that needs to be set in the first word, if it's not also the last word + mask = ~(mask - 1); + if (diff > 0) { + *begin_bm_address |= mask; + mask = ~0; + // Even though memset can handle the (diff == 1) case but we should avoid the + // overhead of a function call for this, highly likely (as most of the objects + // are small), case. + if (diff > 1) { + // Set all intermediate bits to 1. + std::memset(static_cast<void*>(begin_bm_address + 1), 0xff, (diff - 1) * sizeof(uintptr_t)); + } + } + uintptr_t end_mask = Bitmap::BitIndexToMask(end_bit_idx); + *end_bm_address |= mask & (end_mask | (end_mask - 1)); + return begin_bit_idx; +} + +template <size_t kAlignment> template <typename Visitor> +inline void MarkCompact::LiveWordsBitmap<kAlignment>::VisitLiveStrides(uintptr_t begin_bit_idx, + uint8_t* end, + const size_t bytes, + Visitor&& visitor) const { + // Range to visit [begin_bit_idx, end_bit_idx] + DCHECK(IsAligned<kAlignment>(end)); + end -= kAlignment; + const uintptr_t end_bit_idx = MemRangeBitmap::BitIndexFromAddr(reinterpret_cast<uintptr_t>(end)); + DCHECK_LE(begin_bit_idx, end_bit_idx); + uintptr_t begin_word_idx = Bitmap::BitIndexToWordIndex(begin_bit_idx); + const uintptr_t end_word_idx = Bitmap::BitIndexToWordIndex(end_bit_idx); + DCHECK(Bitmap::TestBit(begin_bit_idx)); + size_t stride_size = 0; + size_t idx_in_word = 0; + size_t num_heap_words = bytes / kAlignment; + uintptr_t live_stride_start_idx; + uintptr_t word = Bitmap::Begin()[begin_word_idx]; + + // Setup the first word. + word &= ~(Bitmap::BitIndexToMask(begin_bit_idx) - 1); + begin_bit_idx = RoundDown(begin_bit_idx, Bitmap::kBitsPerBitmapWord); + + do { + if (UNLIKELY(begin_word_idx == end_word_idx)) { + uintptr_t mask = Bitmap::BitIndexToMask(end_bit_idx); + word &= mask | (mask - 1); + } + if (~word == 0) { + // All bits in the word are marked. 
+ if (stride_size == 0) { + live_stride_start_idx = begin_bit_idx; + } + stride_size += Bitmap::kBitsPerBitmapWord; + if (num_heap_words <= stride_size) { + break; + } + } else { + while (word != 0) { + // discard 0s + size_t shift = CTZ(word); + idx_in_word += shift; + word >>= shift; + if (stride_size > 0) { + if (shift > 0) { + if (num_heap_words <= stride_size) { + break; + } + visitor(live_stride_start_idx, stride_size, /*is_last*/ false); + num_heap_words -= stride_size; + live_stride_start_idx = begin_bit_idx + idx_in_word; + stride_size = 0; + } + } else { + live_stride_start_idx = begin_bit_idx + idx_in_word; + } + // consume 1s + shift = CTZ(~word); + DCHECK_NE(shift, 0u); + word >>= shift; + idx_in_word += shift; + stride_size += shift; + } + // If the whole word == 0 or the higher bits are 0s, then we exit out of + // the above loop without completely consuming the word, so call visitor, + // if needed. + if (idx_in_word < Bitmap::kBitsPerBitmapWord && stride_size > 0) { + if (num_heap_words <= stride_size) { + break; + } + visitor(live_stride_start_idx, stride_size, /*is_last*/ false); + num_heap_words -= stride_size; + stride_size = 0; + } + idx_in_word = 0; + } + begin_bit_idx += Bitmap::kBitsPerBitmapWord; + begin_word_idx++; + if (UNLIKELY(begin_word_idx > end_word_idx)) { + num_heap_words = std::min(stride_size, num_heap_words); + break; + } + word = Bitmap::Begin()[begin_word_idx]; + } while (true); + + if (stride_size > 0) { + visitor(live_stride_start_idx, num_heap_words, /*is_last*/ true); + } +} + +template <size_t kAlignment> +inline +uint32_t MarkCompact::LiveWordsBitmap<kAlignment>::FindNthLiveWordOffset(size_t chunk_idx, + uint32_t n) const { + DCHECK_LT(n, kBitsPerVectorWord); + const size_t index = chunk_idx * kBitmapWordsPerVectorWord; + for (uint32_t i = 0; i < kBitmapWordsPerVectorWord; i++) { + uintptr_t word = Bitmap::Begin()[index + i]; + if (~word == 0) { + if (n < Bitmap::kBitsPerBitmapWord) { + return i * Bitmap::kBitsPerBitmapWord + n; + } + n -= Bitmap::kBitsPerBitmapWord; + } else { + uint32_t j = 0; + while (word != 0) { + // count contiguous 0s + uint32_t shift = CTZ(word); + word >>= shift; + j += shift; + // count contiguous 1s + shift = CTZ(~word); + DCHECK_NE(shift, 0u); + if (shift > n) { + return i * Bitmap::kBitsPerBitmapWord + j + n; + } + n -= shift; + word >>= shift; + j += shift; + } + } + } + UNREACHABLE(); +} + +inline void MarkCompact::UpdateRef(mirror::Object* obj, MemberOffset offset) { + mirror::Object* old_ref = obj->GetFieldObject< + mirror::Object, kVerifyNone, kWithoutReadBarrier, /*kIsVolatile*/false>(offset); + if (kIsDebugBuild) { + if (live_words_bitmap_->HasAddress(old_ref) + && reinterpret_cast<uint8_t*>(old_ref) < black_allocations_begin_ + && !moving_space_bitmap_->Test(old_ref)) { + mirror::Object* from_ref = GetFromSpaceAddr(old_ref); + std::ostringstream oss; + heap_->DumpSpaces(oss); + MemMap::DumpMaps(oss, /* terse= */ true); + LOG(FATAL) << "Not marked in the bitmap ref=" << old_ref + << " from_ref=" << from_ref + << " offset=" << offset + << " obj=" << obj + << " obj-validity=" << IsValidObject(obj) + << " from-space=" << static_cast<void*>(from_space_begin_) + << " bitmap= " << moving_space_bitmap_->DumpMemAround(old_ref) + << " from_ref " + << heap_->GetVerification()->DumpRAMAroundAddress( + reinterpret_cast<uintptr_t>(from_ref), 128) + << " obj " + << heap_->GetVerification()->DumpRAMAroundAddress( + reinterpret_cast<uintptr_t>(obj), 128) + << " old_ref " << heap_->GetVerification()->DumpRAMAroundAddress( + 
reinterpret_cast<uintptr_t>(old_ref), 128) + << " maps\n" << oss.str(); + } + } + mirror::Object* new_ref = PostCompactAddress(old_ref); + if (new_ref != old_ref) { + obj->SetFieldObjectWithoutWriteBarrier< + /*kTransactionActive*/false, /*kCheckTransaction*/false, kVerifyNone, /*kIsVolatile*/false>( + offset, + new_ref); + } +} + +inline bool MarkCompact::VerifyRootSingleUpdate(void* root, + mirror::Object* old_ref, + const RootInfo& info) { + // ASAN promotes stack-frames to heap in order to detect + // stack-use-after-return issues. So skip using this double-root update + // detection on ASAN as well. + if (kIsDebugBuild && !kMemoryToolIsAvailable) { + void* stack_low_addr = stack_low_addr_; + void* stack_high_addr = stack_high_addr_; + if (!live_words_bitmap_->HasAddress(old_ref)) { + return false; + } + Thread* self = Thread::Current(); + if (UNLIKELY(stack_low_addr == nullptr)) { + stack_low_addr = self->GetStackEnd(); + stack_high_addr = reinterpret_cast<char*>(stack_low_addr) + self->GetStackSize(); + } + if (root < stack_low_addr || root > stack_high_addr) { + MutexLock mu(self, lock_); + auto ret = updated_roots_->insert(root); + DCHECK(ret.second) << "root=" << root << " old_ref=" << old_ref + << " stack_low_addr=" << stack_low_addr + << " stack_high_addr=" << stack_high_addr; + } + DCHECK(reinterpret_cast<uint8_t*>(old_ref) >= black_allocations_begin_ || + live_words_bitmap_->Test(old_ref)) + << "ref=" << old_ref << " <" << mirror::Object::PrettyTypeOf(old_ref) << "> RootInfo [" + << info << "]"; + } + return true; +} + +inline void MarkCompact::UpdateRoot(mirror::CompressedReference<mirror::Object>* root, + const RootInfo& info) { + DCHECK(!root->IsNull()); + mirror::Object* old_ref = root->AsMirrorPtr(); + if (VerifyRootSingleUpdate(root, old_ref, info)) { + mirror::Object* new_ref = PostCompactAddress(old_ref); + if (old_ref != new_ref) { + root->Assign(new_ref); + } + } +} + +inline void MarkCompact::UpdateRoot(mirror::Object** root, const RootInfo& info) { + mirror::Object* old_ref = *root; + if (VerifyRootSingleUpdate(root, old_ref, info)) { + mirror::Object* new_ref = PostCompactAddress(old_ref); + if (old_ref != new_ref) { + *root = new_ref; + } + } +} + +template <size_t kAlignment> +inline size_t MarkCompact::LiveWordsBitmap<kAlignment>::CountLiveWordsUpto(size_t bit_idx) const { + const size_t word_offset = Bitmap::BitIndexToWordIndex(bit_idx); + uintptr_t word; + size_t ret = 0; + // This is needed only if we decide to make chunks 128-bit but still + // choose to use 64-bit word for bitmap. Ideally we should use 128-bit + // SIMD instructions to compute popcount. 
+ if (kBitmapWordsPerVectorWord > 1) { + for (size_t i = RoundDown(word_offset, kBitmapWordsPerVectorWord); i < word_offset; i++) { + word = Bitmap::Begin()[i]; + ret += POPCOUNT(word); + } + } + word = Bitmap::Begin()[word_offset]; + const uintptr_t mask = Bitmap::BitIndexToMask(bit_idx); + DCHECK_NE(word & mask, 0u) + << " word_offset:" << word_offset + << " bit_idx:" << bit_idx + << " bit_idx_in_word:" << (bit_idx % Bitmap::kBitsPerBitmapWord) + << std::hex << " word: 0x" << word + << " mask: 0x" << mask << std::dec; + ret += POPCOUNT(word & (mask - 1)); + return ret; +} + +inline mirror::Object* MarkCompact::PostCompactBlackObjAddr(mirror::Object* old_ref) const { + return reinterpret_cast<mirror::Object*>(reinterpret_cast<uint8_t*>(old_ref) + - black_objs_slide_diff_); +} + +inline mirror::Object* MarkCompact::PostCompactOldObjAddr(mirror::Object* old_ref) const { + const uintptr_t begin = live_words_bitmap_->Begin(); + const uintptr_t addr_offset = reinterpret_cast<uintptr_t>(old_ref) - begin; + const size_t vec_idx = addr_offset / kOffsetChunkSize; + const size_t live_bytes_in_bitmap_word = + live_words_bitmap_->CountLiveWordsUpto(addr_offset / kAlignment) * kAlignment; + return reinterpret_cast<mirror::Object*>(begin + + chunk_info_vec_[vec_idx] + + live_bytes_in_bitmap_word); +} + +inline mirror::Object* MarkCompact::PostCompactAddressUnchecked(mirror::Object* old_ref) const { + if (reinterpret_cast<uint8_t*>(old_ref) >= black_allocations_begin_) { + return PostCompactBlackObjAddr(old_ref); + } + if (kIsDebugBuild) { + mirror::Object* from_ref = GetFromSpaceAddr(old_ref); + DCHECK(live_words_bitmap_->Test(old_ref)) + << "ref=" << old_ref; + if (!moving_space_bitmap_->Test(old_ref)) { + std::ostringstream oss; + Runtime::Current()->GetHeap()->DumpSpaces(oss); + MemMap::DumpMaps(oss, /* terse= */ true); + LOG(FATAL) << "ref=" << old_ref + << " from_ref=" << from_ref + << " from-space=" << static_cast<void*>(from_space_begin_) + << " bitmap= " << moving_space_bitmap_->DumpMemAround(old_ref) + << heap_->GetVerification()->DumpRAMAroundAddress( + reinterpret_cast<uintptr_t>(from_ref), 128) + << " maps\n" << oss.str(); + } + } + return PostCompactOldObjAddr(old_ref); +} + +inline mirror::Object* MarkCompact::PostCompactAddress(mirror::Object* old_ref) const { + // TODO: To further speedup the check, maybe we should consider caching heap + // start/end in this object. + if (LIKELY(live_words_bitmap_->HasAddress(old_ref))) { + return PostCompactAddressUnchecked(old_ref); + } + return old_ref; +} + +} // namespace collector +} // namespace gc +} // namespace art + +#endif // ART_RUNTIME_GC_COLLECTOR_MARK_COMPACT_INL_H_ diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc new file mode 100644 index 0000000000..bb34068fb1 --- /dev/null +++ b/runtime/gc/collector/mark_compact.cc @@ -0,0 +1,4300 @@ +/* + * Copyright 2021 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <fcntl.h> +// Glibc v2.19 doesn't include these in fcntl.h so host builds will fail without. +#if !defined(FALLOC_FL_PUNCH_HOLE) || !defined(FALLOC_FL_KEEP_SIZE) +#include <linux/falloc.h> +#endif +#include <linux/userfaultfd.h> +#include <poll.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <sys/stat.h> +#include <unistd.h> + +#include <fstream> +#include <numeric> +#include <string> +#include <string_view> +#include <vector> + +#include "android-base/file.h" +#include "android-base/parsebool.h" +#include "android-base/parseint.h" +#include "android-base/properties.h" +#include "android-base/strings.h" +#include "base/file_utils.h" +#include "base/memfd.h" +#include "base/quasi_atomic.h" +#include "base/systrace.h" +#include "base/utils.h" +#include "gc/accounting/mod_union_table-inl.h" +#include "gc/collector_type.h" +#include "gc/reference_processor.h" +#include "gc/space/bump_pointer_space.h" +#include "gc/task_processor.h" +#include "gc/verification-inl.h" +#include "jit/jit_code_cache.h" +#include "mark_compact-inl.h" +#include "mirror/object-refvisitor-inl.h" +#include "read_barrier_config.h" +#include "scoped_thread_state_change-inl.h" +#include "sigchain.h" +#include "thread_list.h" + +#ifdef ART_TARGET_ANDROID +#include "com_android_art.h" +#endif + +#ifndef __BIONIC__ +#ifndef MREMAP_DONTUNMAP +#define MREMAP_DONTUNMAP 4 +#endif +#ifndef MAP_FIXED_NOREPLACE +#define MAP_FIXED_NOREPLACE 0x100000 +#endif +#ifndef __NR_userfaultfd +#if defined(__x86_64__) +#define __NR_userfaultfd 323 +#elif defined(__i386__) +#define __NR_userfaultfd 374 +#elif defined(__aarch64__) +#define __NR_userfaultfd 282 +#elif defined(__arm__) +#define __NR_userfaultfd 388 +#else +#error "__NR_userfaultfd undefined" +#endif +#endif // __NR_userfaultfd +#endif // __BIONIC__ + +namespace { + +using ::android::base::GetBoolProperty; +using ::android::base::ParseBool; +using ::android::base::ParseBoolResult; + +} // namespace + +namespace art { + +static bool HaveMremapDontunmap() { + void* old = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); + CHECK_NE(old, MAP_FAILED); + void* addr = mremap(old, kPageSize, kPageSize, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, nullptr); + CHECK_EQ(munmap(old, kPageSize), 0); + if (addr != MAP_FAILED) { + CHECK_EQ(munmap(addr, kPageSize), 0); + return true; + } else { + return false; + } +} +// We require MREMAP_DONTUNMAP functionality of the mremap syscall, which was +// introduced in 5.13 kernel version. But it was backported to GKI kernels. +static bool gHaveMremapDontunmap = IsKernelVersionAtLeast(5, 13) || HaveMremapDontunmap(); +// Bitmap of features supported by userfaultfd. This is obtained via uffd API ioctl. +static uint64_t gUffdFeatures = 0; +// Both, missing and minor faults on shmem are needed only for minor-fault mode. +static constexpr uint64_t kUffdFeaturesForMinorFault = + UFFD_FEATURE_MISSING_SHMEM | UFFD_FEATURE_MINOR_SHMEM; +static constexpr uint64_t kUffdFeaturesForSigbus = UFFD_FEATURE_SIGBUS; +// We consider SIGBUS feature necessary to enable this GC as it's superior than +// threading-based implementation for janks. However, since we have the latter +// already implemented, for testing purposes, we allow choosing either of the +// two at boot time in the constructor below. +// Note that having minor-fault feature implies having SIGBUS feature as the +// latter was introduced earlier than the former. In other words, having +// minor-fault feature implies having SIGBUS. 
We still want minor-fault to be +// available for making jit-code-cache updation concurrent, which uses shmem. +static constexpr uint64_t kUffdFeaturesRequired = + kUffdFeaturesForMinorFault | kUffdFeaturesForSigbus; + +bool KernelSupportsUffd() { +#ifdef __linux__ + if (gHaveMremapDontunmap) { + int fd = syscall(__NR_userfaultfd, O_CLOEXEC | UFFD_USER_MODE_ONLY); + // On non-android devices we may not have the kernel patches that restrict + // userfaultfd to user mode. But that is not a security concern as we are + // on host. Therefore, attempt one more time without UFFD_USER_MODE_ONLY. + if (!kIsTargetAndroid && fd == -1 && errno == EINVAL) { + fd = syscall(__NR_userfaultfd, O_CLOEXEC); + } + if (fd >= 0) { + // We are only fetching the available features, which is returned by the + // ioctl. + struct uffdio_api api = {.api = UFFD_API, .features = 0, .ioctls = 0}; + CHECK_EQ(ioctl(fd, UFFDIO_API, &api), 0) << "ioctl_userfaultfd : API:" << strerror(errno); + gUffdFeatures = api.features; + close(fd); + // Allow this GC to be used only if minor-fault and sigbus feature is available. + return (api.features & kUffdFeaturesRequired) == kUffdFeaturesRequired; + } + } +#endif + return false; +} + +// The other cases are defined as constexpr in runtime/read_barrier_config.h +#if !defined(ART_FORCE_USE_READ_BARRIER) && defined(ART_USE_READ_BARRIER) +// Returns collector type asked to be used on the cmdline. +static gc::CollectorType FetchCmdlineGcType() { + std::string argv; + gc::CollectorType gc_type = gc::CollectorType::kCollectorTypeNone; + if (android::base::ReadFileToString("/proc/self/cmdline", &argv)) { + if (argv.find("-Xgc:CMC") != std::string::npos) { + gc_type = gc::CollectorType::kCollectorTypeCMC; + } else if (argv.find("-Xgc:CC") != std::string::npos) { + gc_type = gc::CollectorType::kCollectorTypeCC; + } + } + return gc_type; +} + +#ifdef ART_TARGET_ANDROID +static int GetOverrideCacheInfoFd() { + std::string args_str; + if (!android::base::ReadFileToString("/proc/self/cmdline", &args_str)) { + LOG(WARNING) << "Failed to load /proc/self/cmdline"; + return -1; + } + std::vector<std::string_view> args; + Split(std::string_view(args_str), /*separator=*/'\0', &args); + for (std::string_view arg : args) { + if (android::base::ConsumePrefix(&arg, "--cache-info-fd=")) { // This is a dex2oat flag. + int fd; + if (!android::base::ParseInt(std::string(arg), &fd)) { + LOG(ERROR) << "Failed to parse --cache-info-fd (value: '" << arg << "')"; + return -1; + } + return fd; + } + } + return -1; +} + +static bool GetCachedBoolProperty(const std::string& key, bool default_value) { + // For simplicity, we don't handle multiple calls because otherwise we would have to reset the fd. + static bool called = false; + CHECK(!called) << "GetCachedBoolProperty can be called only once"; + called = true; + + std::string cache_info_contents; + int fd = GetOverrideCacheInfoFd(); + if (fd >= 0) { + if (!android::base::ReadFdToString(fd, &cache_info_contents)) { + PLOG(ERROR) << "Failed to read cache-info from fd " << fd; + return default_value; + } + } else { + std::string path = GetApexDataDalvikCacheDirectory(InstructionSet::kNone) + "/cache-info.xml"; + if (!android::base::ReadFileToString(path, &cache_info_contents)) { + // If the file is not found, then we are in chroot or in a standalone runtime process (e.g., + // IncidentHelper), or odsign/odrefresh failed to generate and sign the cache info. There's + // nothing we can do. 
+ if (errno != ENOENT) { + PLOG(ERROR) << "Failed to read cache-info from the default path"; + } + return default_value; + } + } + + std::optional<com::android::art::CacheInfo> cache_info = + com::android::art::parse(cache_info_contents.c_str()); + if (!cache_info.has_value()) { + // This should never happen. + LOG(ERROR) << "Failed to parse cache-info"; + return default_value; + } + const com::android::art::KeyValuePairList* list = cache_info->getFirstSystemProperties(); + if (list == nullptr) { + // This should never happen. + LOG(ERROR) << "Missing system properties from cache-info"; + return default_value; + } + const std::vector<com::android::art::KeyValuePair>& properties = list->getItem(); + for (const com::android::art::KeyValuePair& pair : properties) { + if (pair.getK() == key) { + ParseBoolResult result = ParseBool(pair.getV()); + switch (result) { + case ParseBoolResult::kTrue: + return true; + case ParseBoolResult::kFalse: + return false; + case ParseBoolResult::kError: + return default_value; + } + } + } + return default_value; +} + +static bool SysPropSaysUffdGc() { + // The phenotype flag can change at time time after boot, but it shouldn't take effect until a + // reboot. Therefore, we read the phenotype flag from the cache info, which is generated on boot. + return GetCachedBoolProperty("persist.device_config.runtime_native_boot.enable_uffd_gc", + GetBoolProperty("ro.dalvik.vm.enable_uffd_gc", false)); +} +#else +// Never called. +static bool SysPropSaysUffdGc() { return false; } +#endif + +static bool ShouldUseUserfaultfd() { + static_assert(kUseBakerReadBarrier || kUseTableLookupReadBarrier); +#ifdef __linux__ + // Use CMC/CC if that is being explicitly asked for on cmdline. Otherwise, + // always use CC on host. On target, use CMC only if system properties says so + // and the kernel supports it. + gc::CollectorType gc_type = FetchCmdlineGcType(); + return gc_type == gc::CollectorType::kCollectorTypeCMC || + (gc_type == gc::CollectorType::kCollectorTypeNone && + kIsTargetAndroid && + SysPropSaysUffdGc() && + KernelSupportsUffd()); +#else + return false; +#endif +} + +const bool gUseUserfaultfd = ShouldUseUserfaultfd(); +const bool gUseReadBarrier = !gUseUserfaultfd; +#endif + +namespace gc { +namespace collector { + +// Turn off kCheckLocks when profiling the GC as it slows down the GC +// significantly. +static constexpr bool kCheckLocks = kDebugLocking; +static constexpr bool kVerifyRootsMarked = kIsDebugBuild; +// Two threads should suffice on devices. +static constexpr size_t kMaxNumUffdWorkers = 2; +// Number of compaction buffers reserved for mutator threads in SIGBUS feature +// case. It's extremely unlikely that we will ever have more than these number +// of mutator threads trying to access the moving-space during one compaction +// phase. Using a lower number in debug builds to hopefully catch the issue +// before it becomes a problem on user builds. +static constexpr size_t kMutatorCompactionBufferCount = kIsDebugBuild ? 256 : 512; +// Minimum from-space chunk to be madvised (during concurrent compaction) in one go. +static constexpr ssize_t kMinFromSpaceMadviseSize = 1 * MB; +// Concurrent compaction termination logic is different (and slightly more efficient) if the +// kernel has the fault-retry feature (allowing repeated faults on the same page), which was +// introduced in 5.7 (https://android-review.git.corp.google.com/c/kernel/common/+/1540088). 
+// This allows a single page fault to be handled, in turn, by each worker thread, only waking +// up the GC thread at the end. +static const bool gKernelHasFaultRetry = IsKernelVersionAtLeast(5, 7); + +std::pair<bool, bool> MarkCompact::GetUffdAndMinorFault() { + bool uffd_available; + // In most cases the gUffdFeatures will already be initialized at boot time + // when libart is loaded. On very old kernels we may get '0' from the kernel, + // in which case we would be doing the syscalls each time this function is + // called. But that's a very unlikely case. There are no correctness issues as + // the response from the kernel never changes after boot. + if (UNLIKELY(gUffdFeatures == 0)) { + uffd_available = KernelSupportsUffd(); + } else { + // We can have any uffd features only if uffd exists. + uffd_available = true; + } + bool minor_fault_available = + (gUffdFeatures & kUffdFeaturesForMinorFault) == kUffdFeaturesForMinorFault; + return std::pair<bool, bool>(uffd_available, minor_fault_available); +} + +bool MarkCompact::CreateUserfaultfd(bool post_fork) { + if (post_fork || uffd_ == kFdUnused) { + // Check if we have MREMAP_DONTUNMAP here for cases where + // 'ART_USE_READ_BARRIER=false' is used. Additionally, this check ensures + // that userfaultfd isn't used on old kernels, which cause random ioctl + // failures. + if (gHaveMremapDontunmap) { + // Don't use O_NONBLOCK as we rely on read waiting on uffd_ if there isn't + // any read event available. We don't use poll. + uffd_ = syscall(__NR_userfaultfd, O_CLOEXEC | UFFD_USER_MODE_ONLY); + // On non-android devices we may not have the kernel patches that restrict + // userfaultfd to user mode. But that is not a security concern as we are + // on host. Therefore, attempt one more time without UFFD_USER_MODE_ONLY. + if (!kIsTargetAndroid && UNLIKELY(uffd_ == -1 && errno == EINVAL)) { + uffd_ = syscall(__NR_userfaultfd, O_CLOEXEC); + } + if (UNLIKELY(uffd_ == -1)) { + uffd_ = kFallbackMode; + LOG(WARNING) << "Userfaultfd isn't supported (reason: " << strerror(errno) + << ") and therefore falling back to stop-the-world compaction."; + } else { + DCHECK(IsValidFd(uffd_)); + // Initialize uffd with the features which are required and available. + struct uffdio_api api = {.api = UFFD_API, .features = gUffdFeatures, .ioctls = 0}; + api.features &= use_uffd_sigbus_ ?
kUffdFeaturesRequired : kUffdFeaturesForMinorFault; + CHECK_EQ(ioctl(uffd_, UFFDIO_API, &api), 0) + << "ioctl_userfaultfd: API: " << strerror(errno); + } + } else { + uffd_ = kFallbackMode; + } + } + uffd_initialized_ = !post_fork || uffd_ == kFallbackMode; + return IsValidFd(uffd_); +} + +template <size_t kAlignment> +MarkCompact::LiveWordsBitmap<kAlignment>* MarkCompact::LiveWordsBitmap<kAlignment>::Create( + uintptr_t begin, uintptr_t end) { + return static_cast<LiveWordsBitmap<kAlignment>*>( + MemRangeBitmap::Create("Concurrent Mark Compact live words bitmap", begin, end)); +} + +static bool IsSigbusFeatureAvailable() { + MarkCompact::GetUffdAndMinorFault(); + return gUffdFeatures & UFFD_FEATURE_SIGBUS; +} + +MarkCompact::MarkCompact(Heap* heap) + : GarbageCollector(heap, "concurrent mark compact"), + gc_barrier_(0), + lock_("mark compact lock", kGenericBottomLock), + bump_pointer_space_(heap->GetBumpPointerSpace()), + moving_space_bitmap_(bump_pointer_space_->GetMarkBitmap()), + moving_to_space_fd_(kFdUnused), + moving_from_space_fd_(kFdUnused), + uffd_(kFdUnused), + sigbus_in_progress_count_(kSigbusCounterCompactionDoneMask), + compaction_in_progress_count_(0), + thread_pool_counter_(0), + compacting_(false), + uffd_initialized_(false), + uffd_minor_fault_supported_(false), + use_uffd_sigbus_(IsSigbusFeatureAvailable()), + minor_fault_initialized_(false), + map_linear_alloc_shared_(false) { + if (kIsDebugBuild) { + updated_roots_.reset(new std::unordered_set<void*>()); + } + // TODO: When using minor-fault feature, the first GC after zygote-fork + // requires mapping the linear-alloc again with MAP_SHARED. This leaves a + // gap for suspended threads to access linear-alloc when it's empty (after + // mremap) and not yet userfaultfd registered. This cannot be fixed by merely + // doing uffd registration first. For now, just assert that we are not using + // minor-fault. Eventually, a cleanup of linear-alloc update logic to only + // use private anonymous would be ideal. + CHECK(!uffd_minor_fault_supported_); + + // TODO: Depending on how the bump-pointer space move is implemented. If we + // switch between two virtual memories each time, then we will have to + // initialize live_words_bitmap_ accordingly. 
+ live_words_bitmap_.reset(LiveWordsBitmap<kAlignment>::Create( + reinterpret_cast<uintptr_t>(bump_pointer_space_->Begin()), + reinterpret_cast<uintptr_t>(bump_pointer_space_->Limit()))); + + // Create one MemMap for all the data structures + size_t moving_space_size = bump_pointer_space_->Capacity(); + size_t chunk_info_vec_size = moving_space_size / kOffsetChunkSize; + size_t nr_moving_pages = moving_space_size / kPageSize; + size_t nr_non_moving_pages = heap->GetNonMovingSpace()->Capacity() / kPageSize; + + std::string err_msg; + info_map_ = MemMap::MapAnonymous("Concurrent mark-compact chunk-info vector", + chunk_info_vec_size * sizeof(uint32_t) + + nr_non_moving_pages * sizeof(ObjReference) + + nr_moving_pages * sizeof(ObjReference) + + nr_moving_pages * sizeof(uint32_t), + PROT_READ | PROT_WRITE, + /*low_4gb=*/ false, + &err_msg); + if (UNLIKELY(!info_map_.IsValid())) { + LOG(FATAL) << "Failed to allocate concurrent mark-compact chunk-info vector: " << err_msg; + } else { + uint8_t* p = info_map_.Begin(); + chunk_info_vec_ = reinterpret_cast<uint32_t*>(p); + vector_length_ = chunk_info_vec_size; + + p += chunk_info_vec_size * sizeof(uint32_t); + first_objs_non_moving_space_ = reinterpret_cast<ObjReference*>(p); + + p += nr_non_moving_pages * sizeof(ObjReference); + first_objs_moving_space_ = reinterpret_cast<ObjReference*>(p); + + p += nr_moving_pages * sizeof(ObjReference); + pre_compact_offset_moving_space_ = reinterpret_cast<uint32_t*>(p); + } + + size_t moving_space_alignment = BestPageTableAlignment(moving_space_size); + // The moving space is created at a fixed address, which is expected to be + // PMD-size aligned. + if (!IsAlignedParam(bump_pointer_space_->Begin(), moving_space_alignment)) { + LOG(WARNING) << "Bump pointer space is not aligned to " << PrettySize(moving_space_alignment) + << ". This can lead to longer stop-the-world pauses for compaction"; + } + // NOTE: PROT_NONE is used here as these mappings are for address space reservation + // only and will be used only after appropriately remapping them. + from_space_map_ = MemMap::MapAnonymousAligned("Concurrent mark-compact from-space", + moving_space_size, + PROT_NONE, + /*low_4gb=*/kObjPtrPoisoning, + moving_space_alignment, + &err_msg); + if (UNLIKELY(!from_space_map_.IsValid())) { + LOG(FATAL) << "Failed to allocate concurrent mark-compact from-space" << err_msg; + } else { + from_space_begin_ = from_space_map_.Begin(); + } + + // In some cases (32-bit or kObjPtrPoisoning) it's too much to ask for 3 + // heap-sized mappings in low-4GB. So tolerate failure here by attempting to + // mmap again right before the compaction pause. And if even that fails, then + // running the GC cycle in copy-mode rather than minor-fault. + // + // This map doesn't have to be aligned to 2MB as we don't mremap on it. + if (!kObjPtrPoisoning && uffd_minor_fault_supported_) { + // We need this map only if minor-fault feature is supported. But in that case + // don't create the mapping if obj-ptr poisoning is enabled as then the mapping + // has to be created in low_4gb. Doing this here rather than later causes the + // Dex2oatImageTest.TestExtension gtest to fail in 64-bit platforms. + shadow_to_space_map_ = MemMap::MapAnonymous("Concurrent mark-compact moving-space shadow", + moving_space_size, + PROT_NONE, + /*low_4gb=*/false, + &err_msg); + if (!shadow_to_space_map_.IsValid()) { + LOG(WARNING) << "Failed to allocate concurrent mark-compact moving-space shadow: " << err_msg; + } + } + const size_t num_pages = + 1 + (use_uffd_sigbus_ ? 
kMutatorCompactionBufferCount : + std::min(heap_->GetParallelGCThreadCount(), kMaxNumUffdWorkers)); + compaction_buffers_map_ = MemMap::MapAnonymous("Concurrent mark-compact compaction buffers", + kPageSize * num_pages, + PROT_READ | PROT_WRITE, + /*low_4gb=*/kObjPtrPoisoning, + &err_msg); + if (UNLIKELY(!compaction_buffers_map_.IsValid())) { + LOG(FATAL) << "Failed to allocate concurrent mark-compact compaction buffers" << err_msg; + } + // We also use the first page-sized buffer for the purpose of terminating concurrent compaction. + conc_compaction_termination_page_ = compaction_buffers_map_.Begin(); + // Touch the page deliberately to avoid userfaults on it. We madvise it in + // CompactionPhase() before using it to terminate concurrent compaction. + ForceRead(conc_compaction_termination_page_); + + // In most of the cases, we don't expect more than one LinearAlloc space. + linear_alloc_spaces_data_.reserve(1); + + // Initialize GC metrics. + metrics::ArtMetrics* metrics = GetMetrics(); + // The mark-compact collector supports only full-heap collections at the moment. + gc_time_histogram_ = metrics->FullGcCollectionTime(); + metrics_gc_count_ = metrics->FullGcCount(); + metrics_gc_count_delta_ = metrics->FullGcCountDelta(); + gc_throughput_histogram_ = metrics->FullGcThroughput(); + gc_tracing_throughput_hist_ = metrics->FullGcTracingThroughput(); + gc_throughput_avg_ = metrics->FullGcThroughputAvg(); + gc_tracing_throughput_avg_ = metrics->FullGcTracingThroughputAvg(); + gc_scanned_bytes_ = metrics->FullGcScannedBytes(); + gc_scanned_bytes_delta_ = metrics->FullGcScannedBytesDelta(); + gc_freed_bytes_ = metrics->FullGcFreedBytes(); + gc_freed_bytes_delta_ = metrics->FullGcFreedBytesDelta(); + gc_duration_ = metrics->FullGcDuration(); + gc_duration_delta_ = metrics->FullGcDurationDelta(); + are_metrics_initialized_ = true; +} + +void MarkCompact::AddLinearAllocSpaceData(uint8_t* begin, size_t len) { + DCHECK_ALIGNED(begin, kPageSize); + DCHECK_ALIGNED(len, kPageSize); + DCHECK_GE(len, kPMDSize); + size_t alignment = BestPageTableAlignment(len); + bool is_shared = false; + // We use MAP_SHARED on non-zygote processes for leveraging userfaultfd's minor-fault feature. + if (map_linear_alloc_shared_) { + void* ret = mmap(begin, + len, + PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_SHARED | MAP_FIXED, + /*fd=*/-1, + /*offset=*/0); + CHECK_EQ(ret, begin) << "mmap failed: " << strerror(errno); + is_shared = true; + } + std::string err_msg; + MemMap shadow(MemMap::MapAnonymousAligned("linear-alloc shadow map", + len, + PROT_NONE, + /*low_4gb=*/false, + alignment, + &err_msg)); + if (!shadow.IsValid()) { + LOG(FATAL) << "Failed to allocate linear-alloc shadow map: " << err_msg; + UNREACHABLE(); + } + + MemMap page_status_map(MemMap::MapAnonymous("linear-alloc page-status map", + len / kPageSize, + PROT_READ | PROT_WRITE, + /*low_4gb=*/false, + &err_msg)); + if (!page_status_map.IsValid()) { + LOG(FATAL) << "Failed to allocate linear-alloc page-status shadow map: " << err_msg; + UNREACHABLE(); + } + linear_alloc_spaces_data_.emplace_back(std::forward<MemMap>(shadow), + std::forward<MemMap>(page_status_map), + begin, + begin + len, + is_shared); +} + +void MarkCompact::BindAndResetBitmaps() { + // TODO: We need to hold heap_bitmap_lock_ only for populating immune_spaces. + // The card-table and mod-union-table processing can be done without it. So + // change the logic below. Note that the bitmap clearing would require the + // lock. 
+ TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); + accounting::CardTable* const card_table = heap_->GetCardTable(); + // Mark all of the spaces we never collect as immune. + for (const auto& space : GetHeap()->GetContinuousSpaces()) { + if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect || + space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) { + CHECK(space->IsZygoteSpace() || space->IsImageSpace()); + immune_spaces_.AddSpace(space); + accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space); + if (table != nullptr) { + table->ProcessCards(); + } else { + // Keep cards aged if we don't have a mod-union table since we may need + // to scan them in future GCs. This case is for app images. + // TODO: We could probably scan the objects right here to avoid doing + // another scan through the card-table. + card_table->ModifyCardsAtomic( + space->Begin(), + space->End(), + [](uint8_t card) { + return (card == gc::accounting::CardTable::kCardClean) + ? card + : gc::accounting::CardTable::kCardAged; + }, + /* card modified visitor */ VoidFunctor()); + } + } else { + CHECK(!space->IsZygoteSpace()); + CHECK(!space->IsImageSpace()); + // The card-table corresponding to bump-pointer and non-moving space can + // be cleared, because we are going to traverse all the reachable objects + // in these spaces. This card-table will eventually be used to track + // mutations while concurrent marking is going on. + card_table->ClearCardRange(space->Begin(), space->Limit()); + if (space != bump_pointer_space_) { + CHECK_EQ(space, heap_->GetNonMovingSpace()); + non_moving_space_ = space; + non_moving_space_bitmap_ = space->GetMarkBitmap(); + } + } + } +} + +void MarkCompact::MarkZygoteLargeObjects() { + Thread* self = thread_running_gc_; + DCHECK_EQ(self, Thread::Current()); + space::LargeObjectSpace* const los = heap_->GetLargeObjectsSpace(); + if (los != nullptr) { + // Pick the current live bitmap (mark bitmap if swapped). + accounting::LargeObjectBitmap* const live_bitmap = los->GetLiveBitmap(); + accounting::LargeObjectBitmap* const mark_bitmap = los->GetMarkBitmap(); + // Walk through all of the objects and explicitly mark the zygote ones so they don't get swept. + std::pair<uint8_t*, uint8_t*> range = los->GetBeginEndAtomic(); + live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(range.first), + reinterpret_cast<uintptr_t>(range.second), + [mark_bitmap, los, self](mirror::Object* obj) + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES_SHARED(Locks::mutator_lock_) { + if (los->IsZygoteLargeObject(self, obj)) { + mark_bitmap->Set(obj); + } + }); + } +} + +void MarkCompact::InitializePhase() { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); + mark_stack_ = heap_->GetMarkStack(); + CHECK(mark_stack_->IsEmpty()); + immune_spaces_.Reset(); + moving_first_objs_count_ = 0; + non_moving_first_objs_count_ = 0; + black_page_count_ = 0; + bytes_scanned_ = 0; + freed_objects_ = 0; + // The first buffer is used by gc-thread. + compaction_buffer_counter_.store(1, std::memory_order_relaxed); + from_space_slide_diff_ = from_space_begin_ - bump_pointer_space_->Begin(); + black_allocations_begin_ = bump_pointer_space_->Limit(); + walk_super_class_cache_ = nullptr; + // TODO: Would it suffice to read it once in the constructor, which is called + // in zygote process? 
+ pointer_size_ = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); +} + +class MarkCompact::ThreadFlipVisitor : public Closure { + public: + explicit ThreadFlipVisitor(MarkCompact* collector) : collector_(collector) {} + + void Run(Thread* thread) override REQUIRES_SHARED(Locks::mutator_lock_) { + // Note: self is not necessarily equal to thread since thread may be suspended. + Thread* self = Thread::Current(); + CHECK(thread == self || thread->GetState() != ThreadState::kRunnable) + << thread->GetState() << " thread " << thread << " self " << self; + thread->VisitRoots(collector_, kVisitRootFlagAllRoots); + // Interpreter cache is thread-local so it needs to be swept either in a + // flip, or a stop-the-world pause. + CHECK(collector_->compacting_); + thread->SweepInterpreterCache(collector_); + thread->AdjustTlab(collector_->black_objs_slide_diff_); + collector_->GetBarrier().Pass(self); + } + + private: + MarkCompact* const collector_; +}; + +class MarkCompact::FlipCallback : public Closure { + public: + explicit FlipCallback(MarkCompact* collector) : collector_(collector) {} + + void Run(Thread* thread ATTRIBUTE_UNUSED) override REQUIRES(Locks::mutator_lock_) { + collector_->CompactionPause(); + } + + private: + MarkCompact* const collector_; +}; + +void MarkCompact::RunPhases() { + Thread* self = Thread::Current(); + thread_running_gc_ = self; + Runtime* runtime = Runtime::Current(); + InitializePhase(); + GetHeap()->PreGcVerification(this); + { + ReaderMutexLock mu(self, *Locks::mutator_lock_); + MarkingPhase(); + } + { + // Marking pause + ScopedPause pause(this); + MarkingPause(); + if (kIsDebugBuild) { + bump_pointer_space_->AssertAllThreadLocalBuffersAreRevoked(); + } + } + // To increase likelihood of black allocations. For testing purposes only. + if (kIsDebugBuild && heap_->GetTaskProcessor()->GetRunningThread() == thread_running_gc_) { + usleep(500'000); + } + { + ReaderMutexLock mu(self, *Locks::mutator_lock_); + ReclaimPhase(); + PrepareForCompaction(); + } + if (uffd_ != kFallbackMode && !use_uffd_sigbus_) { + heap_->GetThreadPool()->WaitForWorkersToBeCreated(); + } + + { + // Compaction pause + gc_barrier_.Init(self, 0); + ThreadFlipVisitor visitor(this); + FlipCallback callback(this); + size_t barrier_count = runtime->GetThreadList()->FlipThreadRoots( + &visitor, &callback, this, GetHeap()->GetGcPauseListener()); + { + ScopedThreadStateChange tsc(self, ThreadState::kWaitingForCheckPointsToRun); + gc_barrier_.Increment(self, barrier_count); + } + } + + if (IsValidFd(uffd_)) { + ReaderMutexLock mu(self, *Locks::mutator_lock_); + CompactionPhase(); + } + + FinishPhase(); + thread_running_gc_ = nullptr; + GetHeap()->PostGcVerification(this); +} + +void MarkCompact::InitMovingSpaceFirstObjects(const size_t vec_len) { + // Find the first live word first. + size_t to_space_page_idx = 0; + uint32_t offset_in_chunk_word; + uint32_t offset; + mirror::Object* obj; + const uintptr_t heap_begin = moving_space_bitmap_->HeapBegin(); + + size_t chunk_idx; + // Find the first live word in the space + for (chunk_idx = 0; chunk_info_vec_[chunk_idx] == 0; chunk_idx++) { + if (chunk_idx > vec_len) { + // We don't have any live data on the moving-space. 
+ return; + } + } + // Use live-words bitmap to find the first word + offset_in_chunk_word = live_words_bitmap_->FindNthLiveWordOffset(chunk_idx, /*n*/ 0); + offset = chunk_idx * kBitsPerVectorWord + offset_in_chunk_word; + DCHECK(live_words_bitmap_->Test(offset)) << "offset=" << offset + << " chunk_idx=" << chunk_idx + << " N=0" + << " offset_in_word=" << offset_in_chunk_word + << " word=" << std::hex + << live_words_bitmap_->GetWord(chunk_idx); + // The first object doesn't require using FindPrecedingObject(). + obj = reinterpret_cast<mirror::Object*>(heap_begin + offset * kAlignment); + // TODO: add a check to validate the object. + + pre_compact_offset_moving_space_[to_space_page_idx] = offset; + first_objs_moving_space_[to_space_page_idx].Assign(obj); + to_space_page_idx++; + + uint32_t page_live_bytes = 0; + while (true) { + for (; page_live_bytes <= kPageSize; chunk_idx++) { + if (chunk_idx > vec_len) { + moving_first_objs_count_ = to_space_page_idx; + return; + } + page_live_bytes += chunk_info_vec_[chunk_idx]; + } + chunk_idx--; + page_live_bytes -= kPageSize; + DCHECK_LE(page_live_bytes, kOffsetChunkSize); + DCHECK_LE(page_live_bytes, chunk_info_vec_[chunk_idx]) + << " chunk_idx=" << chunk_idx + << " to_space_page_idx=" << to_space_page_idx + << " vec_len=" << vec_len; + DCHECK(IsAligned<kAlignment>(chunk_info_vec_[chunk_idx] - page_live_bytes)); + offset_in_chunk_word = + live_words_bitmap_->FindNthLiveWordOffset( + chunk_idx, (chunk_info_vec_[chunk_idx] - page_live_bytes) / kAlignment); + offset = chunk_idx * kBitsPerVectorWord + offset_in_chunk_word; + DCHECK(live_words_bitmap_->Test(offset)) + << "offset=" << offset + << " chunk_idx=" << chunk_idx + << " N=" << ((chunk_info_vec_[chunk_idx] - page_live_bytes) / kAlignment) + << " offset_in_word=" << offset_in_chunk_word + << " word=" << std::hex << live_words_bitmap_->GetWord(chunk_idx); + // TODO: Can we optimize this for large objects? If we are continuing a + // large object that spans multiple pages, then we may be able to do without + // calling FindPrecedingObject(). + // + // Find the object which encapsulates offset in it, which could be + // starting at offset itself. + obj = moving_space_bitmap_->FindPrecedingObject(heap_begin + offset * kAlignment); + // TODO: add a check to validate the object. + pre_compact_offset_moving_space_[to_space_page_idx] = offset; + first_objs_moving_space_[to_space_page_idx].Assign(obj); + to_space_page_idx++; + chunk_idx++; + } +} + +void MarkCompact::InitNonMovingSpaceFirstObjects() { + accounting::ContinuousSpaceBitmap* bitmap = non_moving_space_->GetLiveBitmap(); + uintptr_t begin = reinterpret_cast<uintptr_t>(non_moving_space_->Begin()); + const uintptr_t end = reinterpret_cast<uintptr_t>(non_moving_space_->End()); + mirror::Object* prev_obj; + size_t page_idx; + { + // Find first live object + mirror::Object* obj = nullptr; + bitmap->VisitMarkedRange</*kVisitOnce*/ true>(begin, + end, + [&obj] (mirror::Object* o) { + obj = o; + }); + if (obj == nullptr) { + // There are no live objects in the non-moving space + return; + } + page_idx = (reinterpret_cast<uintptr_t>(obj) - begin) / kPageSize; + first_objs_non_moving_space_[page_idx++].Assign(obj); + prev_obj = obj; + } + // TODO: check obj is valid + uintptr_t prev_obj_end = reinterpret_cast<uintptr_t>(prev_obj) + + RoundUp(prev_obj->SizeOf<kDefaultVerifyFlags>(), kAlignment); + // For every page find the object starting from which we need to call + // VisitReferences. 
It could either be an object that started on some + // preceding page, or some object starting within this page. + begin = RoundDown(reinterpret_cast<uintptr_t>(prev_obj) + kPageSize, kPageSize); + while (begin < end) { + // Utilize, if any, large object that started in some preceding page, but + // overlaps with this page as well. + if (prev_obj != nullptr && prev_obj_end > begin) { + DCHECK_LT(prev_obj, reinterpret_cast<mirror::Object*>(begin)); + first_objs_non_moving_space_[page_idx].Assign(prev_obj); + mirror::Class* klass = prev_obj->GetClass<kVerifyNone, kWithoutReadBarrier>(); + if (bump_pointer_space_->HasAddress(klass)) { + LOG(WARNING) << "found inter-page object " << prev_obj + << " in non-moving space with klass " << klass + << " in moving space"; + } + } else { + prev_obj_end = 0; + // It's sufficient to only search for previous object in the preceding page. + // If no live object started in that page and some object had started in + // the page preceding to that page, which was big enough to overlap with + // the current page, then we wouldn't be in the else part. + prev_obj = bitmap->FindPrecedingObject(begin, begin - kPageSize); + if (prev_obj != nullptr) { + prev_obj_end = reinterpret_cast<uintptr_t>(prev_obj) + + RoundUp(prev_obj->SizeOf<kDefaultVerifyFlags>(), kAlignment); + } + if (prev_obj_end > begin) { + mirror::Class* klass = prev_obj->GetClass<kVerifyNone, kWithoutReadBarrier>(); + if (bump_pointer_space_->HasAddress(klass)) { + LOG(WARNING) << "found inter-page object " << prev_obj + << " in non-moving space with klass " << klass + << " in moving space"; + } + first_objs_non_moving_space_[page_idx].Assign(prev_obj); + } else { + // Find the first live object in this page + bitmap->VisitMarkedRange</*kVisitOnce*/ true>( + begin, + begin + kPageSize, + [this, page_idx] (mirror::Object* obj) { + first_objs_non_moving_space_[page_idx].Assign(obj); + }); + } + // An empty entry indicates that the page has no live objects and hence + // can be skipped. + } + begin += kPageSize; + page_idx++; + } + non_moving_first_objs_count_ = page_idx; +} + +bool MarkCompact::CanCompactMovingSpaceWithMinorFault() { + size_t min_size = (moving_first_objs_count_ + black_page_count_) * kPageSize; + return minor_fault_initialized_ && shadow_to_space_map_.IsValid() && + shadow_to_space_map_.Size() >= min_size; +} + +class MarkCompact::ConcurrentCompactionGcTask : public SelfDeletingTask { + public: + explicit ConcurrentCompactionGcTask(MarkCompact* collector, size_t idx) + : collector_(collector), index_(idx) {} + + void Run(Thread* self ATTRIBUTE_UNUSED) override REQUIRES_SHARED(Locks::mutator_lock_) { + if (collector_->CanCompactMovingSpaceWithMinorFault()) { + collector_->ConcurrentCompaction<MarkCompact::kMinorFaultMode>(/*buf=*/nullptr); + } else { + // The passed page/buf to ConcurrentCompaction is used by the thread as a + // kPageSize buffer for compacting and updating objects into and then + // passing the buf to uffd ioctls. 
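(Aside: the worker loop these tasks run boils down to the standard userfaultfd copy-mode pattern: block in read() on the uffd, compose the faulting page in a thread-private buffer, and install it with UFFDIO_COPY. Below is a minimal, self-contained sketch of that general pattern; the names and the trivial zero-fill are illustrative and are not the collector's actual ConcurrentCompaction() code.)

    #include <linux/userfaultfd.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <cstdint>
    #include <cstring>

    // Illustrative worker loop: resolves missing-page faults on 'uffd' by
    // copying a freshly composed page from a thread-private 'buffer' into the
    // faulting address range.
    void UffdWorkerLoop(int uffd, void* buffer, size_t page_size) {
      struct uffd_msg msg;
      while (read(uffd, &msg, sizeof(msg)) == static_cast<ssize_t>(sizeof(msg))) {
        if (msg.event != UFFD_EVENT_PAGEFAULT) {
          continue;
        }
        uintptr_t fault_page = msg.arg.pagefault.address & ~(page_size - 1);
        // The real collector would compact the objects belonging to
        // 'fault_page' into 'buffer' here; the sketch just zero-fills it.
        std::memset(buffer, 0, page_size);
        struct uffdio_copy copy = {};
        copy.src = reinterpret_cast<uintptr_t>(buffer);
        copy.dst = fault_page;
        copy.len = page_size;
        copy.mode = 0;
        // Atomically maps the page into the registered range and wakes the
        // faulting thread.
        ioctl(uffd, UFFDIO_COPY, &copy);
      }
    }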
+ uint8_t* buf = collector_->compaction_buffers_map_.Begin() + index_ * kPageSize; + collector_->ConcurrentCompaction<MarkCompact::kCopyMode>(buf); + } + } + + private: + MarkCompact* const collector_; + size_t index_; +}; + +void MarkCompact::PrepareForCompaction() { + uint8_t* space_begin = bump_pointer_space_->Begin(); + size_t vector_len = (black_allocations_begin_ - space_begin) / kOffsetChunkSize; + DCHECK_LE(vector_len, vector_length_); + for (size_t i = 0; i < vector_len; i++) { + DCHECK_LE(chunk_info_vec_[i], kOffsetChunkSize); + DCHECK_EQ(chunk_info_vec_[i], live_words_bitmap_->LiveBytesInBitmapWord(i)); + } + InitMovingSpaceFirstObjects(vector_len); + InitNonMovingSpaceFirstObjects(); + + // TODO: We can do a lot of neat tricks with this offset vector to tune the + // compaction as we wish. Originally, the compaction algorithm slides all + // live objects towards the beginning of the heap. This is nice because it + // keeps the spatial locality of objects intact. + // However, sometimes it's desired to compact objects in certain portions + // of the heap. For instance, it is expected that, over time, + // objects towards the beginning of the heap are long lived and are always + // densely packed. In this case, it makes sense to only update references in + // there and not try to compact it. + // Furthermore, we might have some large objects and may not want to move such + // objects. + // We can adjust, without too much effort, the values in the chunk_info_vec_ such + // that the objects in the dense beginning area aren't moved. OTOH, large + // objects, which could be anywhere in the heap, could also be kept from + // moving by using a similar trick. The only issue is that by doing this we will + // leave an unused hole in the middle of the heap which can't be used for + // allocations until we do a *full* compaction. + // + // At this point every element in the chunk_info_vec_ contains the live-bytes + // of the corresponding chunk. For old-to-new address computation we need + // every element to reflect total live-bytes till the corresponding chunk. + + // Live-bytes count is required to compute post_compact_end_ below. + uint32_t total; + // Update the vector one past the heap usage as it is required for black + // allocated objects' post-compact address computation. + if (vector_len < vector_length_) { + vector_len++; + total = 0; + } else { + // Fetch the value stored in the last element before it gets overwritten by + // std::exclusive_scan(). + total = chunk_info_vec_[vector_len - 1]; + } + std::exclusive_scan(chunk_info_vec_, chunk_info_vec_ + vector_len, chunk_info_vec_, 0); + total += chunk_info_vec_[vector_len - 1]; + + for (size_t i = vector_len; i < vector_length_; i++) { + DCHECK_EQ(chunk_info_vec_[i], 0u); + } + post_compact_end_ = AlignUp(space_begin + total, kPageSize); + CHECK_EQ(post_compact_end_, space_begin + moving_first_objs_count_ * kPageSize); + black_objs_slide_diff_ = black_allocations_begin_ - post_compact_end_; + // How do we handle compaction of heap portion used for allocations after the + // marking-pause? + // All allocations after the marking-pause are considered black (reachable) + // for this GC cycle. However, they need not be allocated contiguously as + // different mutators use TLABs. So we will compact the heap till the point + // where allocations took place before the marking-pause. And everything after + // that will be slid with TLAB holes, and then TLAB info in TLS will be + // appropriately updated in the pre-compaction pause. 
+ // The chunk-info vector entries for the post marking-pause allocations will be + // also updated in the pre-compaction pause. + + bool is_zygote = Runtime::Current()->IsZygote(); + if (!uffd_initialized_ && CreateUserfaultfd(/*post_fork*/false)) { + if (!use_uffd_sigbus_) { + // Register the buffer that we use for terminating concurrent compaction + struct uffdio_register uffd_register; + uffd_register.range.start = reinterpret_cast<uintptr_t>(conc_compaction_termination_page_); + uffd_register.range.len = kPageSize; + uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING; + CHECK_EQ(ioctl(uffd_, UFFDIO_REGISTER, &uffd_register), 0) + << "ioctl_userfaultfd: register compaction termination page: " << strerror(errno); + } + if (!uffd_minor_fault_supported_ && shadow_to_space_map_.IsValid()) { + // A valid shadow-map for moving space is only possible if we + // were able to map it in the constructor. That also means that its size + // matches the moving-space. + CHECK_EQ(shadow_to_space_map_.Size(), bump_pointer_space_->Capacity()); + // Release the shadow map for moving-space if we don't support minor-fault + // as it's not required. + shadow_to_space_map_.Reset(); + } + } + // For zygote we create the thread pool each time before starting compaction, + // and get rid of it when finished. This is expected to happen rarely as + // zygote spends most of the time in native fork loop. + if (uffd_ != kFallbackMode) { + if (!use_uffd_sigbus_) { + ThreadPool* pool = heap_->GetThreadPool(); + if (UNLIKELY(pool == nullptr)) { + // On devices with 2 cores, GetParallelGCThreadCount() will return 1, + // which is desired number of workers on such devices. + heap_->CreateThreadPool(std::min(heap_->GetParallelGCThreadCount(), kMaxNumUffdWorkers)); + pool = heap_->GetThreadPool(); + } + size_t num_threads = pool->GetThreadCount(); + thread_pool_counter_ = num_threads; + for (size_t i = 0; i < num_threads; i++) { + pool->AddTask(thread_running_gc_, new ConcurrentCompactionGcTask(this, i + 1)); + } + CHECK_EQ(pool->GetTaskCount(thread_running_gc_), num_threads); + } + /* + * Possible scenarios for mappings: + * A) All zygote GCs (or if minor-fault feature isn't available): uses + * uffd's copy mode + * 1) For moving-space ('to' space is same as the moving-space): + * a) Private-anonymous mappings for 'to' and 'from' space are created in + * the constructor. + * b) In the compaction pause, we mremap(dontunmap) from 'to' space to + * 'from' space. This results in moving all pages to 'from' space and + * emptying the 'to' space, thereby preparing it for userfaultfd + * registration. + * + * 2) For linear-alloc space: + * a) Private-anonymous mappings for the linear-alloc and its 'shadow' + * are created by the arena-pool. + * b) In the compaction pause, we mremap(dontumap) with similar effect as + * (A.1.b) above. + * + * B) First GC after zygote: uses uffd's copy-mode + * 1) For moving-space: + * a) If the mmap for shadow-map has been successful in the constructor, + * then we remap it (mmap with MAP_FIXED) to get a shared-anonymous + * mapping. + * b) Else, we create two memfd and ftruncate them to the moving-space + * size. + * c) Same as (A.1.b) + * d) If (B.1.a), then mremap(dontunmap) from shadow-map to + * 'to' space. This will make both of them map to the same pages + * e) If (B.1.b), then mmap with the first memfd in shared mode on the + * 'to' space. 
+ * f) At the end of compaction, we will have moved the moving-space + * objects to a MAP_SHARED mapping, readying it for minor-fault from next + * GC cycle. + * + * 2) For linear-alloc space: + * a) Same as (A.2.b) + * b) mmap a shared-anonymous mapping onto the linear-alloc space. + * c) Same as (B.1.f) + * + * C) All subsequent GCs: preferable minor-fault mode. But may also require + * using copy-mode. + * 1) For moving-space: + * a) If the shadow-map is created and no memfd was used, then that means + * we are using shared-anonymous. Therefore, mmap a shared-anonymous on + * the shadow-space. + * b) If the shadow-map is not mapped yet, then mmap one with a size + * big enough to hold the compacted moving space. This may fail, in which + * case we will use uffd's copy-mode. + * c) If (b) is successful, then mmap the free memfd onto shadow-map. + * d) Same as (A.1.b) + * e) In compaction pause, if the shadow-map was not created, then use + * copy-mode. + * f) Else, if the created map is smaller than the required-size, then + * use mremap (without dontunmap) to expand the size. If failed, then use + * copy-mode. + * g) Otherwise, same as (B.1.d) and use minor-fault mode. + * + * 2) For linear-alloc space: + * a) Same as (A.2.b) + * b) Use minor-fault mode + */ + auto mmap_shadow_map = [this](int flags, int fd) { + void* ret = mmap(shadow_to_space_map_.Begin(), + shadow_to_space_map_.Size(), + PROT_READ | PROT_WRITE, + flags, + fd, + /*offset=*/0); + DCHECK_NE(ret, MAP_FAILED) << "mmap for moving-space shadow failed:" << strerror(errno); + }; + // Setup all the virtual memory ranges required for concurrent compaction. + if (minor_fault_initialized_) { + DCHECK(!is_zygote); + if (UNLIKELY(!shadow_to_space_map_.IsValid())) { + // This case happens only once on the first GC in minor-fault mode, if + // we were unable to reserve shadow-map for moving-space in the + // beginning. + DCHECK_GE(moving_to_space_fd_, 0); + // Take extra 4MB to reduce the likelihood of requiring resizing this + // map in the pause due to black allocations. + size_t reqd_size = std::min(moving_first_objs_count_ * kPageSize + 4 * MB, + bump_pointer_space_->Capacity()); + // We cannot support memory-tool with shadow-map (as it requires + // appending a redzone) in this case because the mapping may have to be expanded + // using mremap (in KernelPreparation()), which would ignore the redzone. + // MemMap::MapFile() appends a redzone, but MemMap::MapAnonymous() doesn't. + std::string err_msg; + shadow_to_space_map_ = MemMap::MapAnonymous("moving-space-shadow", + reqd_size, + PROT_NONE, + /*low_4gb=*/kObjPtrPoisoning, + &err_msg); + + if (shadow_to_space_map_.IsValid()) { + CHECK(!kMemoryToolAddsRedzones || shadow_to_space_map_.GetRedzoneSize() == 0u); + // We want to use MemMap to get low-4GB mapping, if required, but then also + // want to have its ownership as we may grow it (in + // KernelPreparation()). If the ownership is not taken and we try to + // resize MemMap, then it unmaps the virtual range. + MemMap temp = shadow_to_space_map_.TakeReservedMemory(shadow_to_space_map_.Size(), + /*reuse*/ true); + std::swap(temp, shadow_to_space_map_); + DCHECK(!temp.IsValid()); + } else { + LOG(WARNING) << "Failed to create moving space's shadow map of " << PrettySize(reqd_size) + << " size. 
" << err_msg; + } + } + + if (LIKELY(shadow_to_space_map_.IsValid())) { + int fd = moving_to_space_fd_; + int mmap_flags = MAP_SHARED | MAP_FIXED; + if (fd == kFdUnused) { + // Unused moving-to-space fd means we are using anonymous shared + // mapping. + DCHECK_EQ(shadow_to_space_map_.Size(), bump_pointer_space_->Capacity()); + mmap_flags |= MAP_ANONYMOUS; + fd = -1; + } + // If the map is smaller than required, then we'll do mremap in the + // compaction pause to increase the size. + mmap_shadow_map(mmap_flags, fd); + } + + for (auto& data : linear_alloc_spaces_data_) { + DCHECK_EQ(mprotect(data.shadow_.Begin(), data.shadow_.Size(), PROT_READ | PROT_WRITE), 0) + << "mprotect failed: " << strerror(errno); + } + } else if (!is_zygote && uffd_minor_fault_supported_) { + // First GC after zygote-fork. We will still use uffd's copy mode but will + // use it to move objects to MAP_SHARED (to prepare for subsequent GCs, which + // will use uffd's minor-fault feature). + if (shadow_to_space_map_.IsValid() && + shadow_to_space_map_.Size() == bump_pointer_space_->Capacity()) { + mmap_shadow_map(MAP_SHARED | MAP_FIXED | MAP_ANONYMOUS, /*fd=*/-1); + } else { + size_t size = bump_pointer_space_->Capacity(); + DCHECK_EQ(moving_to_space_fd_, kFdUnused); + DCHECK_EQ(moving_from_space_fd_, kFdUnused); + const char* name = bump_pointer_space_->GetName(); + moving_to_space_fd_ = memfd_create(name, MFD_CLOEXEC); + CHECK_NE(moving_to_space_fd_, -1) + << "memfd_create: failed for " << name << ": " << strerror(errno); + moving_from_space_fd_ = memfd_create(name, MFD_CLOEXEC); + CHECK_NE(moving_from_space_fd_, -1) + << "memfd_create: failed for " << name << ": " << strerror(errno); + + // memfds are considered as files from resource limits point of view. + // And the moving space could be several hundred MBs. So increase the + // limit, if it's lower than moving-space size. + bool rlimit_changed = false; + rlimit rlim_read; + CHECK_EQ(getrlimit(RLIMIT_FSIZE, &rlim_read), 0) << "getrlimit failed: " << strerror(errno); + if (rlim_read.rlim_cur < size) { + rlimit_changed = true; + rlimit rlim = rlim_read; + rlim.rlim_cur = size; + CHECK_EQ(setrlimit(RLIMIT_FSIZE, &rlim), 0) << "setrlimit failed: " << strerror(errno); + } + + // moving-space will map this fd so that we compact objects into it. + int ret = ftruncate(moving_to_space_fd_, size); + CHECK_EQ(ret, 0) << "ftruncate failed for moving-space:" << strerror(errno); + ret = ftruncate(moving_from_space_fd_, size); + CHECK_EQ(ret, 0) << "ftruncate failed for moving-space:" << strerror(errno); + + if (rlimit_changed) { + // reset the rlimit to the original limits. 
+ CHECK_EQ(setrlimit(RLIMIT_FSIZE, &rlim_read), 0) + << "setrlimit failed: " << strerror(errno); + } + } + } + } +} + +class MarkCompact::VerifyRootMarkedVisitor : public SingleRootVisitor { + public: + explicit VerifyRootMarkedVisitor(MarkCompact* collector) : collector_(collector) { } + + void VisitRoot(mirror::Object* root, const RootInfo& info) override + REQUIRES_SHARED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) { + CHECK(collector_->IsMarked(root) != nullptr) << info.ToString(); + } + + private: + MarkCompact* const collector_; +}; + +void MarkCompact::ReMarkRoots(Runtime* runtime) { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); + DCHECK_EQ(thread_running_gc_, Thread::Current()); + Locks::mutator_lock_->AssertExclusiveHeld(thread_running_gc_); + MarkNonThreadRoots(runtime); + MarkConcurrentRoots(static_cast<VisitRootFlags>(kVisitRootFlagNewRoots + | kVisitRootFlagStopLoggingNewRoots + | kVisitRootFlagClearRootLog), + runtime); + + if (kVerifyRootsMarked) { + TimingLogger::ScopedTiming t2("(Paused)VerifyRoots", GetTimings()); + VerifyRootMarkedVisitor visitor(this); + runtime->VisitRoots(&visitor); + } +} + +void MarkCompact::MarkingPause() { + TimingLogger::ScopedTiming t("(Paused)MarkingPause", GetTimings()); + Runtime* runtime = Runtime::Current(); + Locks::mutator_lock_->AssertExclusiveHeld(thread_running_gc_); + { + // Handle the dirty objects as we are a concurrent GC + WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_); + { + MutexLock mu2(thread_running_gc_, *Locks::runtime_shutdown_lock_); + MutexLock mu3(thread_running_gc_, *Locks::thread_list_lock_); + std::list<Thread*> thread_list = runtime->GetThreadList()->GetList(); + for (Thread* thread : thread_list) { + thread->VisitRoots(this, static_cast<VisitRootFlags>(0)); + DCHECK_EQ(thread->GetThreadLocalGcBuffer(), nullptr); + // Need to revoke all the thread-local allocation stacks since we will + // swap the allocation stacks (below) and don't want anybody to allocate + // into the live stack. + thread->RevokeThreadLocalAllocationStack(); + bump_pointer_space_->RevokeThreadLocalBuffers(thread); + } + } + // Fetch only the accumulated objects-allocated count as it is guaranteed to + // be up-to-date after the TLAB revocation above. + freed_objects_ += bump_pointer_space_->GetAccumulatedObjectsAllocated(); + // Capture 'end' of moving-space at this point. Every allocation beyond this + // point will be considered as black. + // Align-up to page boundary so that black allocations happen from next page + // onwards. Also, it ensures that 'end' is aligned for card-table's + // ClearCardRange(). + black_allocations_begin_ = bump_pointer_space_->AlignEnd(thread_running_gc_, kPageSize); + DCHECK(IsAligned<kAlignment>(black_allocations_begin_)); + black_allocations_begin_ = AlignUp(black_allocations_begin_, kPageSize); + + // Re-mark root set. Doesn't include thread-roots as they are already marked + // above. + ReMarkRoots(runtime); + // Scan dirty objects. + RecursiveMarkDirtyObjects(/*paused*/ true, accounting::CardTable::kCardDirty); + { + TimingLogger::ScopedTiming t2("SwapStacks", GetTimings()); + heap_->SwapStacks(); + live_stack_freeze_size_ = heap_->GetLiveStack()->Size(); + } + } + // TODO: For PreSweepingGcVerification(), find correct strategy to visit/walk + // objects in bump-pointer space when we have a mark-bitmap to indicate live + // objects. At the same time we also need to be able to visit black allocations, + // even though they are not marked in the bitmap. 
Without both of these we fail + // pre-sweeping verification. As well as we leave windows open wherein a + // VisitObjects/Walk on the space would either miss some objects or visit + // unreachable ones. These windows are when we are switching from shared + // mutator-lock to exclusive and vice-versa starting from here till compaction pause. + // heap_->PreSweepingGcVerification(this); + + // Disallow new system weaks to prevent a race which occurs when someone adds + // a new system weak before we sweep them. Since this new system weak may not + // be marked, the GC may incorrectly sweep it. This also fixes a race where + // interning may attempt to return a strong reference to a string that is + // about to be swept. + runtime->DisallowNewSystemWeaks(); + // Enable the reference processing slow path, needs to be done with mutators + // paused since there is no lock in the GetReferent fast path. + heap_->GetReferenceProcessor()->EnableSlowPath(); +} + +void MarkCompact::SweepSystemWeaks(Thread* self, Runtime* runtime, const bool paused) { + TimingLogger::ScopedTiming t(paused ? "(Paused)SweepSystemWeaks" : "SweepSystemWeaks", + GetTimings()); + ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_); + runtime->SweepSystemWeaks(this); +} + +void MarkCompact::ProcessReferences(Thread* self) { + WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); + GetHeap()->GetReferenceProcessor()->ProcessReferences(self, GetTimings()); +} + +void MarkCompact::Sweep(bool swap_bitmaps) { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); + // Ensure that nobody inserted objects in the live stack after we swapped the + // stacks. + CHECK_GE(live_stack_freeze_size_, GetHeap()->GetLiveStack()->Size()); + { + TimingLogger::ScopedTiming t2("MarkAllocStackAsLive", GetTimings()); + // Mark everything allocated since the last GC as live so that we can sweep + // concurrently, knowing that new allocations won't be marked as live. + accounting::ObjectStack* live_stack = heap_->GetLiveStack(); + heap_->MarkAllocStackAsLive(live_stack); + live_stack->Reset(); + DCHECK(mark_stack_->IsEmpty()); + } + for (const auto& space : GetHeap()->GetContinuousSpaces()) { + if (space->IsContinuousMemMapAllocSpace() && space != bump_pointer_space_) { + space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace(); + TimingLogger::ScopedTiming split( + alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepMallocSpace", + GetTimings()); + RecordFree(alloc_space->Sweep(swap_bitmaps)); + } + } + SweepLargeObjects(swap_bitmaps); +} + +void MarkCompact::SweepLargeObjects(bool swap_bitmaps) { + space::LargeObjectSpace* los = heap_->GetLargeObjectsSpace(); + if (los != nullptr) { + TimingLogger::ScopedTiming split(__FUNCTION__, GetTimings()); + RecordFreeLOS(los->Sweep(swap_bitmaps)); + } +} + +void MarkCompact::ReclaimPhase() { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); + DCHECK(thread_running_gc_ == Thread::Current()); + Runtime* const runtime = Runtime::Current(); + // Process the references concurrently. + ProcessReferences(thread_running_gc_); + // TODO: Try to merge this system-weak sweeping with the one while updating + // references during the compaction pause. + SweepSystemWeaks(thread_running_gc_, runtime, /*paused*/ false); + runtime->AllowNewSystemWeaks(); + // Clean up class loaders after system weaks are swept since that is how we know if class + // unloading occurred. 
+ runtime->GetClassLinker()->CleanupClassLoaders(); + { + WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_); + // Reclaim unmarked objects. + Sweep(false); + // Swap the live and mark bitmaps for each space that we modified. This is an + // optimization that enables us to not clear live bits inside of the sweep. Only swaps unbound + // bitmaps. + SwapBitmaps(); + // Unbind the live and mark bitmaps. + GetHeap()->UnBindBitmaps(); + } +} + +// We want to avoid checking for every reference if it's within the page or +// not. This can be done if we know where in the page the holder object lies. +// If it doesn't overlap either boundary, then we can skip the checks. +template <bool kCheckBegin, bool kCheckEnd> +class MarkCompact::RefsUpdateVisitor { + public: + explicit RefsUpdateVisitor(MarkCompact* collector, + mirror::Object* obj, + uint8_t* begin, + uint8_t* end) + : collector_(collector), obj_(obj), begin_(begin), end_(end) { + DCHECK(!kCheckBegin || begin != nullptr); + DCHECK(!kCheckEnd || end != nullptr); + } + + void operator()(mirror::Object* old ATTRIBUTE_UNUSED, MemberOffset offset, bool /* is_static */) + const ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) + REQUIRES_SHARED(Locks::heap_bitmap_lock_) { + bool update = true; + if (kCheckBegin || kCheckEnd) { + uint8_t* ref = reinterpret_cast<uint8_t*>(obj_) + offset.Int32Value(); + update = (!kCheckBegin || ref >= begin_) && (!kCheckEnd || ref < end_); + } + if (update) { + collector_->UpdateRef(obj_, offset); + } + } + + // For object arrays we don't need to check boundaries here as it's done in + // VisitReferences(). + // TODO: Optimize reference updating using SIMD instructions. Object arrays + // are perfect as all references are tightly packed. + void operator()(mirror::Object* old ATTRIBUTE_UNUSED, + MemberOffset offset, + bool /*is_static*/, + bool /*is_obj_array*/) + const ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) + REQUIRES_SHARED(Locks::heap_bitmap_lock_) { + collector_->UpdateRef(obj_, offset); + } + + void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const + ALWAYS_INLINE + REQUIRES_SHARED(Locks::mutator_lock_) { + if (!root->IsNull()) { + VisitRoot(root); + } + } + + void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const + ALWAYS_INLINE + REQUIRES_SHARED(Locks::mutator_lock_) { + collector_->UpdateRoot(root); + } + + private: + MarkCompact* const collector_; + mirror::Object* const obj_; + uint8_t* const begin_; + uint8_t* const end_; +}; + +bool MarkCompact::IsValidObject(mirror::Object* obj) const { + mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>(); + if (!heap_->GetVerification()->IsValidHeapObjectAddress(klass)) { + return false; + } + return heap_->GetVerification()->IsValidClassUnchecked<kWithFromSpaceBarrier>( + obj->GetClass<kVerifyNone, kWithFromSpaceBarrier>()); +} + +template <typename Callback> +void MarkCompact::VerifyObject(mirror::Object* ref, Callback& callback) const { + if (kIsDebugBuild) { + mirror::Class* klass = ref->GetClass<kVerifyNone, kWithFromSpaceBarrier>(); + mirror::Class* pre_compact_klass = ref->GetClass<kVerifyNone, kWithoutReadBarrier>(); + mirror::Class* klass_klass = klass->GetClass<kVerifyNone, kWithFromSpaceBarrier>(); + mirror::Class* klass_klass_klass = klass_klass->GetClass<kVerifyNone, kWithFromSpaceBarrier>(); + if (bump_pointer_space_->HasAddress(pre_compact_klass) && + reinterpret_cast<uint8_t*>(pre_compact_klass) < black_allocations_begin_) { +
CHECK(moving_space_bitmap_->Test(pre_compact_klass)) + << "ref=" << ref + << " post_compact_end=" << static_cast<void*>(post_compact_end_) + << " pre_compact_klass=" << pre_compact_klass + << " black_allocations_begin=" << static_cast<void*>(black_allocations_begin_); + CHECK(live_words_bitmap_->Test(pre_compact_klass)); + } + if (!IsValidObject(ref)) { + std::ostringstream oss; + oss << "Invalid object: " + << "ref=" << ref + << " klass=" << klass + << " klass_klass=" << klass_klass + << " klass_klass_klass=" << klass_klass_klass + << " pre_compact_klass=" << pre_compact_klass + << " from_space_begin=" << static_cast<void*>(from_space_begin_) + << " pre_compact_begin=" << static_cast<void*>(bump_pointer_space_->Begin()) + << " post_compact_end=" << static_cast<void*>(post_compact_end_) + << " black_allocations_begin=" << static_cast<void*>(black_allocations_begin_); + + // Call callback before dumping larger data like RAM and space dumps. + callback(oss); + + oss << " \nobject=" + << heap_->GetVerification()->DumpRAMAroundAddress(reinterpret_cast<uintptr_t>(ref), 128) + << " \nklass(from)=" + << heap_->GetVerification()->DumpRAMAroundAddress(reinterpret_cast<uintptr_t>(klass), 128) + << "spaces:\n"; + heap_->DumpSpaces(oss); + LOG(FATAL) << oss.str(); + } + } +} + +void MarkCompact::CompactPage(mirror::Object* obj, + uint32_t offset, + uint8_t* addr, + bool needs_memset_zero) { + DCHECK(moving_space_bitmap_->Test(obj) + && live_words_bitmap_->Test(obj)); + DCHECK(live_words_bitmap_->Test(offset)) << "obj=" << obj + << " offset=" << offset + << " addr=" << static_cast<void*>(addr) + << " black_allocs_begin=" + << static_cast<void*>(black_allocations_begin_) + << " post_compact_addr=" + << static_cast<void*>(post_compact_end_); + uint8_t* const start_addr = addr; + // How many distinct live-strides do we have. + size_t stride_count = 0; + uint8_t* last_stride = addr; + uint32_t last_stride_begin = 0; + auto verify_obj_callback = [&] (std::ostream& os) { + os << " stride_count=" << stride_count + << " last_stride=" << static_cast<void*>(last_stride) + << " offset=" << offset + << " start_addr=" << static_cast<void*>(start_addr); + }; + obj = GetFromSpaceAddr(obj); + live_words_bitmap_->VisitLiveStrides(offset, + black_allocations_begin_, + kPageSize, + [&addr, + &last_stride, + &stride_count, + &last_stride_begin, + verify_obj_callback, + this] (uint32_t stride_begin, + size_t stride_size, + bool /*is_last*/) + REQUIRES_SHARED(Locks::mutator_lock_) { + const size_t stride_in_bytes = stride_size * kAlignment; + DCHECK_LE(stride_in_bytes, kPageSize); + last_stride_begin = stride_begin; + DCHECK(IsAligned<kAlignment>(addr)); + memcpy(addr, + from_space_begin_ + stride_begin * kAlignment, + stride_in_bytes); + if (kIsDebugBuild) { + uint8_t* space_begin = bump_pointer_space_->Begin(); + // We can interpret the first word of the stride as an + // obj only from second stride onwards, as the first + // stride's first-object may have started on previous + // page. The only exception is the first page of the + // moving space. 
+ if (stride_count > 0 + || stride_begin * kAlignment < kPageSize) { + mirror::Object* o = + reinterpret_cast<mirror::Object*>(space_begin + + stride_begin + * kAlignment); + CHECK(live_words_bitmap_->Test(o)) << "ref=" << o; + CHECK(moving_space_bitmap_->Test(o)) + << "ref=" << o + << " bitmap: " + << moving_space_bitmap_->DumpMemAround(o); + VerifyObject(reinterpret_cast<mirror::Object*>(addr), + verify_obj_callback); + } + } + last_stride = addr; + addr += stride_in_bytes; + stride_count++; + }); + DCHECK_LT(last_stride, start_addr + kPageSize); + DCHECK_GT(stride_count, 0u); + size_t obj_size = 0; + uint32_t offset_within_obj = offset * kAlignment + - (reinterpret_cast<uint8_t*>(obj) - from_space_begin_); + // First object + if (offset_within_obj > 0) { + mirror::Object* to_ref = reinterpret_cast<mirror::Object*>(start_addr - offset_within_obj); + if (stride_count > 1) { + RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/false> visitor(this, + to_ref, + start_addr, + nullptr); + obj_size = obj->VisitRefsForCompaction</*kFetchObjSize*/true, /*kVisitNativeRoots*/false>( + visitor, MemberOffset(offset_within_obj), MemberOffset(-1)); + } else { + RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/true> visitor(this, + to_ref, + start_addr, + start_addr + kPageSize); + obj_size = obj->VisitRefsForCompaction</*kFetchObjSize*/true, /*kVisitNativeRoots*/false>( + visitor, MemberOffset(offset_within_obj), MemberOffset(offset_within_obj + + kPageSize)); + } + obj_size = RoundUp(obj_size, kAlignment); + DCHECK_GT(obj_size, offset_within_obj) + << "obj:" << obj + << " class:" + << obj->GetClass<kDefaultVerifyFlags, kWithFromSpaceBarrier>() + << " to_addr:" << to_ref + << " black-allocation-begin:" << reinterpret_cast<void*>(black_allocations_begin_) + << " post-compact-end:" << reinterpret_cast<void*>(post_compact_end_) + << " offset:" << offset * kAlignment + << " class-after-obj-iter:" + << (class_after_obj_iter_ != class_after_obj_ordered_map_.rend() ? + class_after_obj_iter_->first.AsMirrorPtr() : nullptr) + << " last-reclaimed-page:" << reinterpret_cast<void*>(last_reclaimed_page_) + << " last-checked-reclaim-page-idx:" << last_checked_reclaim_page_idx_ + << " offset-of-last-idx:" + << pre_compact_offset_moving_space_[last_checked_reclaim_page_idx_] * kAlignment + << " first-obj-of-last-idx:" + << first_objs_moving_space_[last_checked_reclaim_page_idx_].AsMirrorPtr(); + + obj_size -= offset_within_obj; + // If there is only one stride, then adjust last_stride_begin to the + // end of the first object. + if (stride_count == 1) { + last_stride_begin += obj_size / kAlignment; + } + } + + // Except for the last page being compacted, the pages will have addr == + // start_addr + kPageSize. + uint8_t* const end_addr = addr; + addr = start_addr; + size_t bytes_done = obj_size; + // All strides except the last one can be updated without any boundary + // checks. 
+ DCHECK_LE(addr, last_stride); + size_t bytes_to_visit = last_stride - addr; + DCHECK_LE(bytes_to_visit, kPageSize); + while (bytes_to_visit > bytes_done) { + mirror::Object* ref = reinterpret_cast<mirror::Object*>(addr + bytes_done); + VerifyObject(ref, verify_obj_callback); + RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false> + visitor(this, ref, nullptr, nullptr); + obj_size = ref->VisitRefsForCompaction(visitor, MemberOffset(0), MemberOffset(-1)); + obj_size = RoundUp(obj_size, kAlignment); + bytes_done += obj_size; + } + // Last stride may have multiple objects in it and we don't know where the + // last object which crosses the page boundary starts, therefore check + // page-end in all of these objects. Also, we need to call + // VisitRefsForCompaction() with from-space object as we fetch object size, + // which in case of klass requires 'class_size_'. + uint8_t* from_addr = from_space_begin_ + last_stride_begin * kAlignment; + bytes_to_visit = end_addr - addr; + DCHECK_LE(bytes_to_visit, kPageSize); + while (bytes_to_visit > bytes_done) { + mirror::Object* ref = reinterpret_cast<mirror::Object*>(addr + bytes_done); + obj = reinterpret_cast<mirror::Object*>(from_addr); + VerifyObject(ref, verify_obj_callback); + RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true> + visitor(this, ref, nullptr, start_addr + kPageSize); + obj_size = obj->VisitRefsForCompaction(visitor, + MemberOffset(0), + MemberOffset(end_addr - (addr + bytes_done))); + obj_size = RoundUp(obj_size, kAlignment); + DCHECK_GT(obj_size, 0u) + << "from_addr:" << obj + << " from-space-class:" + << obj->GetClass<kDefaultVerifyFlags, kWithFromSpaceBarrier>() + << " to_addr:" << ref + << " black-allocation-begin:" << reinterpret_cast<void*>(black_allocations_begin_) + << " post-compact-end:" << reinterpret_cast<void*>(post_compact_end_) + << " offset:" << offset * kAlignment + << " bytes_done:" << bytes_done + << " class-after-obj-iter:" + << (class_after_obj_iter_ != class_after_obj_ordered_map_.rend() ? + class_after_obj_iter_->first.AsMirrorPtr() : nullptr) + << " last-reclaimed-page:" << reinterpret_cast<void*>(last_reclaimed_page_) + << " last-checked-reclaim-page-idx:" << last_checked_reclaim_page_idx_ + << " offset-of-last-idx:" + << pre_compact_offset_moving_space_[last_checked_reclaim_page_idx_] * kAlignment + << " first-obj-of-last-idx:" + << first_objs_moving_space_[last_checked_reclaim_page_idx_].AsMirrorPtr(); + + from_addr += obj_size; + bytes_done += obj_size; + } + // The last page that we compact may have some bytes left untouched in the + // end, we should zero them as the kernel copies at page granularity. + if (needs_memset_zero && UNLIKELY(bytes_done < kPageSize)) { + std::memset(addr + bytes_done, 0x0, kPageSize - bytes_done); + } +} + +// We store the starting point (pre_compact_page - first_obj) and first-chunk's +// size. If more TLAB(s) started in this page, then those chunks are identified +// using mark bitmap. All this info is prepared in UpdateMovingSpaceBlackAllocations(). +// If we find a set bit in the bitmap, then we copy the remaining page and then +// use the bitmap to visit each object for updating references. 
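(Aside: both CompactPage() above and SlideBlackPage() below ultimately walk maximal runs of consecutive set bits, in the live-words bitmap and the mark bitmap respectively. A minimal, self-contained illustration of that run enumeration is given here; the names are made up and none of the offset or page-boundary handling of LiveWordsBitmap::VisitLiveStrides() is reproduced.)

    #include <cstddef>
    #include <cstdint>

    // Illustrative only: enumerate maximal runs ("strides") of consecutive set
    // bits in a bit vector whose bits are assigned little-endian first within
    // each word, calling visit(start_bit, length) once per run.
    template <typename Visitor>
    void VisitSetBitRuns(const uint64_t* words, size_t num_words, Visitor&& visit) {
      size_t run_start = 0;
      size_t run_len = 0;
      for (size_t w = 0; w < num_words; ++w) {
        for (size_t b = 0; b < 64; ++b) {
          size_t bit = w * 64 + b;
          if ((words[w] >> b) & 1u) {
            if (run_len == 0) {
              run_start = bit;
            }
            ++run_len;
          } else if (run_len != 0) {
            visit(run_start, run_len);
            run_len = 0;
          }
        }
      }
      if (run_len != 0) {
        visit(run_start, run_len);
      }
    }
    // Usage sketch: each run corresponds to 'run_len * kAlignment' contiguous
    // live bytes that can be memcpy'd into the destination page in one go.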
+void MarkCompact::SlideBlackPage(mirror::Object* first_obj,
+                                 const size_t page_idx,
+                                 uint8_t* const pre_compact_page,
+                                 uint8_t* dest,
+                                 bool needs_memset_zero) {
+  DCHECK(IsAligned<kPageSize>(pre_compact_page));
+  size_t bytes_copied;
+  const uint32_t first_chunk_size = black_alloc_pages_first_chunk_size_[page_idx];
+  mirror::Object* next_page_first_obj = first_objs_moving_space_[page_idx + 1].AsMirrorPtr();
+  uint8_t* src_addr = reinterpret_cast<uint8_t*>(GetFromSpaceAddr(first_obj));
+  uint8_t* pre_compact_addr = reinterpret_cast<uint8_t*>(first_obj);
+  uint8_t* const pre_compact_page_end = pre_compact_page + kPageSize;
+  uint8_t* const dest_page_end = dest + kPageSize;
+
+  auto verify_obj_callback = [&] (std::ostream& os) {
+                               os << " first_obj=" << first_obj
+                                  << " next_page_first_obj=" << next_page_first_obj
+                                  << " first_chunk_size=" << first_chunk_size
+                                  << " dest=" << static_cast<void*>(dest)
+                                  << " pre_compact_page="
+                                  << static_cast<void* const>(pre_compact_page);
+                             };
+  // We have an empty portion at the beginning of the page. Zero it.
+  if (pre_compact_addr > pre_compact_page) {
+    bytes_copied = pre_compact_addr - pre_compact_page;
+    DCHECK_LT(bytes_copied, kPageSize);
+    if (needs_memset_zero) {
+      std::memset(dest, 0x0, bytes_copied);
+    }
+    dest += bytes_copied;
+  } else {
+    bytes_copied = 0;
+    size_t offset = pre_compact_page - pre_compact_addr;
+    pre_compact_addr = pre_compact_page;
+    src_addr += offset;
+    DCHECK(IsAligned<kPageSize>(src_addr));
+  }
+  // Copy the first chunk of live words.
+  std::memcpy(dest, src_addr, first_chunk_size);
+  // Update references in the first chunk. Use object size to find the next object.
+  {
+    size_t bytes_to_visit = first_chunk_size;
+    size_t obj_size;
+    // The first object started in some previous page. So we need to check the
+    // beginning.
+    DCHECK_LE(reinterpret_cast<uint8_t*>(first_obj), pre_compact_addr);
+    size_t offset = pre_compact_addr - reinterpret_cast<uint8_t*>(first_obj);
+    if (bytes_copied == 0 && offset > 0) {
+      mirror::Object* to_obj = reinterpret_cast<mirror::Object*>(dest - offset);
+      mirror::Object* from_obj = reinterpret_cast<mirror::Object*>(src_addr - offset);
+      // If the next page's first-obj is in this page or nullptr, then we don't
+      // need to check the end boundary.
+      if (next_page_first_obj == nullptr
+          || (first_obj != next_page_first_obj
+              && reinterpret_cast<uint8_t*>(next_page_first_obj) <= pre_compact_page_end)) {
+        RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/false> visitor(this,
+                                                                           to_obj,
+                                                                           dest,
+                                                                           nullptr);
+        obj_size = from_obj->VisitRefsForCompaction<
+                /*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(visitor,
+                                                                   MemberOffset(offset),
+                                                                   MemberOffset(-1));
+      } else {
+        RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/true> visitor(this,
+                                                                          to_obj,
+                                                                          dest,
+                                                                          dest_page_end);
+        obj_size = from_obj->VisitRefsForCompaction<
+                /*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(visitor,
+                                                                   MemberOffset(offset),
+                                                                   MemberOffset(offset
+                                                                                + kPageSize));
+        if (first_obj == next_page_first_obj) {
+          // The first object is the only object on this page. So there's nothing else left to do.
+ return; + } + } + obj_size = RoundUp(obj_size, kAlignment); + obj_size -= offset; + dest += obj_size; + bytes_to_visit -= obj_size; + } + bytes_copied += first_chunk_size; + // If the last object in this page is next_page_first_obj, then we need to check end boundary + bool check_last_obj = false; + if (next_page_first_obj != nullptr + && reinterpret_cast<uint8_t*>(next_page_first_obj) < pre_compact_page_end + && bytes_copied == kPageSize) { + size_t diff = pre_compact_page_end - reinterpret_cast<uint8_t*>(next_page_first_obj); + DCHECK_LE(diff, kPageSize); + DCHECK_LE(diff, bytes_to_visit); + bytes_to_visit -= diff; + check_last_obj = true; + } + while (bytes_to_visit > 0) { + mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest); + VerifyObject(dest_obj, verify_obj_callback); + RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false> visitor(this, + dest_obj, + nullptr, + nullptr); + obj_size = dest_obj->VisitRefsForCompaction(visitor, MemberOffset(0), MemberOffset(-1)); + obj_size = RoundUp(obj_size, kAlignment); + bytes_to_visit -= obj_size; + dest += obj_size; + } + DCHECK_EQ(bytes_to_visit, 0u); + if (check_last_obj) { + mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest); + VerifyObject(dest_obj, verify_obj_callback); + RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true> visitor(this, + dest_obj, + nullptr, + dest_page_end); + mirror::Object* obj = GetFromSpaceAddr(next_page_first_obj); + obj->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor, + MemberOffset(0), + MemberOffset(dest_page_end - dest)); + return; + } + } + + // Probably a TLAB finished on this page and/or a new TLAB started as well. + if (bytes_copied < kPageSize) { + src_addr += first_chunk_size; + pre_compact_addr += first_chunk_size; + // Use mark-bitmap to identify where objects are. First call + // VisitMarkedRange for only the first marked bit. If found, zero all bytes + // until that object and then call memcpy on the rest of the page. + // Then call VisitMarkedRange for all marked bits *after* the one found in + // this invocation. This time to visit references. + uintptr_t start_visit = reinterpret_cast<uintptr_t>(pre_compact_addr); + uintptr_t page_end = reinterpret_cast<uintptr_t>(pre_compact_page_end); + mirror::Object* found_obj = nullptr; + moving_space_bitmap_->VisitMarkedRange</*kVisitOnce*/true>(start_visit, + page_end, + [&found_obj](mirror::Object* obj) { + found_obj = obj; + }); + size_t remaining_bytes = kPageSize - bytes_copied; + if (found_obj == nullptr) { + if (needs_memset_zero) { + // No more black objects in this page. Zero the remaining bytes and return. + std::memset(dest, 0x0, remaining_bytes); + } + return; + } + // Copy everything in this page, which includes any zeroed regions + // in-between. 
+    std::memcpy(dest, src_addr, remaining_bytes);
+    DCHECK_LT(reinterpret_cast<uintptr_t>(found_obj), page_end);
+    moving_space_bitmap_->VisitMarkedRange(
+            reinterpret_cast<uintptr_t>(found_obj) + mirror::kObjectHeaderSize,
+            page_end,
+            [&found_obj, pre_compact_addr, dest, this, verify_obj_callback] (mirror::Object* obj)
+            REQUIRES_SHARED(Locks::mutator_lock_) {
+              ptrdiff_t diff = reinterpret_cast<uint8_t*>(found_obj) - pre_compact_addr;
+              mirror::Object* ref = reinterpret_cast<mirror::Object*>(dest + diff);
+              VerifyObject(ref, verify_obj_callback);
+              RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false>
+                      visitor(this, ref, nullptr, nullptr);
+              ref->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor,
+                                                                  MemberOffset(0),
+                                                                  MemberOffset(-1));
+              // Remember for next round.
+              found_obj = obj;
+            });
+    // found_obj may have been updated in VisitMarkedRange. Visit the last found
+    // object.
+    DCHECK_GT(reinterpret_cast<uint8_t*>(found_obj), pre_compact_addr);
+    DCHECK_LT(reinterpret_cast<uintptr_t>(found_obj), page_end);
+    ptrdiff_t diff = reinterpret_cast<uint8_t*>(found_obj) - pre_compact_addr;
+    mirror::Object* ref = reinterpret_cast<mirror::Object*>(dest + diff);
+    VerifyObject(ref, verify_obj_callback);
+    RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true> visitor(this,
+                                                                       ref,
+                                                                       nullptr,
+                                                                       dest_page_end);
+    ref->VisitRefsForCompaction</*kFetchObjSize*/false>(
+            visitor, MemberOffset(0), MemberOffset(page_end -
+                                                   reinterpret_cast<uintptr_t>(found_obj)));
+  }
+}
+
+template <bool kFirstPageMapping>
+void MarkCompact::MapProcessedPages(uint8_t* to_space_start,
+                                    Atomic<PageState>* state_arr,
+                                    size_t arr_idx,
+                                    size_t arr_len) {
+  DCHECK(minor_fault_initialized_);
+  DCHECK_LT(arr_idx, arr_len);
+  DCHECK_ALIGNED(to_space_start, kPageSize);
+  // Claim all the contiguous pages, which are ready to be mapped, and then do
+  // so in a single ioctl. This helps avoid the overhead of invoking the
+  // syscall several times and also maps the already-processed pages, avoiding
+  // unnecessary faults on them.
+  size_t length = kFirstPageMapping ? kPageSize : 0;
+  if (kFirstPageMapping) {
+    arr_idx++;
+  }
+  // We need to guarantee that we don't end up successfully marking a later
+  // page 'mapping' and then fail to mark an earlier page. To guarantee that,
+  // we use acq_rel order.
+  for (; arr_idx < arr_len; arr_idx++, length += kPageSize) {
+    PageState expected_state = PageState::kProcessed;
+    if (!state_arr[arr_idx].compare_exchange_strong(
+            expected_state, PageState::kProcessedAndMapping, std::memory_order_acq_rel)) {
+      break;
+    }
+  }
+  if (length > 0) {
+    // Note: We need the first page to be attempted (to be mapped) by the ioctl
+    // as this function is called due to some mutator thread waiting on the
+    // 'to_space_start' page. Therefore, the ioctl must always be called
+    // with 'to_space_start' as the 'start' address because it can bail out in
+    // the middle (not attempting to map the subsequent pages) if it finds any
+    // page either already mapped in between, or missing on the shadow-map.
+    struct uffdio_continue uffd_continue;
+    uffd_continue.range.start = reinterpret_cast<uintptr_t>(to_space_start);
+    uffd_continue.range.len = length;
+    uffd_continue.mode = 0;
+    int ret = ioctl(uffd_, UFFDIO_CONTINUE, &uffd_continue);
+    if (UNLIKELY(ret == -1 && errno == EAGAIN)) {
+      // This can happen only in linear-alloc.
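+      // Note: even on such a bail-out the ioctl may have mapped a prefix of
+      // the requested range; 'uffd_continue.mapped' holds the number of bytes
+      // (if any) that were mapped starting at 'to_space_start', which the
+      // checks and the roll-back below rely on.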
+ DCHECK(linear_alloc_spaces_data_.end() != + std::find_if(linear_alloc_spaces_data_.begin(), + linear_alloc_spaces_data_.end(), + [to_space_start](const LinearAllocSpaceData& data) { + return data.begin_ <= to_space_start && to_space_start < data.end_; + })); + + // This could happen if userfaultfd couldn't find any pages mapped in the + // shadow map. For instance, if there are certain (contiguous) pages on + // linear-alloc which are allocated and have first-object set-up but have + // not been accessed yet. + // Bail out by setting the remaining pages' state back to kProcessed and + // then waking up any waiting threads. + DCHECK_GE(uffd_continue.mapped, 0); + DCHECK_ALIGNED(uffd_continue.mapped, kPageSize); + DCHECK_LT(uffd_continue.mapped, static_cast<ssize_t>(length)); + if (kFirstPageMapping) { + // In this case the first page must be mapped. + DCHECK_GE(uffd_continue.mapped, static_cast<ssize_t>(kPageSize)); + } + // Nobody would modify these pages' state simultaneously so only atomic + // store is sufficient. Use 'release' order to ensure that all states are + // modified sequentially. + for (size_t remaining_len = length - uffd_continue.mapped; remaining_len > 0; + remaining_len -= kPageSize) { + arr_idx--; + DCHECK_EQ(state_arr[arr_idx].load(std::memory_order_relaxed), + PageState::kProcessedAndMapping); + state_arr[arr_idx].store(PageState::kProcessed, std::memory_order_release); + } + uffd_continue.range.start = + reinterpret_cast<uintptr_t>(to_space_start) + uffd_continue.mapped; + uffd_continue.range.len = length - uffd_continue.mapped; + ret = ioctl(uffd_, UFFDIO_WAKE, &uffd_continue.range); + CHECK_EQ(ret, 0) << "ioctl_userfaultfd: wake failed: " << strerror(errno); + } else { + // We may receive ENOENT if gc-thread unregisters the + // range behind our back, which is fine because that + // happens only when it knows compaction is done. + CHECK(ret == 0 || !kFirstPageMapping || errno == ENOENT) + << "ioctl_userfaultfd: continue failed: " << strerror(errno); + if (ret == 0) { + DCHECK_EQ(uffd_continue.mapped, static_cast<ssize_t>(length)); + } + } + if (use_uffd_sigbus_) { + // Nobody else would modify these pages' state simultaneously so atomic + // store is sufficient. + for (; uffd_continue.mapped > 0; uffd_continue.mapped -= kPageSize) { + arr_idx--; + DCHECK_EQ(state_arr[arr_idx].load(std::memory_order_relaxed), + PageState::kProcessedAndMapping); + state_arr[arr_idx].store(PageState::kProcessedAndMapped, std::memory_order_release); + } + } + } +} + +void MarkCompact::ZeropageIoctl(void* addr, bool tolerate_eexist, bool tolerate_enoent) { + struct uffdio_zeropage uffd_zeropage; + DCHECK(IsAligned<kPageSize>(addr)); + uffd_zeropage.range.start = reinterpret_cast<uintptr_t>(addr); + uffd_zeropage.range.len = kPageSize; + uffd_zeropage.mode = 0; + int ret = ioctl(uffd_, UFFDIO_ZEROPAGE, &uffd_zeropage); + if (LIKELY(ret == 0)) { + DCHECK_EQ(uffd_zeropage.zeropage, static_cast<ssize_t>(kPageSize)); + } else { + CHECK((tolerate_enoent && errno == ENOENT) || (tolerate_eexist && errno == EEXIST)) + << "ioctl_userfaultfd: zeropage failed: " << strerror(errno) << ". addr:" << addr; + } +} + +void MarkCompact::CopyIoctl(void* dst, void* buffer) { + struct uffdio_copy uffd_copy; + uffd_copy.src = reinterpret_cast<uintptr_t>(buffer); + uffd_copy.dst = reinterpret_cast<uintptr_t>(dst); + uffd_copy.len = kPageSize; + uffd_copy.mode = 0; + CHECK_EQ(ioctl(uffd_, UFFDIO_COPY, &uffd_copy), 0) + << "ioctl_userfaultfd: copy failed: " << strerror(errno) << ". 
src:" << buffer + << " dst:" << dst; + DCHECK_EQ(uffd_copy.copy, static_cast<ssize_t>(kPageSize)); +} + +template <int kMode, typename CompactionFn> +void MarkCompact::DoPageCompactionWithStateChange(size_t page_idx, + size_t status_arr_len, + uint8_t* to_space_page, + uint8_t* page, + CompactionFn func) { + PageState expected_state = PageState::kUnprocessed; + PageState desired_state = + kMode == kCopyMode ? PageState::kProcessingAndMapping : PageState::kProcessing; + // In the concurrent case (kMode != kFallbackMode) we need to ensure that the update + // to moving_spaces_status_[page_idx] is released before the contents of the page are + // made accessible to other threads. + // + // We need acquire ordering here to ensure that when the CAS fails, another thread + // has completed processing the page, which is guaranteed by the release below. + if (kMode == kFallbackMode || moving_pages_status_[page_idx].compare_exchange_strong( + expected_state, desired_state, std::memory_order_acquire)) { + func(); + if (kMode == kCopyMode) { + CopyIoctl(to_space_page, page); + if (use_uffd_sigbus_) { + // Store is sufficient as no other thread would modify the status at this point. + moving_pages_status_[page_idx].store(PageState::kProcessedAndMapped, + std::memory_order_release); + } + } else if (kMode == kMinorFaultMode) { + expected_state = PageState::kProcessing; + desired_state = PageState::kProcessed; + // the CAS needs to be with release order to ensure that stores to the + // page makes it to memory *before* other threads observe that it's + // ready to be mapped. + if (!moving_pages_status_[page_idx].compare_exchange_strong( + expected_state, desired_state, std::memory_order_release)) { + // Some mutator has requested to map the page after processing it. + DCHECK_EQ(expected_state, PageState::kProcessingAndMapping); + MapProcessedPages</*kFirstPageMapping=*/true>( + to_space_page, moving_pages_status_, page_idx, status_arr_len); + } + } + } else { + DCHECK_GT(expected_state, PageState::kProcessed); + } +} + +void MarkCompact::FreeFromSpacePages(size_t cur_page_idx, int mode) { + // Thanks to sliding compaction, bump-pointer allocations, and reverse + // compaction (see CompactMovingSpace) the logic here is pretty simple: find + // the to-space page up to which compaction has finished, all the from-space + // pages corresponding to this onwards can be freed. There are some corner + // cases to be taken care of, which are described below. + size_t idx = last_checked_reclaim_page_idx_; + // Find the to-space page up to which the corresponding from-space pages can be + // freed. + for (; idx > cur_page_idx; idx--) { + PageState state = moving_pages_status_[idx - 1].load(std::memory_order_acquire); + if (state == PageState::kMutatorProcessing) { + // Some mutator is working on the page. + break; + } + DCHECK(state >= PageState::kProcessed || + (state == PageState::kUnprocessed && + (mode == kFallbackMode || idx > moving_first_objs_count_))); + } + DCHECK_LE(idx, last_checked_reclaim_page_idx_); + if (idx == last_checked_reclaim_page_idx_) { + // Nothing to do. + return; + } + + uint8_t* reclaim_begin; + uint8_t* idx_addr; + // Calculate the first from-space page to be freed using 'idx'. If the + // first-object of the idx'th to-space page started before the corresponding + // from-space page, which is almost always the case in the compaction portion + // of the moving-space, then it indicates that the subsequent pages that are + // yet to be compacted will need the from-space pages. 
Therefore, find the page + // (from the already compacted pages) whose first-object is different from + // ours. All the from-space pages starting from that one are safe to be + // removed. Please note that this iteration is not expected to be long in + // normal cases as objects are smaller than page size. + if (idx >= moving_first_objs_count_) { + // black-allocated portion of the moving-space + idx_addr = black_allocations_begin_ + (idx - moving_first_objs_count_) * kPageSize; + reclaim_begin = idx_addr; + mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr(); + if (first_obj != nullptr && reinterpret_cast<uint8_t*>(first_obj) < reclaim_begin) { + size_t idx_len = moving_first_objs_count_ + black_page_count_; + for (size_t i = idx + 1; i < idx_len; i++) { + mirror::Object* obj = first_objs_moving_space_[i].AsMirrorPtr(); + // A null first-object indicates that the corresponding to-space page is + // not used yet. So we can compute its from-space page and use that. + if (obj != first_obj) { + reclaim_begin = obj != nullptr + ? AlignUp(reinterpret_cast<uint8_t*>(obj), kPageSize) + : (black_allocations_begin_ + (i - moving_first_objs_count_) * kPageSize); + break; + } + } + } + } else { + DCHECK_GE(pre_compact_offset_moving_space_[idx], 0u); + idx_addr = bump_pointer_space_->Begin() + pre_compact_offset_moving_space_[idx] * kAlignment; + reclaim_begin = idx_addr; + DCHECK_LE(reclaim_begin, black_allocations_begin_); + mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr(); + if (reinterpret_cast<uint8_t*>(first_obj) < reclaim_begin) { + DCHECK_LT(idx, moving_first_objs_count_); + mirror::Object* obj = first_obj; + for (size_t i = idx + 1; i < moving_first_objs_count_; i++) { + obj = first_objs_moving_space_[i].AsMirrorPtr(); + if (first_obj != obj) { + DCHECK_LT(first_obj, obj); + DCHECK_LT(reclaim_begin, reinterpret_cast<uint8_t*>(obj)); + reclaim_begin = reinterpret_cast<uint8_t*>(obj); + break; + } + } + if (obj == first_obj) { + reclaim_begin = black_allocations_begin_; + } + } + reclaim_begin = AlignUp(reclaim_begin, kPageSize); + } + + DCHECK_NE(reclaim_begin, nullptr); + DCHECK_ALIGNED(reclaim_begin, kPageSize); + DCHECK_ALIGNED(last_reclaimed_page_, kPageSize); + // Check if the 'class_after_obj_map_' map allows pages to be freed. + for (; class_after_obj_iter_ != class_after_obj_ordered_map_.rend(); class_after_obj_iter_++) { + mirror::Object* klass = class_after_obj_iter_->first.AsMirrorPtr(); + mirror::Class* from_klass = static_cast<mirror::Class*>(GetFromSpaceAddr(klass)); + // Check with class' end to ensure that, if required, the entire class survives. + uint8_t* klass_end = reinterpret_cast<uint8_t*>(klass) + from_klass->SizeOf<kVerifyNone>(); + DCHECK_LE(klass_end, last_reclaimed_page_); + if (reinterpret_cast<uint8_t*>(klass_end) >= reclaim_begin) { + // Found a class which is in the reclaim range. + uint8_t* obj_addr = reinterpret_cast<uint8_t*>(class_after_obj_iter_->second.AsMirrorPtr()); + // NOTE: Don't assert that obj is of 'klass' type as klass could instead + // be its super-class. + if (obj_addr < idx_addr) { + // Its lowest-address object is not compacted yet. Reclaim starting from + // the end of this class. + reclaim_begin = AlignUp(klass_end, kPageSize); + } else { + // Continue consuming pairs wherein the lowest address object has already + // been compacted. + continue; + } + } + // All the remaining class (and thereby corresponding object) addresses are + // lower than the reclaim range. 
+    break;
+  }
+
+  ssize_t size = last_reclaimed_page_ - reclaim_begin;
+  if (size >= kMinFromSpaceMadviseSize) {
+    int behavior = minor_fault_initialized_ ? MADV_REMOVE : MADV_DONTNEED;
+    CHECK_EQ(madvise(reclaim_begin + from_space_slide_diff_, size, behavior), 0)
+        << "madvise of from-space failed: " << strerror(errno);
+    last_reclaimed_page_ = reclaim_begin;
+  }
+  last_checked_reclaim_page_idx_ = idx;
+}
+
+void MarkCompact::UpdateClassAfterObjMap() {
+  CHECK(class_after_obj_ordered_map_.empty());
+  for (const auto& pair : class_after_obj_hash_map_) {
+    auto super_class_iter = super_class_after_class_hash_map_.find(pair.first);
+    ObjReference key = super_class_iter != super_class_after_class_hash_map_.end()
+                       ? super_class_iter->second
+                       : pair.first;
+    if (std::less<mirror::Object*>{}(pair.second.AsMirrorPtr(), key.AsMirrorPtr()) &&
+        bump_pointer_space_->HasAddress(key.AsMirrorPtr())) {
+      auto [ret_iter, success] = class_after_obj_ordered_map_.try_emplace(key, pair.second);
+      // It could fail only if the class 'key' has objects of its own, which are
+      // lower in address order, as well as objects of some of its derived
+      // classes. In this case, choose the lowest-address object.
+      if (!success &&
+          std::less<mirror::Object*>{}(pair.second.AsMirrorPtr(), ret_iter->second.AsMirrorPtr())) {
+        ret_iter->second = pair.second;
+      }
+    }
+  }
+  class_after_obj_hash_map_.clear();
+  super_class_after_class_hash_map_.clear();
+}
+
+template <int kMode>
+void MarkCompact::CompactMovingSpace(uint8_t* page) {
+  // For every page we have a starting object, which may have started in some
+  // preceding page, and an offset within that object from where we must start
+  // copying.
+  // Consult the live-words bitmap to copy all contiguously live words at a
+  // time. These words may constitute multiple objects. To avoid the need to
+  // consult the mark-bitmap to find where the next live object starts, we use
+  // the object size returned by VisitRefsForCompaction.
+  //
+  // We do the compaction in the reverse direction so that the pages containing
+  // TLABs and the latest allocations are processed first.
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
+  size_t page_status_arr_len = moving_first_objs_count_ + black_page_count_;
+  size_t idx = page_status_arr_len;
+  uint8_t* to_space_end = bump_pointer_space_->Begin() + page_status_arr_len * kPageSize;
+  uint8_t* shadow_space_end = nullptr;
+  if (kMode == kMinorFaultMode) {
+    shadow_space_end = shadow_to_space_map_.Begin() + page_status_arr_len * kPageSize;
+  }
+  uint8_t* pre_compact_page = black_allocations_begin_ + (black_page_count_ * kPageSize);
+
+  DCHECK(IsAligned<kPageSize>(pre_compact_page));
+
+  UpdateClassAfterObjMap();
+  // These variables are maintained by FreeFromSpacePages().
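+  // last_reclaimed_page_ is the to-space address down to which the
+  // corresponding from-space pages have already been madvised away, and
+  // last_checked_reclaim_page_idx_ is the page index up to which reclamation
+  // has been evaluated; both only move downwards as compaction proceeds in
+  // reverse.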
+ last_reclaimed_page_ = pre_compact_page; + last_checked_reclaim_page_idx_ = idx; + class_after_obj_iter_ = class_after_obj_ordered_map_.rbegin(); + // Allocated-black pages + while (idx > moving_first_objs_count_) { + idx--; + pre_compact_page -= kPageSize; + to_space_end -= kPageSize; + if (kMode == kMinorFaultMode) { + shadow_space_end -= kPageSize; + page = shadow_space_end; + } else if (kMode == kFallbackMode) { + page = to_space_end; + } + mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr(); + if (first_obj != nullptr) { + DoPageCompactionWithStateChange<kMode>( + idx, + page_status_arr_len, + to_space_end, + page, + [&]() REQUIRES_SHARED(Locks::mutator_lock_) { + SlideBlackPage(first_obj, idx, pre_compact_page, page, kMode == kCopyMode); + }); + // We are sliding here, so no point attempting to madvise for every + // page. Wait for enough pages to be done. + if (idx % (kMinFromSpaceMadviseSize / kPageSize) == 0) { + FreeFromSpacePages(idx, kMode); + } + } + } + DCHECK_EQ(pre_compact_page, black_allocations_begin_); + + while (idx > 0) { + idx--; + to_space_end -= kPageSize; + if (kMode == kMinorFaultMode) { + shadow_space_end -= kPageSize; + page = shadow_space_end; + } else if (kMode == kFallbackMode) { + page = to_space_end; + } + mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr(); + DoPageCompactionWithStateChange<kMode>( + idx, page_status_arr_len, to_space_end, page, [&]() REQUIRES_SHARED(Locks::mutator_lock_) { + CompactPage(first_obj, pre_compact_offset_moving_space_[idx], page, kMode == kCopyMode); + }); + FreeFromSpacePages(idx, kMode); + } + DCHECK_EQ(to_space_end, bump_pointer_space_->Begin()); +} + +void MarkCompact::UpdateNonMovingPage(mirror::Object* first, uint8_t* page) { + DCHECK_LT(reinterpret_cast<uint8_t*>(first), page + kPageSize); + // For every object found in the page, visit the previous object. This ensures + // that we can visit without checking page-end boundary. + // Call VisitRefsForCompaction with from-space read-barrier as the klass object and + // super-class loads require it. + // TODO: Set kVisitNativeRoots to false once we implement concurrent + // compaction + mirror::Object* curr_obj = first; + non_moving_space_bitmap_->VisitMarkedRange( + reinterpret_cast<uintptr_t>(first) + mirror::kObjectHeaderSize, + reinterpret_cast<uintptr_t>(page + kPageSize), + [&](mirror::Object* next_obj) { + // TODO: Once non-moving space update becomes concurrent, we'll + // require fetching the from-space address of 'curr_obj' and then call + // visitor on that. + if (reinterpret_cast<uint8_t*>(curr_obj) < page) { + RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/false> + visitor(this, curr_obj, page, page + kPageSize); + MemberOffset begin_offset(page - reinterpret_cast<uint8_t*>(curr_obj)); + // Native roots shouldn't be visited as they are done when this + // object's beginning was visited in the preceding page. 
+          curr_obj->VisitRefsForCompaction</*kFetchObjSize*/false, /*kVisitNativeRoots*/false>(
+                  visitor, begin_offset, MemberOffset(-1));
+        } else {
+          RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false>
+                  visitor(this, curr_obj, page, page + kPageSize);
+          curr_obj->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor,
+                                                                   MemberOffset(0),
+                                                                   MemberOffset(-1));
+        }
+        curr_obj = next_obj;
+      });
+
+  MemberOffset end_offset(page + kPageSize - reinterpret_cast<uint8_t*>(curr_obj));
+  if (reinterpret_cast<uint8_t*>(curr_obj) < page) {
+    RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/true>
+        visitor(this, curr_obj, page, page + kPageSize);
+    curr_obj->VisitRefsForCompaction</*kFetchObjSize*/false, /*kVisitNativeRoots*/false>(
+        visitor, MemberOffset(page - reinterpret_cast<uint8_t*>(curr_obj)), end_offset);
+  } else {
+    RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true>
+        visitor(this, curr_obj, page, page + kPageSize);
+    curr_obj->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor, MemberOffset(0), end_offset);
+  }
+}
+
+void MarkCompact::UpdateNonMovingSpace() {
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
+  // Iterating in reverse ensures that the class pointer in objects which span
+  // across more than one page gets updated last. This is necessary for
+  // VisitRefsForCompaction() to work correctly.
+  // TODO: If and when we make the non-moving space update concurrent, implement
+  // a mechanism to remember class pointers for such objects off-heap and pass
+  // them to VisitRefsForCompaction().
+  uint8_t* page = non_moving_space_->Begin() + non_moving_first_objs_count_ * kPageSize;
+  for (ssize_t i = non_moving_first_objs_count_ - 1; i >= 0; i--) {
+    mirror::Object* obj = first_objs_non_moving_space_[i].AsMirrorPtr();
+    page -= kPageSize;
+    // null means there are no objects on the page whose references need updating.
+    if (obj != nullptr) {
+      UpdateNonMovingPage(obj, page);
+    }
+  }
+}
+
+void MarkCompact::UpdateMovingSpaceBlackAllocations() {
+  // For sliding black pages, we need the first-object, which overlaps with the
+  // first byte of the page. Additionally, we compute the size of the first
+  // chunk of black objects. This will suffice for most black pages. Unlike
+  // compaction pages, here we don't need to pre-compute the offset within the
+  // first-obj from where sliding has to start. That can be calculated using
+  // the pre-compact address of the page. Therefore, to save space, we store
+  // the first chunk's size in the black_alloc_pages_first_chunk_size_ array.
+  // For the pages which may have holes after the first chunk, which could happen
+  // if a new TLAB starts in the middle of the page, we mark the objects in
+  // the mark-bitmap. So, if the first-chunk size is smaller than kPageSize,
+  // then we use the mark-bitmap for the remainder of the page.
+  uint8_t* const begin = bump_pointer_space_->Begin();
+  uint8_t* black_allocs = black_allocations_begin_;
+  DCHECK_LE(begin, black_allocs);
+  size_t consumed_blocks_count = 0;
+  size_t first_block_size;
+  // Get the list of all blocks allocated in the bump-pointer space.
+ std::vector<size_t>* block_sizes = bump_pointer_space_->GetBlockSizes(thread_running_gc_, + &first_block_size); + DCHECK_LE(first_block_size, (size_t)(black_allocs - begin)); + if (block_sizes != nullptr) { + size_t black_page_idx = moving_first_objs_count_; + uint8_t* block_end = begin + first_block_size; + uint32_t remaining_chunk_size = 0; + uint32_t first_chunk_size = 0; + mirror::Object* first_obj = nullptr; + for (size_t block_size : *block_sizes) { + block_end += block_size; + // Skip the blocks that are prior to the black allocations. These will be + // merged with the main-block later. + if (black_allocs >= block_end) { + consumed_blocks_count++; + continue; + } + mirror::Object* obj = reinterpret_cast<mirror::Object*>(black_allocs); + bool set_mark_bit = remaining_chunk_size > 0; + // We don't know how many objects are allocated in the current block. When we hit + // a null assume it's the end. This works as every block is expected to + // have objects allocated linearly using bump-pointer. + // BumpPointerSpace::Walk() also works similarly. + while (black_allocs < block_end + && obj->GetClass<kDefaultVerifyFlags, kWithoutReadBarrier>() != nullptr) { + // Try to keep instructions which access class instance together to + // avoid reloading the pointer from object. + size_t obj_size = obj->SizeOf(); + bytes_scanned_ += obj_size; + obj_size = RoundUp(obj_size, kAlignment); + UpdateClassAfterObjectMap(obj); + if (first_obj == nullptr) { + first_obj = obj; + } + // We only need the mark-bitmap in the pages wherein a new TLAB starts in + // the middle of the page. + if (set_mark_bit) { + moving_space_bitmap_->Set(obj); + } + // Handle objects which cross page boundary, including objects larger + // than page size. + if (remaining_chunk_size + obj_size >= kPageSize) { + set_mark_bit = false; + first_chunk_size += kPageSize - remaining_chunk_size; + remaining_chunk_size += obj_size; + // We should not store first-object and remaining_chunk_size if there were + // unused bytes before this TLAB, in which case we must have already + // stored the values (below). + if (black_alloc_pages_first_chunk_size_[black_page_idx] == 0) { + black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size; + first_objs_moving_space_[black_page_idx].Assign(first_obj); + } + black_page_idx++; + remaining_chunk_size -= kPageSize; + // Consume an object larger than page size. + while (remaining_chunk_size >= kPageSize) { + black_alloc_pages_first_chunk_size_[black_page_idx] = kPageSize; + first_objs_moving_space_[black_page_idx].Assign(obj); + black_page_idx++; + remaining_chunk_size -= kPageSize; + } + first_obj = remaining_chunk_size > 0 ? obj : nullptr; + first_chunk_size = remaining_chunk_size; + } else { + DCHECK_LE(first_chunk_size, remaining_chunk_size); + first_chunk_size += obj_size; + remaining_chunk_size += obj_size; + } + black_allocs += obj_size; + obj = reinterpret_cast<mirror::Object*>(black_allocs); + } + DCHECK_LE(black_allocs, block_end); + DCHECK_LT(remaining_chunk_size, kPageSize); + // consume the unallocated portion of the block + if (black_allocs < block_end) { + // first-chunk of the current page ends here. Store it. 
+ if (first_chunk_size > 0 && black_alloc_pages_first_chunk_size_[black_page_idx] == 0) { + black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size; + first_objs_moving_space_[black_page_idx].Assign(first_obj); + } + first_chunk_size = 0; + first_obj = nullptr; + size_t page_remaining = kPageSize - remaining_chunk_size; + size_t block_remaining = block_end - black_allocs; + if (page_remaining <= block_remaining) { + block_remaining -= page_remaining; + // current page and the subsequent empty pages in the block + black_page_idx += 1 + block_remaining / kPageSize; + remaining_chunk_size = block_remaining % kPageSize; + } else { + remaining_chunk_size += block_remaining; + } + black_allocs = block_end; + } + } + if (black_page_idx < bump_pointer_space_->Size() / kPageSize) { + // Store the leftover first-chunk, if any, and update page index. + if (black_alloc_pages_first_chunk_size_[black_page_idx] > 0) { + black_page_idx++; + } else if (first_chunk_size > 0) { + black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size; + first_objs_moving_space_[black_page_idx].Assign(first_obj); + black_page_idx++; + } + } + black_page_count_ = black_page_idx - moving_first_objs_count_; + delete block_sizes; + } + // Update bump-pointer space by consuming all the pre-black blocks into the + // main one. + bump_pointer_space_->SetBlockSizes(thread_running_gc_, + post_compact_end_ - begin, + consumed_blocks_count); +} + +void MarkCompact::UpdateNonMovingSpaceBlackAllocations() { + accounting::ObjectStack* stack = heap_->GetAllocationStack(); + const StackReference<mirror::Object>* limit = stack->End(); + uint8_t* const space_begin = non_moving_space_->Begin(); + for (StackReference<mirror::Object>* it = stack->Begin(); it != limit; ++it) { + mirror::Object* obj = it->AsMirrorPtr(); + if (obj != nullptr && non_moving_space_bitmap_->HasAddress(obj)) { + non_moving_space_bitmap_->Set(obj); + // Clear so that we don't try to set the bit again in the next GC-cycle. 
+ it->Clear(); + size_t idx = (reinterpret_cast<uint8_t*>(obj) - space_begin) / kPageSize; + uint8_t* page_begin = AlignDown(reinterpret_cast<uint8_t*>(obj), kPageSize); + mirror::Object* first_obj = first_objs_non_moving_space_[idx].AsMirrorPtr(); + if (first_obj == nullptr + || (obj < first_obj && reinterpret_cast<uint8_t*>(first_obj) > page_begin)) { + first_objs_non_moving_space_[idx].Assign(obj); + } + mirror::Object* next_page_first_obj = first_objs_non_moving_space_[++idx].AsMirrorPtr(); + uint8_t* next_page_begin = page_begin + kPageSize; + if (next_page_first_obj == nullptr + || reinterpret_cast<uint8_t*>(next_page_first_obj) > next_page_begin) { + size_t obj_size = RoundUp(obj->SizeOf<kDefaultVerifyFlags>(), kAlignment); + uint8_t* obj_end = reinterpret_cast<uint8_t*>(obj) + obj_size; + while (next_page_begin < obj_end) { + first_objs_non_moving_space_[idx++].Assign(obj); + next_page_begin += kPageSize; + } + } + // update first_objs count in case we went past non_moving_first_objs_count_ + non_moving_first_objs_count_ = std::max(non_moving_first_objs_count_, idx); + } + } +} + +class MarkCompact::ImmuneSpaceUpdateObjVisitor { + public: + ImmuneSpaceUpdateObjVisitor(MarkCompact* collector, bool visit_native_roots) + : collector_(collector), visit_native_roots_(visit_native_roots) {} + + ALWAYS_INLINE void operator()(mirror::Object* obj) const REQUIRES(Locks::mutator_lock_) { + RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false> visitor(collector_, + obj, + /*begin_*/nullptr, + /*end_*/nullptr); + if (visit_native_roots_) { + obj->VisitRefsForCompaction</*kFetchObjSize*/ false, /*kVisitNativeRoots*/ true>( + visitor, MemberOffset(0), MemberOffset(-1)); + } else { + obj->VisitRefsForCompaction</*kFetchObjSize*/ false>( + visitor, MemberOffset(0), MemberOffset(-1)); + } + } + + static void Callback(mirror::Object* obj, void* arg) REQUIRES(Locks::mutator_lock_) { + reinterpret_cast<ImmuneSpaceUpdateObjVisitor*>(arg)->operator()(obj); + } + + private: + MarkCompact* const collector_; + const bool visit_native_roots_; +}; + +class MarkCompact::ClassLoaderRootsUpdater : public ClassLoaderVisitor { + public: + explicit ClassLoaderRootsUpdater(MarkCompact* collector) : collector_(collector) {} + + void Visit(ObjPtr<mirror::ClassLoader> class_loader) override + REQUIRES_SHARED(Locks::classlinker_classes_lock_, Locks::mutator_lock_) { + ClassTable* const class_table = class_loader->GetClassTable(); + if (class_table != nullptr) { + class_table->VisitRoots(*this); + } + } + + void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const + REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) { + if (!root->IsNull()) { + VisitRoot(root); + } + } + + void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const + REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) { + collector_->VisitRoots(&root, 1, RootInfo(RootType::kRootVMInternal)); + } + + private: + MarkCompact* collector_; +}; + +class MarkCompact::LinearAllocPageUpdater { + public: + explicit LinearAllocPageUpdater(MarkCompact* collector) : collector_(collector) {} + + void operator()(uint8_t* page_begin, uint8_t* first_obj) ALWAYS_INLINE + REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK_ALIGNED(page_begin, kPageSize); + uint8_t* page_end = page_begin + kPageSize; + uint32_t obj_size; + for (uint8_t* byte = first_obj; byte < page_end;) { + TrackingHeader* header = reinterpret_cast<TrackingHeader*>(byte); + obj_size = header->GetSize(); + if 
(UNLIKELY(obj_size == 0)) { + // No more objects in this page to visit. + last_page_touched_ = byte >= page_begin; + return; + } + uint8_t* obj = byte + sizeof(TrackingHeader); + uint8_t* obj_end = byte + obj_size; + if (header->Is16Aligned()) { + obj = AlignUp(obj, 16); + } + uint8_t* begin_boundary = std::max(obj, page_begin); + uint8_t* end_boundary = std::min(obj_end, page_end); + if (begin_boundary < end_boundary) { + VisitObject(header->GetKind(), obj, begin_boundary, end_boundary); + } + if (ArenaAllocator::IsRunningOnMemoryTool()) { + obj_size += ArenaAllocator::kMemoryToolRedZoneBytes; + } + byte += RoundUp(obj_size, LinearAlloc::kAlignment); + } + last_page_touched_ = true; + } + + bool WasLastPageTouched() const { return last_page_touched_; } + + void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const + ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) { + if (!root->IsNull()) { + VisitRoot(root); + } + } + + void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const + ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) { + mirror::Object* old_ref = root->AsMirrorPtr(); + DCHECK_NE(old_ref, nullptr); + if (collector_->live_words_bitmap_->HasAddress(old_ref)) { + mirror::Object* new_ref = old_ref; + if (reinterpret_cast<uint8_t*>(old_ref) >= collector_->black_allocations_begin_) { + new_ref = collector_->PostCompactBlackObjAddr(old_ref); + } else if (collector_->live_words_bitmap_->Test(old_ref)) { + DCHECK(collector_->moving_space_bitmap_->Test(old_ref)) << old_ref; + new_ref = collector_->PostCompactOldObjAddr(old_ref); + } + if (old_ref != new_ref) { + root->Assign(new_ref); + } + } + } + + private: + void VisitObject(LinearAllocKind kind, + void* obj, + uint8_t* start_boundary, + uint8_t* end_boundary) const REQUIRES_SHARED(Locks::mutator_lock_) { + switch (kind) { + case LinearAllocKind::kNoGCRoots: + break; + case LinearAllocKind::kGCRootArray: + { + GcRoot<mirror::Object>* root = reinterpret_cast<GcRoot<mirror::Object>*>(start_boundary); + GcRoot<mirror::Object>* last = reinterpret_cast<GcRoot<mirror::Object>*>(end_boundary); + for (; root < last; root++) { + VisitRootIfNonNull(root->AddressWithoutBarrier()); + } + } + break; + case LinearAllocKind::kArtMethodArray: + { + LengthPrefixedArray<ArtMethod>* array = static_cast<LengthPrefixedArray<ArtMethod>*>(obj); + // Old methods are clobbered in debug builds. Check size to confirm if the array + // has any GC roots to visit. 
See ClassLinker::LinkMethodsHelper::ClobberOldMethods() + if (array->size() > 0) { + if (collector_->pointer_size_ == PointerSize::k64) { + ArtMethod::VisitArrayRoots<PointerSize::k64>( + *this, start_boundary, end_boundary, array); + } else { + DCHECK_EQ(collector_->pointer_size_, PointerSize::k32); + ArtMethod::VisitArrayRoots<PointerSize::k32>( + *this, start_boundary, end_boundary, array); + } + } + } + break; + case LinearAllocKind::kArtMethod: + ArtMethod::VisitRoots(*this, start_boundary, end_boundary, static_cast<ArtMethod*>(obj)); + break; + case LinearAllocKind::kArtFieldArray: + ArtField::VisitArrayRoots(*this, + start_boundary, + end_boundary, + static_cast<LengthPrefixedArray<ArtField>*>(obj)); + break; + case LinearAllocKind::kDexCacheArray: + { + mirror::DexCachePair<mirror::Object>* first = + reinterpret_cast<mirror::DexCachePair<mirror::Object>*>(start_boundary); + mirror::DexCachePair<mirror::Object>* last = + reinterpret_cast<mirror::DexCachePair<mirror::Object>*>(end_boundary); + mirror::DexCache::VisitDexCachePairRoots(*this, first, last); + } + } + } + + MarkCompact* const collector_; + // Whether the last page was touched or not. + bool last_page_touched_; +}; + +void MarkCompact::CompactionPause() { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); + Runtime* runtime = Runtime::Current(); + non_moving_space_bitmap_ = non_moving_space_->GetLiveBitmap(); + if (kIsDebugBuild) { + DCHECK_EQ(thread_running_gc_, Thread::Current()); + stack_low_addr_ = thread_running_gc_->GetStackEnd(); + stack_high_addr_ = + reinterpret_cast<char*>(stack_low_addr_) + thread_running_gc_->GetStackSize(); + } + { + TimingLogger::ScopedTiming t2("(Paused)UpdateCompactionDataStructures", GetTimings()); + ReaderMutexLock rmu(thread_running_gc_, *Locks::heap_bitmap_lock_); + // Refresh data-structures to catch-up on allocations that may have + // happened since marking-phase pause. + // There could be several TLABs that got allocated since marking pause. We + // don't want to compact them and instead update the TLAB info in TLS and + // let mutators continue to use the TLABs. + // We need to set all the bits in live-words bitmap corresponding to allocated + // objects. Also, we need to find the objects that are overlapping with + // page-begin boundaries. Unlike objects allocated before + // black_allocations_begin_, which can be identified via mark-bitmap, we can get + // this info only via walking the space past black_allocations_begin_, which + // involves fetching object size. + // TODO: We can reduce the time spent on this in a pause by performing one + // round of this concurrently prior to the pause. + UpdateMovingSpaceBlackAllocations(); + // TODO: If we want to avoid this allocation in a pause then we will have to + // allocate an array for the entire moving-space size, which can be made + // part of info_map_. + moving_pages_status_ = new Atomic<PageState>[moving_first_objs_count_ + black_page_count_]; + if (kIsDebugBuild) { + size_t len = moving_first_objs_count_ + black_page_count_; + for (size_t i = 0; i < len; i++) { + CHECK_EQ(moving_pages_status_[i].load(std::memory_order_relaxed), + PageState::kUnprocessed); + } + } + // Iterate over the allocation_stack_, for every object in the non-moving + // space: + // 1. Mark the object in live bitmap + // 2. Erase the object from allocation stack + // 3. In the corresponding page, if the first-object vector needs updating + // then do so. 
+    UpdateNonMovingSpaceBlackAllocations();
+
+    // This store is visible to mutators (or uffd worker threads) as the mutator
+    // lock's unlock guarantees that.
+    compacting_ = true;
+    // Start updating roots and system weaks now.
+    heap_->GetReferenceProcessor()->UpdateRoots(this);
+  }
+  {
+    TimingLogger::ScopedTiming t2("(Paused)UpdateClassLoaderRoots", GetTimings());
+    ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_);
+    {
+      ClassLoaderRootsUpdater updater(this);
+      runtime->GetClassLinker()->VisitClassLoaders(&updater);
+    }
+  }
+
+  bool has_zygote_space = heap_->HasZygoteSpace();
+  // TODO: Find out why it's not sufficient to visit native roots of immune
+  // spaces, and why all the pre-zygote fork arenas have to be linearly updated.
+  // Is it possible that some native root starts getting pointed to by some object
+  // in the moving space after fork? Or are we missing a write-barrier somewhere
+  // when a native root is updated?
+  GcVisitedArenaPool* arena_pool =
+      static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool());
+  if (uffd_ == kFallbackMode || (!has_zygote_space && runtime->IsZygote())) {
+    // Besides fallback-mode, visit linear-alloc space in the pause for zygote
+    // processes prior to the first fork (that's when zygote space gets created).
+    if (kIsDebugBuild && IsValidFd(uffd_)) {
+      // All arenas allocated so far are expected to be pre-zygote fork.
+      arena_pool->ForEachAllocatedArena(
+          [](const TrackedArena& arena)
+              REQUIRES_SHARED(Locks::mutator_lock_) { CHECK(arena.IsPreZygoteForkArena()); });
+    }
+    LinearAllocPageUpdater updater(this);
+    arena_pool->VisitRoots(updater);
+  } else {
+    // Clear the flag as we care about this only if arenas are freed during
+    // concurrent compaction.
+    arena_pool->ClearArenasFreed();
+    arena_pool->ForEachAllocatedArena(
+        [this](const TrackedArena& arena) REQUIRES_SHARED(Locks::mutator_lock_) {
+          // The pre-zygote fork arenas are not visited concurrently in the
+          // zygote children processes. The native roots of the dirty objects
+          // are visited during the immune space visit below.
+          if (!arena.IsPreZygoteForkArena()) {
+            uint8_t* last_byte = arena.GetLastUsedByte();
+            CHECK(linear_alloc_arenas_.insert({&arena, last_byte}).second);
+          } else {
+            LinearAllocPageUpdater updater(this);
+            arena.VisitRoots(updater);
+          }
+        });
+  }
+
+  SweepSystemWeaks(thread_running_gc_, runtime, /*paused*/ true);
+
+  {
+    TimingLogger::ScopedTiming t2("(Paused)UpdateConcurrentRoots", GetTimings());
+    runtime->VisitConcurrentRoots(this, kVisitRootFlagAllRoots);
+  }
+  {
+    // TODO: don't visit the transaction roots if the transaction is not active.
+    TimingLogger::ScopedTiming t2("(Paused)UpdateNonThreadRoots", GetTimings());
+    runtime->VisitNonThreadRoots(this);
+  }
+
+  {
+    // TODO: The immune space update has to happen either before or after
+    // remapping pre-compact pages to from-space. And depending on when it's
+    // done, we have to invoke VisitRefsForCompaction() with or without a
+    // read-barrier.
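+    // Immune spaces (image and zygote spaces) are not compacted; only the
+    // references held by their objects need updating. The mod-union table
+    // (or, when absent, dirty/aged cards) is used below so that only objects
+    // that may contain references to other spaces have to be visited.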
+    TimingLogger::ScopedTiming t2("(Paused)UpdateImmuneSpaces", GetTimings());
+    accounting::CardTable* const card_table = heap_->GetCardTable();
+    for (auto& space : immune_spaces_.GetSpaces()) {
+      DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
+      accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
+      accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
+      // Having a zygote-space indicates that the first zygote fork has taken
+      // place and that the classes/dex-caches in immune-spaces may have allocations
+      // (ArtMethod/ArtField arrays, dex-cache array, etc.) in private-anonymous
+      // mappings which are not visited via userfaultfd. Visit them here.
+      ImmuneSpaceUpdateObjVisitor visitor(this, /*visit_native_roots=*/false);
+      if (table != nullptr) {
+        table->ProcessCards();
+        table->VisitObjects(ImmuneSpaceUpdateObjVisitor::Callback, &visitor);
+      } else {
+        WriterMutexLock wmu(thread_running_gc_, *Locks::heap_bitmap_lock_);
+        card_table->Scan<false>(
+            live_bitmap,
+            space->Begin(),
+            space->Limit(),
+            visitor,
+            accounting::CardTable::kCardDirty - 1);
+      }
+    }
+  }
+
+  if (use_uffd_sigbus_) {
+    // Release order wrt mutator threads' SIGBUS handler load.
+    sigbus_in_progress_count_.store(0, std::memory_order_release);
+  }
+  KernelPreparation();
+  UpdateNonMovingSpace();
+  // Fallback mode.
+  if (uffd_ == kFallbackMode) {
+    CompactMovingSpace<kFallbackMode>(nullptr);
+
+    int32_t freed_bytes = black_objs_slide_diff_;
+    bump_pointer_space_->RecordFree(freed_objects_, freed_bytes);
+    RecordFree(ObjectBytePair(freed_objects_, freed_bytes));
+  } else {
+    DCHECK_EQ(compaction_in_progress_count_.load(std::memory_order_relaxed), 0u);
+    DCHECK_EQ(compaction_buffer_counter_.load(std::memory_order_relaxed), 1);
+    if (!use_uffd_sigbus_) {
+      // We must start worker threads before resuming mutators to avoid deadlocks.
+      heap_->GetThreadPool()->StartWorkers(thread_running_gc_);
+    }
+  }
+  stack_low_addr_ = nullptr;
+}
+
+void MarkCompact::KernelPrepareRangeForUffd(uint8_t* to_addr,
+                                            uint8_t* from_addr,
+                                            size_t map_size,
+                                            int fd,
+                                            uint8_t* shadow_addr) {
+  int mremap_flags = MREMAP_MAYMOVE | MREMAP_FIXED;
+  if (gHaveMremapDontunmap) {
+    mremap_flags |= MREMAP_DONTUNMAP;
+  }
+
+  void* ret = mremap(to_addr, map_size, map_size, mremap_flags, from_addr);
+  CHECK_EQ(ret, static_cast<void*>(from_addr))
+      << "mremap to move pages failed: " << strerror(errno)
+      << ". space-addr=" << reinterpret_cast<void*>(to_addr) << " size=" << PrettySize(map_size);
+
+  if (shadow_addr != nullptr) {
+    DCHECK_EQ(fd, kFdUnused);
+    DCHECK(gHaveMremapDontunmap);
+    ret = mremap(shadow_addr, map_size, map_size, mremap_flags, to_addr);
+    CHECK_EQ(ret, static_cast<void*>(to_addr))
+        << "mremap from shadow to to-space map failed: " << strerror(errno);
+  } else if (!gHaveMremapDontunmap || fd > kFdUnused) {
+    // Without MREMAP_DONTUNMAP the source mapping is unmapped by mremap. So mmap
+    // the moving space again.
+    int mmap_flags = MAP_FIXED;
+    if (fd == kFdUnused) {
+      // Use MAP_FIXED_NOREPLACE so that, if someone else reserves the 'to_addr'
+      // mapping in the meantime (which can happen when MREMAP_DONTUNMAP isn't
+      // available), we don't unmap someone else's mapping and cause crashes
+      // elsewhere.
+      mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE;
+      // On some platforms MAP_ANONYMOUS expects fd to be -1.
+ fd = -1; + } else if (IsValidFd(fd)) { + mmap_flags |= MAP_SHARED; + } else { + DCHECK_EQ(fd, kFdSharedAnon); + mmap_flags |= MAP_SHARED | MAP_ANONYMOUS; + } + ret = mmap(to_addr, map_size, PROT_READ | PROT_WRITE, mmap_flags, fd, 0); + CHECK_EQ(ret, static_cast<void*>(to_addr)) + << "mmap for moving space failed: " << strerror(errno); + } +} + +void MarkCompact::KernelPreparation() { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); + uint8_t* moving_space_begin = bump_pointer_space_->Begin(); + size_t moving_space_size = bump_pointer_space_->Capacity(); + int mode = kCopyMode; + size_t moving_space_register_sz; + if (minor_fault_initialized_) { + moving_space_register_sz = (moving_first_objs_count_ + black_page_count_) * kPageSize; + if (shadow_to_space_map_.IsValid()) { + size_t shadow_size = shadow_to_space_map_.Size(); + void* addr = shadow_to_space_map_.Begin(); + if (shadow_size < moving_space_register_sz) { + addr = mremap(addr, + shadow_size, + moving_space_register_sz, + // Don't allow moving with obj-ptr poisoning as the + // mapping needs to be in <4GB address space. + kObjPtrPoisoning ? 0 : MREMAP_MAYMOVE, + /*new_address=*/nullptr); + if (addr != MAP_FAILED) { + // Succeeded in expanding the mapping. Update the MemMap entry for shadow map. + MemMap temp = MemMap::MapPlaceholder( + "moving-space-shadow", static_cast<uint8_t*>(addr), moving_space_register_sz); + std::swap(shadow_to_space_map_, temp); + } + } + if (addr != MAP_FAILED) { + mode = kMinorFaultMode; + } else { + // We are not going to use shadow map. So protect it to catch any + // potential bugs. + DCHECK_EQ(mprotect(shadow_to_space_map_.Begin(), shadow_to_space_map_.Size(), PROT_NONE), 0) + << "mprotect failed: " << strerror(errno); + } + } + } else { + moving_space_register_sz = moving_space_size; + } + + bool map_shared = + minor_fault_initialized_ || (!Runtime::Current()->IsZygote() && uffd_minor_fault_supported_); + uint8_t* shadow_addr = nullptr; + if (moving_to_space_fd_ == kFdUnused && map_shared) { + DCHECK(gHaveMremapDontunmap); + DCHECK(shadow_to_space_map_.IsValid()); + DCHECK_EQ(shadow_to_space_map_.Size(), moving_space_size); + shadow_addr = shadow_to_space_map_.Begin(); + } + + KernelPrepareRangeForUffd(moving_space_begin, + from_space_begin_, + moving_space_size, + moving_to_space_fd_, + shadow_addr); + + if (IsValidFd(uffd_)) { + // Register the moving space with userfaultfd. + RegisterUffd(moving_space_begin, moving_space_register_sz, mode); + // Prepare linear-alloc for concurrent compaction. + for (auto& data : linear_alloc_spaces_data_) { + bool mmap_again = map_shared && !data.already_shared_; + DCHECK_EQ(static_cast<ssize_t>(data.shadow_.Size()), data.end_ - data.begin_); + // There could be threads running in suspended mode when the compaction + // pause is being executed. In order to make the userfaultfd setup atomic, + // the registration has to be done *before* moving the pages to shadow map. + if (!mmap_again) { + // See the comment in the constructor as to why it's conditionally done. + RegisterUffd(data.begin_, + data.shadow_.Size(), + minor_fault_initialized_ ? kMinorFaultMode : kCopyMode); + } + KernelPrepareRangeForUffd(data.begin_, + data.shadow_.Begin(), + data.shadow_.Size(), + mmap_again ? kFdSharedAnon : kFdUnused); + if (mmap_again) { + data.already_shared_ = true; + RegisterUffd(data.begin_, + data.shadow_.Size(), + minor_fault_initialized_ ? 
kMinorFaultMode : kCopyMode); + } + } + } + if (map_shared) { + // Start mapping linear-alloc MAP_SHARED only after the compaction pause of + // the first GC in non-zygote processes. This is the GC which sets up + // mappings for using minor-fault in future. Up to this point we run + // userfaultfd in copy-mode, which requires the mappings (of linear-alloc) + // to be MAP_PRIVATE. + map_linear_alloc_shared_ = true; + } +} + +template <int kMode> +void MarkCompact::ConcurrentCompaction(uint8_t* buf) { + DCHECK_NE(kMode, kFallbackMode); + DCHECK(kMode != kCopyMode || buf != nullptr); + size_t nr_moving_space_used_pages = moving_first_objs_count_ + black_page_count_; + while (true) { + struct uffd_msg msg; + ssize_t nread = read(uffd_, &msg, sizeof(msg)); + CHECK_GT(nread, 0); + CHECK_EQ(msg.event, UFFD_EVENT_PAGEFAULT); + DCHECK_EQ(nread, static_cast<ssize_t>(sizeof(msg))); + uint8_t* fault_addr = reinterpret_cast<uint8_t*>(msg.arg.pagefault.address); + if (fault_addr == conc_compaction_termination_page_) { + // The counter doesn't need to be updated atomically as only one thread + // would wake up against the gc-thread's load to this fault_addr. In fact, + // the other threads would wake up serially because every exiting thread + // will wake up gc-thread, which would retry load but again would find the + // page missing. Also, the value will be flushed to caches due to the ioctl + // syscall below. + uint8_t ret = thread_pool_counter_--; + // If 'gKernelHasFaultRetry == true' then only the last thread should map the + // zeropage so that the gc-thread can proceed. Otherwise, each thread does + // it and the gc-thread will repeat this fault until thread_pool_counter == 0. + if (!gKernelHasFaultRetry || ret == 1) { + ZeropageIoctl(fault_addr, /*tolerate_eexist=*/false, /*tolerate_enoent=*/false); + } else { + struct uffdio_range uffd_range; + uffd_range.start = msg.arg.pagefault.address; + uffd_range.len = kPageSize; + CHECK_EQ(ioctl(uffd_, UFFDIO_WAKE, &uffd_range), 0) + << "ioctl_userfaultfd: wake failed for concurrent-compaction termination page: " + << strerror(errno); + } + break; + } + uint8_t* fault_page = AlignDown(fault_addr, kPageSize); + if (bump_pointer_space_->HasAddress(reinterpret_cast<mirror::Object*>(fault_addr))) { + ConcurrentlyProcessMovingPage<kMode>(fault_page, buf, nr_moving_space_used_pages); + } else if (minor_fault_initialized_) { + ConcurrentlyProcessLinearAllocPage<kMinorFaultMode>( + fault_page, (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) != 0); + } else { + ConcurrentlyProcessLinearAllocPage<kCopyMode>( + fault_page, (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) != 0); + } + } +} + +bool MarkCompact::SigbusHandler(siginfo_t* info) { + class ScopedInProgressCount { + public: + explicit ScopedInProgressCount(MarkCompact* collector) : collector_(collector) { + // Increment the count only if compaction is not done yet. 
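+      // The counter encodes two things: the kSigbusCounterCompactionDoneMask
+      // bit, which indicates that compaction has finished, and the remaining
+      // bits, which count the SIGBUS handlers currently in flight. The CAS
+      // below both re-checks the done bit and registers this handler; the
+      // destructor undoes the increment.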
+ SigbusCounterType prev = + collector_->sigbus_in_progress_count_.load(std::memory_order_relaxed); + while ((prev & kSigbusCounterCompactionDoneMask) == 0) { + if (collector_->sigbus_in_progress_count_.compare_exchange_strong( + prev, prev + 1, std::memory_order_acquire)) { + DCHECK_LT(prev, kSigbusCounterCompactionDoneMask - 1); + compaction_done_ = false; + return; + } + } + compaction_done_ = true; + } + + bool IsCompactionDone() const { + return compaction_done_; + } + + ~ScopedInProgressCount() { + if (!IsCompactionDone()) { + collector_->sigbus_in_progress_count_.fetch_sub(1, std::memory_order_release); + } + } + + private: + MarkCompact* const collector_; + bool compaction_done_; + }; + + DCHECK(use_uffd_sigbus_); + if (info->si_code != BUS_ADRERR) { + // Userfaultfd raises SIGBUS with BUS_ADRERR. All other causes can't be + // handled here. + return false; + } + + ScopedInProgressCount spc(this); + uint8_t* fault_page = AlignDown(reinterpret_cast<uint8_t*>(info->si_addr), kPageSize); + if (!spc.IsCompactionDone()) { + if (bump_pointer_space_->HasAddress(reinterpret_cast<mirror::Object*>(fault_page))) { + Thread* self = Thread::Current(); + Locks::mutator_lock_->AssertSharedHeld(self); + size_t nr_moving_space_used_pages = moving_first_objs_count_ + black_page_count_; + if (minor_fault_initialized_) { + ConcurrentlyProcessMovingPage<kMinorFaultMode>( + fault_page, nullptr, nr_moving_space_used_pages); + } else { + ConcurrentlyProcessMovingPage<kCopyMode>( + fault_page, self->GetThreadLocalGcBuffer(), nr_moving_space_used_pages); + } + return true; + } else { + // Find the linear-alloc space containing fault-addr + for (auto& data : linear_alloc_spaces_data_) { + if (data.begin_ <= fault_page && data.end_ > fault_page) { + if (minor_fault_initialized_) { + ConcurrentlyProcessLinearAllocPage<kMinorFaultMode>(fault_page, false); + } else { + ConcurrentlyProcessLinearAllocPage<kCopyMode>(fault_page, false); + } + return true; + } + } + // Fault address doesn't belong to either moving-space or linear-alloc. + return false; + } + } else { + // We may spuriously get SIGBUS fault, which was initiated before the + // compaction was finished, but ends up here. In that case, if the fault + // address is valid then consider it handled. + return bump_pointer_space_->HasAddress(reinterpret_cast<mirror::Object*>(fault_page)) || + linear_alloc_spaces_data_.end() != + std::find_if(linear_alloc_spaces_data_.begin(), + linear_alloc_spaces_data_.end(), + [fault_page](const LinearAllocSpaceData& data) { + return data.begin_ <= fault_page && data.end_ > fault_page; + }); + } +} + +static void BackOff(uint32_t i) { + static constexpr uint32_t kYieldMax = 5; + // TODO: Consider adding x86 PAUSE and/or ARM YIELD here. + if (i <= kYieldMax) { + sched_yield(); + } else { + // nanosleep is not in the async-signal-safe list, but bionic implements it + // with a pure system call, so it should be fine. 
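+    // Linear back-off: sleep 10us for every iteration past kYieldMax.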
+ NanoSleep(10000ull * (i - kYieldMax)); + } +} + +template <int kMode> +void MarkCompact::ConcurrentlyProcessMovingPage(uint8_t* fault_page, + uint8_t* buf, + size_t nr_moving_space_used_pages) { + class ScopedInProgressCount { + public: + explicit ScopedInProgressCount(MarkCompact* collector) : collector_(collector) { + collector_->compaction_in_progress_count_.fetch_add(1, std::memory_order_relaxed); + } + + ~ScopedInProgressCount() { + collector_->compaction_in_progress_count_.fetch_sub(1, std::memory_order_relaxed); + } + + private: + MarkCompact* collector_; + }; + + uint8_t* unused_space_begin = + bump_pointer_space_->Begin() + nr_moving_space_used_pages * kPageSize; + DCHECK(IsAligned<kPageSize>(unused_space_begin)); + DCHECK(kMode == kCopyMode || fault_page < unused_space_begin); + if (kMode == kCopyMode && fault_page >= unused_space_begin) { + // There is a race which allows more than one thread to install a + // zero-page. But we can tolerate that. So absorb the EEXIST returned by + // the ioctl and move on. + ZeropageIoctl(fault_page, /*tolerate_eexist=*/true, /*tolerate_enoent=*/true); + return; + } + size_t page_idx = (fault_page - bump_pointer_space_->Begin()) / kPageSize; + mirror::Object* first_obj = first_objs_moving_space_[page_idx].AsMirrorPtr(); + if (first_obj == nullptr) { + // We should never have a case where two workers are trying to install a + // zeropage in this range as we synchronize using moving_pages_status_[page_idx]. + PageState expected_state = PageState::kUnprocessed; + if (moving_pages_status_[page_idx].compare_exchange_strong( + expected_state, PageState::kProcessedAndMapping, std::memory_order_relaxed)) { + // Note: ioctl acts as an acquire fence. + ZeropageIoctl(fault_page, /*tolerate_eexist=*/false, /*tolerate_enoent=*/true); + } else { + DCHECK_EQ(expected_state, PageState::kProcessedAndMapping); + } + return; + } + + PageState state = moving_pages_status_[page_idx].load( + use_uffd_sigbus_ ? std::memory_order_acquire : std::memory_order_relaxed); + uint32_t backoff_count = 0; + while (true) { + switch (state) { + case PageState::kUnprocessed: { + // The increment to the in-progress counter must be done before updating + // the page's state. Otherwise, we will end up leaving a window wherein + // the GC-thread could observe that no worker is working on compaction + // and could end up unregistering the moving space from userfaultfd. + ScopedInProgressCount spc(this); + // Acquire order to ensure we don't start writing to shadow map, which is + // shared, before the CAS is successful. Release order to ensure that the + // increment to moving_compactions_in_progress above is not re-ordered + // after the CAS. + if (moving_pages_status_[page_idx].compare_exchange_strong( + state, PageState::kMutatorProcessing, std::memory_order_acq_rel)) { + if (kMode == kMinorFaultMode) { + DCHECK_EQ(buf, nullptr); + buf = shadow_to_space_map_.Begin() + page_idx * kPageSize; + } else if (UNLIKELY(buf == nullptr)) { + DCHECK_EQ(kMode, kCopyMode); + uint16_t idx = compaction_buffer_counter_.fetch_add(1, std::memory_order_relaxed); + // The buffer-map is one page bigger as the first buffer is used by GC-thread. + CHECK_LE(idx, kMutatorCompactionBufferCount); + buf = compaction_buffers_map_.Begin() + idx * kPageSize; + DCHECK(compaction_buffers_map_.HasAddress(buf)); + Thread::Current()->SetThreadLocalGcBuffer(buf); + } + + if (fault_page < post_compact_end_) { + // The page has to be compacted. 
+ CompactPage( + first_obj, pre_compact_offset_moving_space_[page_idx], buf, kMode == kCopyMode); + } else { + DCHECK_NE(first_obj, nullptr); + DCHECK_GT(pre_compact_offset_moving_space_[page_idx], 0u); + uint8_t* pre_compact_page = black_allocations_begin_ + (fault_page - post_compact_end_); + DCHECK(IsAligned<kPageSize>(pre_compact_page)); + SlideBlackPage(first_obj, page_idx, pre_compact_page, buf, kMode == kCopyMode); + } + // Nobody else would simultaneously modify this page's state so an + // atomic store is sufficient. Use 'release' order to guarantee that + // loads/stores to the page are finished before this store. + moving_pages_status_[page_idx].store(PageState::kProcessedAndMapping, + std::memory_order_release); + if (kMode == kCopyMode) { + CopyIoctl(fault_page, buf); + if (use_uffd_sigbus_) { + // Store is sufficient as no other thread modifies the status at this stage. + moving_pages_status_[page_idx].store(PageState::kProcessedAndMapped, + std::memory_order_release); + } + return; + } else { + break; + } + } + } + continue; + case PageState::kProcessing: + DCHECK_EQ(kMode, kMinorFaultMode); + if (moving_pages_status_[page_idx].compare_exchange_strong( + state, PageState::kProcessingAndMapping, std::memory_order_relaxed) && + !use_uffd_sigbus_) { + // Somebody else took or will take care of finishing the compaction and + // then mapping the page. + return; + } + continue; + case PageState::kProcessed: + // The page is processed but not mapped. We should map it. + break; + case PageState::kProcessingAndMapping: + case PageState::kMutatorProcessing: + case PageState::kProcessedAndMapping: + if (use_uffd_sigbus_) { + // Wait for the page to be mapped before returning. + BackOff(backoff_count++); + state = moving_pages_status_[page_idx].load(std::memory_order_acquire); + continue; + } + return; + case PageState::kProcessedAndMapped: + // Somebody else took care of the page. + return; + } + break; + } + + DCHECK_EQ(kMode, kMinorFaultMode); + if (state == PageState::kUnprocessed) { + MapProcessedPages</*kFirstPageMapping=*/true>( + fault_page, moving_pages_status_, page_idx, nr_moving_space_used_pages); + } else { + DCHECK_EQ(state, PageState::kProcessed); + MapProcessedPages</*kFirstPageMapping=*/false>( + fault_page, moving_pages_status_, page_idx, nr_moving_space_used_pages); + } +} + +void MarkCompact::MapUpdatedLinearAllocPage(uint8_t* page, + uint8_t* shadow_page, + Atomic<PageState>& state, + bool page_touched) { + DCHECK(!minor_fault_initialized_); + if (page_touched) { + CopyIoctl(page, shadow_page); + } else { + // If the page wasn't touched, then it means it is empty and + // is most likely not present on the shadow-side. Furthermore, + // since the shadow is also userfaultfd registered doing copy + // ioctl fail as the copy-from-user in the kernel will cause + // userfault. Instead, just map a zeropage, which is not only + // correct but also efficient as it avoids unnecessary memcpy + // in the kernel. + ZeropageIoctl(page, /*tolerate_eexist=*/false, /*tolerate_enoent=*/false); + } + if (use_uffd_sigbus_) { + // Store is sufficient as no other thread can modify the + // status of this page at this point. 
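The helper above chooses between two userfaultfd ioctls: UFFDIO_COPY when the shadow page was actually written, and UFFDIO_ZEROPAGE when it stayed untouched. A minimal sketch of that pair of calls (ResolveFault is a hypothetical name; error handling is reduced to a bool):

#include <linux/userfaultfd.h>
#include <sys/ioctl.h>
#include <cstdint>

// Map 'len' bytes at 'dst' in a uffd-registered range: copy from 'src' if the
// shadow page was written, otherwise install zero pages.
bool ResolveFault(int uffd, void* dst, void* src, uint64_t len, bool touched) {
  if (touched) {
    struct uffdio_copy copy = {};
    copy.dst = reinterpret_cast<uintptr_t>(dst);
    copy.src = reinterpret_cast<uintptr_t>(src);
    copy.len = len;
    return ioctl(uffd, UFFDIO_COPY, &copy) == 0;
  }
  struct uffdio_zeropage zero = {};
  zero.range.start = reinterpret_cast<uintptr_t>(dst);
  zero.range.len = len;
  return ioctl(uffd, UFFDIO_ZEROPAGE, &zero) == 0;
}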
+ state.store(PageState::kProcessedAndMapped, std::memory_order_release); + } +} + +template <int kMode> +void MarkCompact::ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool is_minor_fault) { + DCHECK(!is_minor_fault || kMode == kMinorFaultMode); + auto arena_iter = linear_alloc_arenas_.end(); + { + TrackedArena temp_arena(fault_page); + arena_iter = linear_alloc_arenas_.upper_bound(&temp_arena); + arena_iter = arena_iter != linear_alloc_arenas_.begin() ? std::prev(arena_iter) + : linear_alloc_arenas_.end(); + } + if (arena_iter == linear_alloc_arenas_.end() || arena_iter->second <= fault_page) { + // Fault page isn't in any of the arenas that existed before we started + // compaction. So map zeropage and return. + ZeropageIoctl(fault_page, /*tolerate_eexist=*/true, /*tolerate_enoent=*/false); + } else { + // fault_page should always belong to some arena. + DCHECK(arena_iter != linear_alloc_arenas_.end()) + << "fault_page:" << static_cast<void*>(fault_page) << "is_minor_fault:" << is_minor_fault; + // Find the linear-alloc space containing fault-page + LinearAllocSpaceData* space_data = nullptr; + for (auto& data : linear_alloc_spaces_data_) { + if (data.begin_ <= fault_page && fault_page < data.end_) { + space_data = &data; + break; + } + } + DCHECK_NE(space_data, nullptr); + ptrdiff_t diff = space_data->shadow_.Begin() - space_data->begin_; + size_t page_idx = (fault_page - space_data->begin_) / kPageSize; + Atomic<PageState>* state_arr = + reinterpret_cast<Atomic<PageState>*>(space_data->page_status_map_.Begin()); + PageState state = state_arr[page_idx].load(use_uffd_sigbus_ ? std::memory_order_acquire : + std::memory_order_relaxed); + uint32_t backoff_count = 0; + while (true) { + switch (state) { + case PageState::kUnprocessed: { + // Acquire order to ensure we don't start writing to shadow map, which is + // shared, before the CAS is successful. + if (state_arr[page_idx].compare_exchange_strong( + state, PageState::kProcessingAndMapping, std::memory_order_acquire)) { + if (kMode == kCopyMode || is_minor_fault) { + uint8_t* first_obj = arena_iter->first->GetFirstObject(fault_page); + DCHECK_NE(first_obj, nullptr); + LinearAllocPageUpdater updater(this); + updater(fault_page + diff, first_obj + diff); + if (kMode == kCopyMode) { + MapUpdatedLinearAllocPage(fault_page, + fault_page + diff, + state_arr[page_idx], + updater.WasLastPageTouched()); + return; + } + } else { + // Don't touch the page in this case (there is no reason to do so + // anyways) as it would mean reading from first_obj, which could be on + // another missing page and hence may cause this thread to block, leading + // to deadlocks. + // Force read the page if it is missing so that a zeropage gets mapped on + // the shadow map and then CONTINUE ioctl will map it on linear-alloc. + ForceRead(fault_page + diff); + } + MapProcessedPages</*kFirstPageMapping=*/true>( + fault_page, state_arr, page_idx, space_data->page_status_map_.Size()); + return; + } + } + continue; + case PageState::kProcessing: + DCHECK_EQ(kMode, kMinorFaultMode); + if (state_arr[page_idx].compare_exchange_strong( + state, PageState::kProcessingAndMapping, std::memory_order_relaxed) && + !use_uffd_sigbus_) { + // Somebody else took or will take care of finishing the updates and + // then mapping the page. + return; + } + continue; + case PageState::kProcessed: + // The page is processed but not mapped. We should map it. 
+ break; + case PageState::kMutatorProcessing: + UNREACHABLE(); + case PageState::kProcessingAndMapping: + case PageState::kProcessedAndMapping: + if (use_uffd_sigbus_) { + // Wait for the page to be mapped before returning. + BackOff(backoff_count++); + state = state_arr[page_idx].load(std::memory_order_acquire); + continue; + } + return; + case PageState::kProcessedAndMapped: + // Somebody else took care of the page. + return; + } + break; + } + + DCHECK_EQ(kMode, kMinorFaultMode); + DCHECK_EQ(state, PageState::kProcessed); + if (!is_minor_fault) { + // Force read the page if it is missing so that a zeropage gets mapped on + // the shadow map and then CONTINUE ioctl will map it on linear-alloc. + ForceRead(fault_page + diff); + } + MapProcessedPages</*kFirstPageMapping=*/false>( + fault_page, state_arr, page_idx, space_data->page_status_map_.Size()); + } +} + +void MarkCompact::ProcessLinearAlloc() { + GcVisitedArenaPool* arena_pool = + static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool()); + for (auto& pair : linear_alloc_arenas_) { + const TrackedArena* arena = pair.first; + size_t arena_size; + uint8_t* arena_begin; + ptrdiff_t diff; + bool others_processing; + { + // Acquire arena-pool's lock so that the arena being worked cannot be + // deallocated at the same time. + std::lock_guard<std::mutex> lock(arena_pool->GetLock()); + // If any arenas were freed since compaction pause then skip them from + // visiting. + if (arena_pool->AreArenasFreed() && !arena_pool->FindAllocatedArena(arena)) { + continue; + } + uint8_t* last_byte = pair.second; + DCHECK_ALIGNED(last_byte, kPageSize); + others_processing = false; + arena_begin = arena->Begin(); + arena_size = arena->Size(); + // Find the linear-alloc space containing the arena + LinearAllocSpaceData* space_data = nullptr; + for (auto& data : linear_alloc_spaces_data_) { + if (data.begin_ <= arena_begin && arena_begin < data.end_) { + space_data = &data; + break; + } + } + DCHECK_NE(space_data, nullptr); + diff = space_data->shadow_.Begin() - space_data->begin_; + auto visitor = [space_data, last_byte, diff, this, &others_processing]( + uint8_t* page_begin, + uint8_t* first_obj) REQUIRES_SHARED(Locks::mutator_lock_) { + // No need to process pages past last_byte as they already have updated + // gc-roots, if any. + if (page_begin >= last_byte) { + return; + } + LinearAllocPageUpdater updater(this); + size_t page_idx = (page_begin - space_data->begin_) / kPageSize; + DCHECK_LT(page_idx, space_data->page_status_map_.Size()); + Atomic<PageState>* state_arr = + reinterpret_cast<Atomic<PageState>*>(space_data->page_status_map_.Begin()); + PageState expected_state = PageState::kUnprocessed; + PageState desired_state = + minor_fault_initialized_ ? PageState::kProcessing : PageState::kProcessingAndMapping; + // Acquire order to ensure that we don't start accessing the shadow page, + // which is shared with other threads, prior to CAS. Also, for same + // reason, we used 'release' order for changing the state to 'processed'. 
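The compare-exchange that follows is the claim step of the per-page state machine: whichever thread wins the CAS from kUnprocessed owns the page, and everyone else either waits for it to become mapped or moves on. A reduced sketch of that claim idiom, with a hypothetical process_and_map callback standing in for the real compaction/update work:

#include <atomic>
#include <cstddef>
#include <cstdint>

enum class PageState : uint8_t { kUnprocessed, kProcessingAndMapping, kProcessedAndMapped };

// Claim-then-process: only the CAS winner touches the shared shadow data.
void ClaimPage(std::atomic<PageState>* status, size_t page_idx,
               void (*process_and_map)(size_t)) {
  PageState expected = PageState::kUnprocessed;
  // Acquire on success: do not access the shared page before owning it.
  if (status[page_idx].compare_exchange_strong(
          expected, PageState::kProcessingAndMapping, std::memory_order_acquire)) {
    process_and_map(page_idx);
    // Release: all writes to the page happen-before anyone observing 'mapped'.
    status[page_idx].store(PageState::kProcessedAndMapped, std::memory_order_release);
  }
  // Losers see the current state in 'expected' and can wait or return.
}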
+ if (state_arr[page_idx].compare_exchange_strong( + expected_state, desired_state, std::memory_order_acquire)) { + updater(page_begin + diff, first_obj + diff); + expected_state = PageState::kProcessing; + if (!minor_fault_initialized_) { + MapUpdatedLinearAllocPage( + page_begin, page_begin + diff, state_arr[page_idx], updater.WasLastPageTouched()); + } else if (!state_arr[page_idx].compare_exchange_strong( + expected_state, PageState::kProcessed, std::memory_order_release)) { + DCHECK_EQ(expected_state, PageState::kProcessingAndMapping); + // Force read in case the page was missing and updater didn't touch it + // as there was nothing to do. This will ensure that a zeropage is + // faulted on the shadow map. + ForceRead(page_begin + diff); + MapProcessedPages</*kFirstPageMapping=*/true>( + page_begin, state_arr, page_idx, space_data->page_status_map_.Size()); + } + } else { + others_processing = true; + } + }; + + arena->VisitRoots(visitor); + } + // If we are not in minor-fault mode and if no other thread was found to be + // processing any pages in this arena, then we can madvise the shadow size. + // Otherwise, we will double the memory use for linear-alloc. + if (!minor_fault_initialized_ && !others_processing) { + ZeroAndReleasePages(arena_begin + diff, arena_size); + } + } +} + +void MarkCompact::RegisterUffd(void* addr, size_t size, int mode) { + DCHECK(IsValidFd(uffd_)); + struct uffdio_register uffd_register; + uffd_register.range.start = reinterpret_cast<uintptr_t>(addr); + uffd_register.range.len = size; + uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING; + if (mode == kMinorFaultMode) { + uffd_register.mode |= UFFDIO_REGISTER_MODE_MINOR; + } + CHECK_EQ(ioctl(uffd_, UFFDIO_REGISTER, &uffd_register), 0) + << "ioctl_userfaultfd: register failed: " << strerror(errno) + << ". start:" << static_cast<void*>(addr) << " len:" << PrettySize(size); +} + +void MarkCompact::UnregisterUffd(uint8_t* start, size_t len) { + DCHECK(IsValidFd(uffd_)); + struct uffdio_range range; + range.start = reinterpret_cast<uintptr_t>(start); + range.len = len; + CHECK_EQ(ioctl(uffd_, UFFDIO_UNREGISTER, &range), 0) + << "ioctl_userfaultfd: unregister failed: " << strerror(errno) + << ". addr:" << static_cast<void*>(start) << " len:" << PrettySize(len); + // Due to an oversight in the kernel implementation of 'unregister', the + // waiting threads are woken up only for copy uffds. Therefore, for now, we + // have to explicitly wake up the threads in minor-fault case. + // TODO: The fix in the kernel is being worked on. Once the kernel version + // containing the fix is known, make it conditional on that as well. + if (minor_fault_initialized_) { + CHECK_EQ(ioctl(uffd_, UFFDIO_WAKE, &range), 0) + << "ioctl_userfaultfd: wake failed: " << strerror(errno) + << ". addr:" << static_cast<void*>(start) << " len:" << PrettySize(len); + } +} + +void MarkCompact::CompactionPhase() { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); + { + int32_t freed_bytes = black_objs_slide_diff_; + bump_pointer_space_->RecordFree(freed_objects_, freed_bytes); + RecordFree(ObjectBytePair(freed_objects_, freed_bytes)); + } + + if (CanCompactMovingSpaceWithMinorFault()) { + CompactMovingSpace<kMinorFaultMode>(/*page=*/nullptr); + } else { + CompactMovingSpace<kCopyMode>(compaction_buffers_map_.Begin()); + } + + // Make sure no mutator is reading from the from-space before unregistering + // userfaultfd from moving-space and then zapping from-space. 
The mutator + and GC may race to set a page state to processing or further along. The two + attempts are ordered. If the collector wins, then the mutator will see that + and not access the from-space page. If the mutator wins, then the + compaction_in_progress_count_ increment by the mutator happens-before the test + here, and we will not see a zero value until the mutator has completed. + for (uint32_t i = 0; compaction_in_progress_count_.load(std::memory_order_acquire) > 0; i++) { + BackOff(i); + } + + size_t moving_space_size = bump_pointer_space_->Capacity(); + UnregisterUffd(bump_pointer_space_->Begin(), + minor_fault_initialized_ ? + (moving_first_objs_count_ + black_page_count_) * kPageSize : + moving_space_size); + + // Release all of the memory taken by moving-space's from-map + if (minor_fault_initialized_) { + if (IsValidFd(moving_from_space_fd_)) { + // A strange behavior is observed wherein between GC cycles the from-space's + // first page is accessed. But the memfd that is mapped on from-space is + // used on to-space in the next GC cycle, causing issues with userfaultfd as the + // page isn't missing. A possible reason for this could be prefetches. The + // mprotect ensures that such accesses don't succeed. + int ret = mprotect(from_space_begin_, moving_space_size, PROT_NONE); + CHECK_EQ(ret, 0) << "mprotect(PROT_NONE) for from-space failed: " << strerror(errno); + // madvise(MADV_REMOVE) needs PROT_WRITE. Use fallocate() instead, which + // does the same thing. + ret = fallocate(moving_from_space_fd_, + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + /*offset=*/0, + moving_space_size); + CHECK_EQ(ret, 0) << "fallocate for from-space failed: " << strerror(errno); + } else { + // We don't have a valid fd, so use madvise(MADV_REMOVE) instead. mprotect + // is not required in this case as we create a fresh + // MAP_SHARED+MAP_ANONYMOUS mapping in each GC cycle. + int ret = madvise(from_space_begin_, moving_space_size, MADV_REMOVE); + CHECK_EQ(ret, 0) << "madvise(MADV_REMOVE) failed for from-space map:" << strerror(errno); + } + } else { + from_space_map_.MadviseDontNeedAndZero(); + } + // mprotect(PROT_NONE) all maps except to-space in debug-mode to catch any unexpected accesses. + if (shadow_to_space_map_.IsValid()) { + DCHECK_EQ(mprotect(shadow_to_space_map_.Begin(), shadow_to_space_map_.Size(), PROT_NONE), 0) + << "mprotect(PROT_NONE) for shadow-map failed:" << strerror(errno); + } + if (!IsValidFd(moving_from_space_fd_)) { + // The other case is already mprotected above. + DCHECK_EQ(mprotect(from_space_begin_, moving_space_size, PROT_NONE), 0) + << "mprotect(PROT_NONE) for from-space failed: " << strerror(errno); + } + + ProcessLinearAlloc(); + + if (use_uffd_sigbus_) { + // Set the compaction-done bit so that no new mutator threads start the compaction + // process in the SIGBUS handler. + SigbusCounterType count = sigbus_in_progress_count_.fetch_or(kSigbusCounterCompactionDoneMask, + std::memory_order_acq_rel); + // Wait for SIGBUS handlers already in play. + for (uint32_t i = 0; count > 0; i++) { + BackOff(i); + count = sigbus_in_progress_count_.load(std::memory_order_acquire); + count &= ~kSigbusCounterCompactionDoneMask; + } + } else { + DCHECK(IsAligned<kPageSize>(conc_compaction_termination_page_)); + // We will only iterate once if gKernelHasFaultRetry is true. + do { + // madvise the page so that we can get userfaults on it. + ZeroAndReleasePages(conc_compaction_termination_page_, kPageSize); + // The following load triggers 'special' userfaults. 
When received by the + // thread-pool workers, they will exit out of the compaction task. This fault + // happens because we madvised the page. + ForceRead(conc_compaction_termination_page_); + } while (thread_pool_counter_ > 0); + } + // Unregister linear-alloc spaces + for (auto& data : linear_alloc_spaces_data_) { + DCHECK_EQ(data.end_ - data.begin_, static_cast<ssize_t>(data.shadow_.Size())); + UnregisterUffd(data.begin_, data.shadow_.Size()); + // madvise linear-allocs's page-status array + data.page_status_map_.MadviseDontNeedAndZero(); + // Madvise the entire linear-alloc space's shadow. In copy-mode it gets rid + // of the pages which are still mapped. In minor-fault mode this unmaps all + // pages, which is good in reducing the mremap (done in STW pause) time in + // next GC cycle. + data.shadow_.MadviseDontNeedAndZero(); + if (minor_fault_initialized_) { + DCHECK_EQ(mprotect(data.shadow_.Begin(), data.shadow_.Size(), PROT_NONE), 0) + << "mprotect failed: " << strerror(errno); + } + } + + if (!use_uffd_sigbus_) { + heap_->GetThreadPool()->StopWorkers(thread_running_gc_); + } +} + +template <size_t kBufferSize> +class MarkCompact::ThreadRootsVisitor : public RootVisitor { + public: + explicit ThreadRootsVisitor(MarkCompact* mark_compact, Thread* const self) + : mark_compact_(mark_compact), self_(self) {} + + ~ThreadRootsVisitor() { + Flush(); + } + + void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED) + override REQUIRES_SHARED(Locks::mutator_lock_) + REQUIRES(Locks::heap_bitmap_lock_) { + for (size_t i = 0; i < count; i++) { + mirror::Object* obj = *roots[i]; + if (mark_compact_->MarkObjectNonNullNoPush</*kParallel*/true>(obj)) { + Push(obj); + } + } + } + + void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, + size_t count, + const RootInfo& info ATTRIBUTE_UNUSED) + override REQUIRES_SHARED(Locks::mutator_lock_) + REQUIRES(Locks::heap_bitmap_lock_) { + for (size_t i = 0; i < count; i++) { + mirror::Object* obj = roots[i]->AsMirrorPtr(); + if (mark_compact_->MarkObjectNonNullNoPush</*kParallel*/true>(obj)) { + Push(obj); + } + } + } + + private: + void Flush() REQUIRES_SHARED(Locks::mutator_lock_) + REQUIRES(Locks::heap_bitmap_lock_) { + StackReference<mirror::Object>* start; + StackReference<mirror::Object>* end; + { + MutexLock mu(self_, mark_compact_->lock_); + // Loop here because even after expanding once it may not be sufficient to + // accommodate all references. It's almost impossible, but there is no harm + // in implementing it this way. + while (!mark_compact_->mark_stack_->BumpBack(idx_, &start, &end)) { + mark_compact_->ExpandMarkStack(); + } + } + while (idx_ > 0) { + *start++ = roots_[--idx_]; + } + DCHECK_EQ(start, end); + } + + void Push(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_) + REQUIRES(Locks::heap_bitmap_lock_) { + if (UNLIKELY(idx_ >= kBufferSize)) { + Flush(); + } + roots_[idx_++].Assign(obj); + } + + StackReference<mirror::Object> roots_[kBufferSize]; + size_t idx_ = 0; + MarkCompact* const mark_compact_; + Thread* const self_; +}; + +class MarkCompact::CheckpointMarkThreadRoots : public Closure { + public: + explicit CheckpointMarkThreadRoots(MarkCompact* mark_compact) : mark_compact_(mark_compact) {} + + void Run(Thread* thread) override NO_THREAD_SAFETY_ANALYSIS { + ScopedTrace trace("Marking thread roots"); + // Note: self is not necessarily equal to thread since thread may be + // suspended. 
+ Thread* const self = Thread::Current(); + CHECK(thread == self + || thread->IsSuspended() + || thread->GetState() == ThreadState::kWaitingPerformingGc) + << thread->GetState() << " thread " << thread << " self " << self; + { + ThreadRootsVisitor</*kBufferSize*/ 20> visitor(mark_compact_, self); + thread->VisitRoots(&visitor, kVisitRootFlagAllRoots); + } + // Clear page-buffer to prepare for compaction phase. + thread->SetThreadLocalGcBuffer(nullptr); + + // If thread is a running mutator, then act on behalf of the garbage + // collector. See the code in ThreadList::RunCheckpoint. + mark_compact_->GetBarrier().Pass(self); + } + + private: + MarkCompact* const mark_compact_; +}; + +void MarkCompact::MarkRootsCheckpoint(Thread* self, Runtime* runtime) { + // We revoke TLABs later during the paused round of marking. + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); + CheckpointMarkThreadRoots check_point(this); + ThreadList* thread_list = runtime->GetThreadList(); + gc_barrier_.Init(self, 0); + // Request that the checkpoint be run on all threads, returning a count of the threads that must + // run through the barrier, including self. + size_t barrier_count = thread_list->RunCheckpoint(&check_point); + // Release locks then wait for all mutator threads to pass the barrier. + // If there are no threads to wait for, which implies that all the checkpoint functions have finished, + // then there is no need to release the locks. + if (barrier_count == 0) { + return; + } + Locks::heap_bitmap_lock_->ExclusiveUnlock(self); + Locks::mutator_lock_->SharedUnlock(self); + { + ScopedThreadStateChange tsc(self, ThreadState::kWaitingForCheckPointsToRun); + gc_barrier_.Increment(self, barrier_count); + } + Locks::mutator_lock_->SharedLock(self); + Locks::heap_bitmap_lock_->ExclusiveLock(self); +} + +void MarkCompact::MarkNonThreadRoots(Runtime* runtime) { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); + runtime->VisitNonThreadRoots(this); +} + +void MarkCompact::MarkConcurrentRoots(VisitRootFlags flags, Runtime* runtime) { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); + runtime->VisitConcurrentRoots(this, flags); +} + +void MarkCompact::RevokeAllThreadLocalBuffers() { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); + bump_pointer_space_->RevokeAllThreadLocalBuffers(); +} + +class MarkCompact::ScanObjectVisitor { + public: + explicit ScanObjectVisitor(MarkCompact* const mark_compact) ALWAYS_INLINE + : mark_compact_(mark_compact) {} + + void operator()(ObjPtr<mirror::Object> obj) const + ALWAYS_INLINE + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES_SHARED(Locks::mutator_lock_) { + mark_compact_->ScanObject</*kUpdateLiveWords*/ false>(obj.Ptr()); + } + + private: + MarkCompact* const mark_compact_; +}; + +void MarkCompact::UpdateAndMarkModUnion() { + accounting::CardTable* const card_table = heap_->GetCardTable(); + for (const auto& space : immune_spaces_.GetSpaces()) { + const char* name = space->IsZygoteSpace() + ? "UpdateAndMarkZygoteModUnionTable" + : "UpdateAndMarkImageModUnionTable"; + DCHECK(space->IsZygoteSpace() || space->IsImageSpace()) << *space; + TimingLogger::ScopedTiming t(name, GetTimings()); + accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space); + if (table != nullptr) { + // UpdateAndMarkReferences() doesn't visit Reference-type objects. But + // that's fine because these objects are immutable enough (referent can + // only be cleared) and hence the only referents they can have are intra-space. 
+ table->UpdateAndMarkReferences(this); + } else { + // No mod-union table, scan all dirty/aged cards in the corresponding + // card-table. This can only occur for app images. + card_table->Scan</*kClearCard*/ false>(space->GetMarkBitmap(), + space->Begin(), + space->End(), + ScanObjectVisitor(this), + gc::accounting::CardTable::kCardAged); + } + } +} + +void MarkCompact::MarkReachableObjects() { + UpdateAndMarkModUnion(); + // Recursively mark all the non-image bits set in the mark bitmap. + ProcessMarkStack(); +} + +class MarkCompact::CardModifiedVisitor { + public: + explicit CardModifiedVisitor(MarkCompact* const mark_compact, + accounting::ContinuousSpaceBitmap* const bitmap, + accounting::CardTable* const card_table) + : visitor_(mark_compact), bitmap_(bitmap), card_table_(card_table) {} + + void operator()(uint8_t* card, + uint8_t expected_value, + uint8_t new_value ATTRIBUTE_UNUSED) const { + if (expected_value == accounting::CardTable::kCardDirty) { + uintptr_t start = reinterpret_cast<uintptr_t>(card_table_->AddrFromCard(card)); + bitmap_->VisitMarkedRange(start, start + accounting::CardTable::kCardSize, visitor_); + } + } + + private: + ScanObjectVisitor visitor_; + accounting::ContinuousSpaceBitmap* bitmap_; + accounting::CardTable* const card_table_; +}; + +void MarkCompact::ScanDirtyObjects(bool paused, uint8_t minimum_age) { + accounting::CardTable* card_table = heap_->GetCardTable(); + for (const auto& space : heap_->GetContinuousSpaces()) { + const char* name = nullptr; + switch (space->GetGcRetentionPolicy()) { + case space::kGcRetentionPolicyNeverCollect: + name = paused ? "(Paused)ScanGrayImmuneSpaceObjects" : "ScanGrayImmuneSpaceObjects"; + break; + case space::kGcRetentionPolicyFullCollect: + name = paused ? "(Paused)ScanGrayZygoteSpaceObjects" : "ScanGrayZygoteSpaceObjects"; + break; + case space::kGcRetentionPolicyAlwaysCollect: + name = paused ? "(Paused)ScanGrayAllocSpaceObjects" : "ScanGrayAllocSpaceObjects"; + break; + default: + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); + } + TimingLogger::ScopedTiming t(name, GetTimings()); + ScanObjectVisitor visitor(this); + const bool is_immune_space = space->IsZygoteSpace() || space->IsImageSpace(); + if (paused) { + DCHECK_EQ(minimum_age, gc::accounting::CardTable::kCardDirty); + // We can clear the card-table for any non-immune space. + if (is_immune_space) { + card_table->Scan</*kClearCard*/false>(space->GetMarkBitmap(), + space->Begin(), + space->End(), + visitor, + minimum_age); + } else { + card_table->Scan</*kClearCard*/true>(space->GetMarkBitmap(), + space->Begin(), + space->End(), + visitor, + minimum_age); + } + } else { + DCHECK_EQ(minimum_age, gc::accounting::CardTable::kCardAged); + accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space); + if (table) { + table->ProcessCards(); + card_table->Scan</*kClearCard*/false>(space->GetMarkBitmap(), + space->Begin(), + space->End(), + visitor, + minimum_age); + } else { + CardModifiedVisitor card_modified_visitor(this, space->GetMarkBitmap(), card_table); + // For the alloc spaces we should age the dirty cards and clear the rest. + // For image and zygote-space without mod-union-table, age the dirty + // cards but keep the already aged cards unchanged. + // In either case, visit the objects on the cards that were changed from + // dirty to aged. + if (is_immune_space) { + card_table->ModifyCardsAtomic(space->Begin(), + space->End(), + [](uint8_t card) { + return (card == gc::accounting::CardTable::kCardClean) + ? 
card + : gc::accounting::CardTable::kCardAged; + }, + card_modified_visitor); + } else { + card_table->ModifyCardsAtomic(space->Begin(), + space->End(), + AgeCardVisitor(), + card_modified_visitor); + } + } + } + } +} + +void MarkCompact::RecursiveMarkDirtyObjects(bool paused, uint8_t minimum_age) { + ScanDirtyObjects(paused, minimum_age); + ProcessMarkStack(); +} + +void MarkCompact::MarkRoots(VisitRootFlags flags) { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); + Runtime* runtime = Runtime::Current(); + // Make sure that the checkpoint which collects the stack roots is the first + // one capturing GC-roots. As this one is supposed to find the address, + // everything allocated after that (during this marking phase) will be + // considered 'marked'. + MarkRootsCheckpoint(thread_running_gc_, runtime); + MarkNonThreadRoots(runtime); + MarkConcurrentRoots(flags, runtime); +} + +void MarkCompact::PreCleanCards() { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); + CHECK(!Locks::mutator_lock_->IsExclusiveHeld(thread_running_gc_)); + MarkRoots(static_cast<VisitRootFlags>(kVisitRootFlagClearRootLog | kVisitRootFlagNewRoots)); + RecursiveMarkDirtyObjects(/*paused*/ false, accounting::CardTable::kCardDirty - 1); +} + +// In a concurrent marking algorithm, if we are not using a write/read barrier, as +// in this case, then we need a stop-the-world (STW) round in the end to mark +// objects which were written into concurrently while concurrent marking was +// performed. +// In order to minimize the pause time, we could take one of the two approaches: +// 1. Keep repeating concurrent marking of dirty cards until the time spent goes +// below a threshold. +// 2. Do two rounds concurrently and then attempt a paused one. If we figure +// that it's taking too long, then resume mutators and retry. +// +// Given the non-trivial fixed overhead of running a round (card table and root +// scan), it might be better to go with approach 2. +void MarkCompact::MarkingPhase() { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); + DCHECK_EQ(thread_running_gc_, Thread::Current()); + WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_); + BindAndResetBitmaps(); + MarkZygoteLargeObjects(); + MarkRoots( + static_cast<VisitRootFlags>(kVisitRootFlagAllRoots | kVisitRootFlagStartLoggingNewRoots)); + MarkReachableObjects(); + // Pre-clean dirtied cards to reduce pauses. + PreCleanCards(); + + // Set up reference processing and forward soft references once before enabling the + // slow path (in MarkingPause). + ReferenceProcessor* rp = GetHeap()->GetReferenceProcessor(); + bool clear_soft_references = GetCurrentIteration()->GetClearSoftReferences(); + rp->Setup(thread_running_gc_, this, /*concurrent=*/ true, clear_soft_references); + if (!clear_soft_references) { + // Forward as many SoftReferences as possible before inhibiting reference access. 
+ rp->ForwardSoftReferences(GetTimings()); + } +} + +class MarkCompact::RefFieldsVisitor { + public: + ALWAYS_INLINE explicit RefFieldsVisitor(MarkCompact* const mark_compact) + : mark_compact_(mark_compact) {} + + ALWAYS_INLINE void operator()(mirror::Object* obj, + MemberOffset offset, + bool is_static ATTRIBUTE_UNUSED) const + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES_SHARED(Locks::mutator_lock_) { + if (kCheckLocks) { + Locks::mutator_lock_->AssertSharedHeld(Thread::Current()); + Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current()); + } + mark_compact_->MarkObject(obj->GetFieldObject<mirror::Object>(offset), obj, offset); + } + + void operator()(ObjPtr<mirror::Class> klass, ObjPtr<mirror::Reference> ref) const + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES_SHARED(Locks::mutator_lock_) { + mark_compact_->DelayReferenceReferent(klass, ref); + } + + void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES_SHARED(Locks::mutator_lock_) { + if (!root->IsNull()) { + VisitRoot(root); + } + } + + void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES_SHARED(Locks::mutator_lock_) { + if (kCheckLocks) { + Locks::mutator_lock_->AssertSharedHeld(Thread::Current()); + Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current()); + } + mark_compact_->MarkObject(root->AsMirrorPtr()); + } + + private: + MarkCompact* const mark_compact_; +}; + +template <size_t kAlignment> +size_t MarkCompact::LiveWordsBitmap<kAlignment>::LiveBytesInBitmapWord(size_t chunk_idx) const { + const size_t index = chunk_idx * kBitmapWordsPerVectorWord; + size_t words = 0; + for (uint32_t i = 0; i < kBitmapWordsPerVectorWord; i++) { + words += POPCOUNT(Bitmap::Begin()[index + i]); + } + return words * kAlignment; +} + +void MarkCompact::UpdateLivenessInfo(mirror::Object* obj, size_t obj_size) { + DCHECK(obj != nullptr); + DCHECK_EQ(obj_size, obj->SizeOf<kDefaultVerifyFlags>()); + uintptr_t obj_begin = reinterpret_cast<uintptr_t>(obj); + UpdateClassAfterObjectMap(obj); + size_t size = RoundUp(obj_size, kAlignment); + uintptr_t bit_index = live_words_bitmap_->SetLiveWords(obj_begin, size); + size_t chunk_idx = (obj_begin - live_words_bitmap_->Begin()) / kOffsetChunkSize; + // Compute the bit-index within the chunk-info vector word. + bit_index %= kBitsPerVectorWord; + size_t first_chunk_portion = std::min(size, (kBitsPerVectorWord - bit_index) * kAlignment); + + chunk_info_vec_[chunk_idx++] += first_chunk_portion; + DCHECK_LE(first_chunk_portion, size); + for (size -= first_chunk_portion; size > kOffsetChunkSize; size -= kOffsetChunkSize) { + DCHECK_EQ(chunk_info_vec_[chunk_idx], 0u); + chunk_info_vec_[chunk_idx++] = kOffsetChunkSize; + } + chunk_info_vec_[chunk_idx] += size; + freed_objects_--; +} + +template <bool kUpdateLiveWords> +void MarkCompact::ScanObject(mirror::Object* obj) { + // The size of `obj` is used both here (to update `bytes_scanned_`) and in + // `UpdateLivenessInfo`. As fetching this value can be expensive, do it once + // here and pass that information to `UpdateLivenessInfo`. 
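Returning to UpdateLivenessInfo above: it credits an object's alignment-rounded size to one or more fixed-size chunk-info slots, i.e. whatever fits in the first chunk, then whole chunks, then the tail. A reduced arithmetic sketch of that split (AddLiveBytes and its parameters are illustrative; the caller must size the vector to cover the range):

#include <algorithm>
#include <cstddef>
#include <vector>

// Distribute 'size' live bytes, starting 'offset_in_chunk' bytes into chunk
// 'chunk_idx', over per-chunk counters of 'chunk_size' bytes each.
void AddLiveBytes(std::vector<size_t>& chunk_info, size_t chunk_idx,
                  size_t offset_in_chunk, size_t size, size_t chunk_size) {
  size_t first = std::min(size, chunk_size - offset_in_chunk);
  chunk_info[chunk_idx++] += first;
  size -= first;
  if (size == 0) {
    return;
  }
  for (; size > chunk_size; size -= chunk_size) {
    chunk_info[chunk_idx++] = chunk_size;  // Fully-live middle chunks.
  }
  chunk_info[chunk_idx] += size;  // The tail portion.
}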
+ size_t obj_size = obj->SizeOf<kDefaultVerifyFlags>(); + bytes_scanned_ += obj_size; + + RefFieldsVisitor visitor(this); + DCHECK(IsMarked(obj)) << "Scanning marked object " << obj << "\n" << heap_->DumpSpaces(); + if (kUpdateLiveWords && moving_space_bitmap_->HasAddress(obj)) { + UpdateLivenessInfo(obj, obj_size); + } + obj->VisitReferences(visitor, visitor); +} + +// Scan anything that's on the mark stack. +void MarkCompact::ProcessMarkStack() { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); + // TODO: try prefetch like in CMS + while (!mark_stack_->IsEmpty()) { + mirror::Object* obj = mark_stack_->PopBack(); + DCHECK(obj != nullptr); + ScanObject</*kUpdateLiveWords*/ true>(obj); + } +} + +void MarkCompact::ExpandMarkStack() { + const size_t new_size = mark_stack_->Capacity() * 2; + std::vector<StackReference<mirror::Object>> temp(mark_stack_->Begin(), + mark_stack_->End()); + mark_stack_->Resize(new_size); + for (auto& ref : temp) { + mark_stack_->PushBack(ref.AsMirrorPtr()); + } + DCHECK(!mark_stack_->IsFull()); +} + +inline void MarkCompact::PushOnMarkStack(mirror::Object* obj) { + if (UNLIKELY(mark_stack_->IsFull())) { + ExpandMarkStack(); + } + mark_stack_->PushBack(obj); +} + +inline void MarkCompact::MarkObjectNonNull(mirror::Object* obj, + mirror::Object* holder, + MemberOffset offset) { + DCHECK(obj != nullptr); + if (MarkObjectNonNullNoPush</*kParallel*/false>(obj, holder, offset)) { + PushOnMarkStack(obj); + } +} + +template <bool kParallel> +inline bool MarkCompact::MarkObjectNonNullNoPush(mirror::Object* obj, + mirror::Object* holder, + MemberOffset offset) { + // We expect most of the referenes to be in bump-pointer space, so try that + // first to keep the cost of this function minimal. + if (LIKELY(moving_space_bitmap_->HasAddress(obj))) { + return kParallel ? !moving_space_bitmap_->AtomicTestAndSet(obj) + : !moving_space_bitmap_->Set(obj); + } else if (non_moving_space_bitmap_->HasAddress(obj)) { + return kParallel ? !non_moving_space_bitmap_->AtomicTestAndSet(obj) + : !non_moving_space_bitmap_->Set(obj); + } else if (immune_spaces_.ContainsObject(obj)) { + DCHECK(IsMarked(obj) != nullptr); + return false; + } else { + // Must be a large-object space, otherwise it's a case of heap corruption. + if (!IsAligned<kPageSize>(obj)) { + // Objects in large-object space are page aligned. So if we have an object + // which doesn't belong to any space and is not page-aligned as well, then + // it's memory corruption. + // TODO: implement protect/unprotect in bump-pointer space. + heap_->GetVerification()->LogHeapCorruption(holder, offset, obj, /*fatal*/ true); + } + DCHECK_NE(heap_->GetLargeObjectsSpace(), nullptr) + << "ref=" << obj + << " doesn't belong to any of the spaces and large object space doesn't exist"; + accounting::LargeObjectBitmap* los_bitmap = heap_->GetLargeObjectsSpace()->GetMarkBitmap(); + DCHECK(los_bitmap->HasAddress(obj)); + if (kParallel) { + los_bitmap->AtomicTestAndSet(obj); + } else { + los_bitmap->Set(obj); + } + // We only have primitive arrays in large object space. So there is no + // reason to push into mark-stack. 
+ DCHECK(obj->IsString() || (obj->IsArrayInstance() && !obj->IsObjectArray())); + return false; + } +} + +inline void MarkCompact::MarkObject(mirror::Object* obj, + mirror::Object* holder, + MemberOffset offset) { + if (obj != nullptr) { + MarkObjectNonNull(obj, holder, offset); + } +} + +mirror::Object* MarkCompact::MarkObject(mirror::Object* obj) { + MarkObject(obj, nullptr, MemberOffset(0)); + return obj; +} + +void MarkCompact::MarkHeapReference(mirror::HeapReference<mirror::Object>* obj, + bool do_atomic_update ATTRIBUTE_UNUSED) { + MarkObject(obj->AsMirrorPtr(), nullptr, MemberOffset(0)); +} + +void MarkCompact::VisitRoots(mirror::Object*** roots, + size_t count, + const RootInfo& info) { + if (compacting_) { + for (size_t i = 0; i < count; ++i) { + UpdateRoot(roots[i], info); + } + } else { + for (size_t i = 0; i < count; ++i) { + MarkObjectNonNull(*roots[i]); + } + } +} + +void MarkCompact::VisitRoots(mirror::CompressedReference<mirror::Object>** roots, + size_t count, + const RootInfo& info) { + // TODO: do we need to check if the root is null or not? + if (compacting_) { + for (size_t i = 0; i < count; ++i) { + UpdateRoot(roots[i], info); + } + } else { + for (size_t i = 0; i < count; ++i) { + MarkObjectNonNull(roots[i]->AsMirrorPtr()); + } + } +} + +mirror::Object* MarkCompact::IsMarked(mirror::Object* obj) { + if (moving_space_bitmap_->HasAddress(obj)) { + const bool is_black = reinterpret_cast<uint8_t*>(obj) >= black_allocations_begin_; + if (compacting_) { + if (is_black) { + return PostCompactBlackObjAddr(obj); + } else if (live_words_bitmap_->Test(obj)) { + return PostCompactOldObjAddr(obj); + } else { + return nullptr; + } + } + return (is_black || moving_space_bitmap_->Test(obj)) ? obj : nullptr; + } else if (non_moving_space_bitmap_->HasAddress(obj)) { + return non_moving_space_bitmap_->Test(obj) ? obj : nullptr; + } else if (immune_spaces_.ContainsObject(obj)) { + return obj; + } else { + DCHECK(heap_->GetLargeObjectsSpace()) + << "ref=" << obj + << " doesn't belong to any of the spaces and large object space doesn't exist"; + accounting::LargeObjectBitmap* los_bitmap = heap_->GetLargeObjectsSpace()->GetMarkBitmap(); + if (los_bitmap->HasAddress(obj)) { + DCHECK(IsAligned<kPageSize>(obj)); + return los_bitmap->Test(obj) ? obj : nullptr; + } else { + // The given obj is not in any of the known spaces, so return null. This could + // happen for instance in interpreter caches wherein a concurrent updation + // to the cache could result in obj being a non-reference. This is + // tolerable because SweepInterpreterCaches only updates if the given + // object has moved, which can't be the case for the non-reference. + return nullptr; + } + } +} + +bool MarkCompact::IsNullOrMarkedHeapReference(mirror::HeapReference<mirror::Object>* obj, + bool do_atomic_update ATTRIBUTE_UNUSED) { + mirror::Object* ref = obj->AsMirrorPtr(); + if (ref == nullptr) { + return true; + } + return IsMarked(ref); +} + +// Process the 'referent' field in a java.lang.ref.Reference. If the referent +// has not yet been marked, put it on the appropriate list in the heap for later +// processing. 
+void MarkCompact::DelayReferenceReferent(ObjPtr<mirror::Class> klass, + ObjPtr<mirror::Reference> ref) { + heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, ref, this); +} + +void MarkCompact::FinishPhase() { + GetCurrentIteration()->SetScannedBytes(bytes_scanned_); + bool is_zygote = Runtime::Current()->IsZygote(); + compacting_ = false; + minor_fault_initialized_ = !is_zygote && uffd_minor_fault_supported_; + // Madvise compaction buffers. When using threaded implementation, skip the first page, + // which is used by the gc-thread for the next iteration. Otherwise, we get into a + // deadlock due to userfault on it in the next iteration. This page is not consuming any + // physical memory because we already madvised it above and then we triggered a read + // userfault, which maps a special zero-page. + if (use_uffd_sigbus_ || !minor_fault_initialized_ || !shadow_to_space_map_.IsValid() || + shadow_to_space_map_.Size() < (moving_first_objs_count_ + black_page_count_) * kPageSize) { + size_t adjustment = use_uffd_sigbus_ ? 0 : kPageSize; + ZeroAndReleasePages(compaction_buffers_map_.Begin() + adjustment, + compaction_buffers_map_.Size() - adjustment); + } else if (shadow_to_space_map_.Size() == bump_pointer_space_->Capacity()) { + // Now that we are going to use minor-faults from next GC cycle, we can + // unmap the buffers used by worker threads. + compaction_buffers_map_.SetSize(kPageSize); + } + info_map_.MadviseDontNeedAndZero(); + live_words_bitmap_->ClearBitmap(); + // TODO: We can clear this bitmap right before compaction pause. But in that + // case we need to ensure that we don't assert on this bitmap afterwards. + // Also, we would still need to clear it here again as we may have to use the + // bitmap for black-allocations (see UpdateMovingSpaceBlackAllocations()). + moving_space_bitmap_->Clear(); + + if (UNLIKELY(is_zygote && IsValidFd(uffd_))) { + heap_->DeleteThreadPool(); + // This unregisters all ranges as a side-effect. + close(uffd_); + uffd_ = kFdUnused; + uffd_initialized_ = false; + } + CHECK(mark_stack_->IsEmpty()); // Ensure that the mark stack is empty. + mark_stack_->Reset(); + DCHECK_EQ(thread_running_gc_, Thread::Current()); + if (kIsDebugBuild) { + MutexLock mu(thread_running_gc_, lock_); + if (updated_roots_.get() != nullptr) { + updated_roots_->clear(); + } + } + class_after_obj_ordered_map_.clear(); + delete[] moving_pages_status_; + linear_alloc_arenas_.clear(); + { + ReaderMutexLock mu(thread_running_gc_, *Locks::mutator_lock_); + WriterMutexLock mu2(thread_running_gc_, *Locks::heap_bitmap_lock_); + heap_->ClearMarkedObjects(); + } + std::swap(moving_to_space_fd_, moving_from_space_fd_); + if (IsValidFd(moving_to_space_fd_)) { + // Confirm that the memfd to be used on to-space in next GC cycle is empty. + struct stat buf; + DCHECK_EQ(fstat(moving_to_space_fd_, &buf), 0) << "fstat failed: " << strerror(errno); + DCHECK_EQ(buf.st_blocks, 0u); + } +} + +} // namespace collector +} // namespace gc +} // namespace art diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h new file mode 100644 index 0000000000..d73f40d436 --- /dev/null +++ b/runtime/gc/collector/mark_compact.h @@ -0,0 +1,789 @@ +/* + * Copyright 2021 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_RUNTIME_GC_COLLECTOR_MARK_COMPACT_H_ +#define ART_RUNTIME_GC_COLLECTOR_MARK_COMPACT_H_ + +#include <signal.h> + +#include <map> +#include <memory> +#include <unordered_map> +#include <unordered_set> + +#include "barrier.h" +#include "base/atomic.h" +#include "base/gc_visited_arena_pool.h" +#include "base/macros.h" +#include "base/mutex.h" +#include "garbage_collector.h" +#include "gc/accounting/atomic_stack.h" +#include "gc/accounting/bitmap-inl.h" +#include "gc/accounting/heap_bitmap.h" +#include "gc_root.h" +#include "immune_spaces.h" +#include "offsets.h" + +namespace art { + +bool KernelSupportsUffd(); + +namespace mirror { +class DexCache; +} // namespace mirror + +namespace gc { + +class Heap; + +namespace space { +class BumpPointerSpace; +} // namespace space + +namespace collector { +class MarkCompact final : public GarbageCollector { + public: + using SigbusCounterType = uint32_t; + + static constexpr size_t kAlignment = kObjectAlignment; + static constexpr int kCopyMode = -1; + static constexpr int kMinorFaultMode = -2; + // Fake file descriptor for fall back mode (when uffd isn't available) + static constexpr int kFallbackMode = -3; + static constexpr int kFdSharedAnon = -1; + static constexpr int kFdUnused = -2; + + // Bitmask for the compaction-done bit in the sigbus_in_progress_count_. + static constexpr SigbusCounterType kSigbusCounterCompactionDoneMask = + 1u << (BitSizeOf<SigbusCounterType>() - 1); + + explicit MarkCompact(Heap* heap); + + ~MarkCompact() {} + + void RunPhases() override REQUIRES(!Locks::mutator_lock_, !lock_); + + // Updated before (or in) pre-compaction pause and is accessed only in the + // pause or during concurrent compaction. The flag is reset in next GC cycle's + // InitializePhase(). Therefore, it's safe to update without any memory ordering. + bool IsCompacting() const { return compacting_; } + + bool IsUsingSigbusFeature() const { return use_uffd_sigbus_; } + + // Called by SIGBUS handler. NO_THREAD_SAFETY_ANALYSIS for mutator-lock, which + // is asserted in the function. 
+ bool SigbusHandler(siginfo_t* info) REQUIRES(!lock_) NO_THREAD_SAFETY_ANALYSIS; + + GcType GetGcType() const override { + return kGcTypeFull; + } + + CollectorType GetCollectorType() const override { + return kCollectorTypeCMC; + } + + Barrier& GetBarrier() { + return gc_barrier_; + } + + mirror::Object* MarkObject(mirror::Object* obj) override + REQUIRES_SHARED(Locks::mutator_lock_) + REQUIRES(Locks::heap_bitmap_lock_); + + void MarkHeapReference(mirror::HeapReference<mirror::Object>* obj, + bool do_atomic_update) override + REQUIRES_SHARED(Locks::mutator_lock_) + REQUIRES(Locks::heap_bitmap_lock_); + + void VisitRoots(mirror::Object*** roots, + size_t count, + const RootInfo& info) override + REQUIRES_SHARED(Locks::mutator_lock_) + REQUIRES(Locks::heap_bitmap_lock_); + void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, + size_t count, + const RootInfo& info) override + REQUIRES_SHARED(Locks::mutator_lock_) + REQUIRES(Locks::heap_bitmap_lock_); + + bool IsNullOrMarkedHeapReference(mirror::HeapReference<mirror::Object>* obj, + bool do_atomic_update) override + REQUIRES_SHARED(Locks::mutator_lock_) + REQUIRES(Locks::heap_bitmap_lock_); + + void RevokeAllThreadLocalBuffers() override; + + void DelayReferenceReferent(ObjPtr<mirror::Class> klass, + ObjPtr<mirror::Reference> reference) override + REQUIRES_SHARED(Locks::mutator_lock_, Locks::heap_bitmap_lock_); + + mirror::Object* IsMarked(mirror::Object* obj) override + REQUIRES_SHARED(Locks::mutator_lock_, Locks::heap_bitmap_lock_); + + mirror::Object* GetFromSpaceAddrFromBarrier(mirror::Object* old_ref) { + CHECK(compacting_); + if (live_words_bitmap_->HasAddress(old_ref)) { + return GetFromSpaceAddr(old_ref); + } + return old_ref; + } + // Called from Heap::PostForkChildAction() for non-zygote processes and from + // PrepareForCompaction() for zygote processes. Returns true if uffd was + // created or was already done. + bool CreateUserfaultfd(bool post_fork); + + // Returns a pair indicating if userfaultfd itself is available (first) and if + // so then whether its minor-fault feature is available or not (second). + static std::pair<bool, bool> GetUffdAndMinorFault(); + + // Add linear-alloc space data when a new space is added to + // GcVisitedArenaPool, which mostly happens only once. + void AddLinearAllocSpaceData(uint8_t* begin, size_t len); + + // In copy-mode of userfaultfd, we don't need to reach a 'processed' state as + // it's given that processing thread also copies the page, thereby mapping it. + // The order is important as we may treat them as integers. + enum class PageState : uint8_t { + kUnprocessed = 0, // Not processed yet + kProcessing = 1, // Being processed by GC thread and will not be mapped + kProcessed = 2, // Processed but not mapped + kProcessingAndMapping = 3, // Being processed by GC or mutator and will be mapped + kMutatorProcessing = 4, // Being processed by mutator thread + kProcessedAndMapping = 5, // Processed and will be mapped + kProcessedAndMapped = 6 // Processed and mapped. For SIGBUS. + }; + + private: + using ObjReference = mirror::CompressedReference<mirror::Object>; + // Number of bits (live-words) covered by a single chunk-info (below) + // entry/word. + // TODO: Since popcount is performed usomg SIMD instructions, we should + // consider using 128-bit in order to halve the chunk-info size. 
+ static constexpr uint32_t kBitsPerVectorWord = kBitsPerIntPtrT; + static constexpr uint32_t kOffsetChunkSize = kBitsPerVectorWord * kAlignment; + static_assert(kOffsetChunkSize < kPageSize); + // Bitmap with bits corresponding to every live word set. An object + // which is 4 words in size will have the corresponding 4 bits set. This is + // required for efficient computation of new-address (post-compaction) from + // the given old-address (pre-compaction). + template <size_t kAlignment> + class LiveWordsBitmap : private accounting::MemoryRangeBitmap<kAlignment> { + using Bitmap = accounting::Bitmap; + using MemRangeBitmap = accounting::MemoryRangeBitmap<kAlignment>; + + public: + static_assert(IsPowerOfTwo(kBitsPerVectorWord)); + static_assert(IsPowerOfTwo(Bitmap::kBitsPerBitmapWord)); + static_assert(kBitsPerVectorWord >= Bitmap::kBitsPerBitmapWord); + static constexpr uint32_t kBitmapWordsPerVectorWord = + kBitsPerVectorWord / Bitmap::kBitsPerBitmapWord; + static_assert(IsPowerOfTwo(kBitmapWordsPerVectorWord)); + static LiveWordsBitmap* Create(uintptr_t begin, uintptr_t end); + + // Return offset (within the indexed chunk-info) of the nth live word. + uint32_t FindNthLiveWordOffset(size_t chunk_idx, uint32_t n) const; + // Sets all bits in the bitmap corresponding to the given range. Also + // returns the bit-index of the first word. + ALWAYS_INLINE uintptr_t SetLiveWords(uintptr_t begin, size_t size); + // Count the number of live words up to the given bit-index. This is to be used + // to compute the post-compact address of an old reference. + ALWAYS_INLINE size_t CountLiveWordsUpto(size_t bit_idx) const; + // Call 'visitor' for every stride of contiguous marked bits in the live-words + // bitmap, starting from begin_bit_idx. Only visit 'bytes' live bytes or + // until 'end', whichever comes first. + // The visitor is called with the index of the first marked bit in the stride, + // the stride size, and whether it's the last stride in the given range or not. + template <typename Visitor> + ALWAYS_INLINE void VisitLiveStrides(uintptr_t begin_bit_idx, + uint8_t* end, + const size_t bytes, + Visitor&& visitor) const + REQUIRES_SHARED(Locks::mutator_lock_); + // Count the number of live bytes in the given vector entry. + size_t LiveBytesInBitmapWord(size_t chunk_idx) const; + void ClearBitmap() { Bitmap::Clear(); } + ALWAYS_INLINE uintptr_t Begin() const { return MemRangeBitmap::CoverBegin(); } + ALWAYS_INLINE bool HasAddress(mirror::Object* obj) const { + return MemRangeBitmap::HasAddress(reinterpret_cast<uintptr_t>(obj)); + } + ALWAYS_INLINE bool Test(uintptr_t bit_index) const { + return Bitmap::TestBit(bit_index); + } + ALWAYS_INLINE bool Test(mirror::Object* obj) const { + return MemRangeBitmap::Test(reinterpret_cast<uintptr_t>(obj)); + } + ALWAYS_INLINE uintptr_t GetWord(size_t index) const { + static_assert(kBitmapWordsPerVectorWord == 1); + return Bitmap::Begin()[index * kBitmapWordsPerVectorWord]; + } + }; + + // For a given object address in pre-compact space, return the corresponding + // address in the from-space, where heap pages are relocated in the compaction + // pause. + mirror::Object* GetFromSpaceAddr(mirror::Object* obj) const { + DCHECK(live_words_bitmap_->HasAddress(obj)) << " obj=" << obj; + return reinterpret_cast<mirror::Object*>(reinterpret_cast<uintptr_t>(obj) + + from_space_slide_diff_); + } + + // Verifies that the given object reference refers to a valid object. + // Otherwise fatally dumps logs, including those from the callback. 
+ template <typename Callback> + void VerifyObject(mirror::Object* ref, Callback& callback) const + REQUIRES_SHARED(Locks::mutator_lock_); + // Check if the obj is within heap and has a klass which is likely to be valid + // mirror::Class. + bool IsValidObject(mirror::Object* obj) const REQUIRES_SHARED(Locks::mutator_lock_); + void InitializePhase(); + void FinishPhase() REQUIRES(!Locks::mutator_lock_, !Locks::heap_bitmap_lock_, !lock_); + void MarkingPhase() REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Locks::heap_bitmap_lock_); + void CompactionPhase() REQUIRES_SHARED(Locks::mutator_lock_); + + void SweepSystemWeaks(Thread* self, Runtime* runtime, const bool paused) + REQUIRES_SHARED(Locks::mutator_lock_) + REQUIRES(!Locks::heap_bitmap_lock_); + // Update the reference at given offset in the given object with post-compact + // address. + ALWAYS_INLINE void UpdateRef(mirror::Object* obj, MemberOffset offset) + REQUIRES_SHARED(Locks::mutator_lock_); + + // Verify that the gc-root is updated only once. Returns false if the update + // shouldn't be done. + ALWAYS_INLINE bool VerifyRootSingleUpdate(void* root, + mirror::Object* old_ref, + const RootInfo& info) + REQUIRES_SHARED(Locks::mutator_lock_); + // Update the given root with post-compact address. + ALWAYS_INLINE void UpdateRoot(mirror::CompressedReference<mirror::Object>* root, + const RootInfo& info = RootInfo(RootType::kRootUnknown)) + REQUIRES_SHARED(Locks::mutator_lock_); + ALWAYS_INLINE void UpdateRoot(mirror::Object** root, + const RootInfo& info = RootInfo(RootType::kRootUnknown)) + REQUIRES_SHARED(Locks::mutator_lock_); + // Given the pre-compact address, the function returns the post-compact + // address of the given object. + ALWAYS_INLINE mirror::Object* PostCompactAddress(mirror::Object* old_ref) const + REQUIRES_SHARED(Locks::mutator_lock_); + // Compute post-compact address of an object in moving space. This function + // assumes that old_ref is in moving space. + ALWAYS_INLINE mirror::Object* PostCompactAddressUnchecked(mirror::Object* old_ref) const + REQUIRES_SHARED(Locks::mutator_lock_); + // Compute the new address for an object which was allocated prior to starting + // this GC cycle. + ALWAYS_INLINE mirror::Object* PostCompactOldObjAddr(mirror::Object* old_ref) const + REQUIRES_SHARED(Locks::mutator_lock_); + // Compute the new address for an object which was black allocated during this + // GC cycle. + ALWAYS_INLINE mirror::Object* PostCompactBlackObjAddr(mirror::Object* old_ref) const + REQUIRES_SHARED(Locks::mutator_lock_); + // Identify immune spaces and reset card-table, mod-union-table, and mark + // bitmaps. + void BindAndResetBitmaps() REQUIRES_SHARED(Locks::mutator_lock_) + REQUIRES(Locks::heap_bitmap_lock_); + + // Perform one last round of marking, identifying roots from dirty cards + // during a stop-the-world (STW) pause. + void MarkingPause() REQUIRES(Locks::mutator_lock_, !Locks::heap_bitmap_lock_); + // Perform stop-the-world pause prior to concurrent compaction. + // Updates GC-roots and protects heap so that during the concurrent + // compaction phase we can receive faults and compact the corresponding pages + // on the fly. + void CompactionPause() REQUIRES(Locks::mutator_lock_); + // Compute offsets (in chunk_info_vec_) and other data structures required + // during concurrent compaction. 
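Conceptually, the chunk_info_vec_ offsets that PrepareForCompaction (declared next) computes amount to an exclusive prefix sum: each chunk's post-compact start equals the total live bytes of all chunks before it. A generic sketch of just that step, assuming the real method does more around it:

#include <cstdint>
#include <vector>

// Turn per-chunk live-byte counts into per-chunk post-compact start offsets.
// Returns the total number of live bytes.
uint64_t ExclusivePrefixSum(std::vector<uint32_t>& chunk_info) {
  uint64_t running = 0;
  for (uint32_t& entry : chunk_info) {
    uint32_t live = entry;
    entry = static_cast<uint32_t>(running);
    running += live;
  }
  return running;
}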
+ void PrepareForCompaction() REQUIRES_SHARED(Locks::mutator_lock_);
+
+ // Copy kPageSize live bytes starting from 'offset' (within the moving space),
+ // which must be within 'obj', into the kPageSize sized memory pointed to by 'addr'.
+ // Then update the references within the copied objects. The boundary objects are
+ // partially updated such that only the references that lie in the page are updated.
+ // This is necessary to avoid cascading userfaults.
+ void CompactPage(mirror::Object* obj, uint32_t offset, uint8_t* addr, bool needs_memset_zero)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+ // Compact the bump-pointer space. Pass the page that should be used as buffer for
+ // userfaultfd.
+ template <int kMode>
+ void CompactMovingSpace(uint8_t* page) REQUIRES_SHARED(Locks::mutator_lock_);
+
+ // Compact the given page as per func and change its state. Also map/copy the
+ // page, if required.
+ template <int kMode, typename CompactionFn>
+ ALWAYS_INLINE void DoPageCompactionWithStateChange(size_t page_idx,
+ size_t status_arr_len,
+ uint8_t* to_space_page,
+ uint8_t* page,
+ CompactionFn func)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
+ // Update all the objects in the given non-moving space page. The 'first' object
+ // could have started in some preceding page.
+ void UpdateNonMovingPage(mirror::Object* first, uint8_t* page)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+ // Update all the references in the non-moving space.
+ void UpdateNonMovingSpace() REQUIRES_SHARED(Locks::mutator_lock_);
+
+ // For all the pages in non-moving space, find the first object that overlaps
+ // with the pages' start address, and store in first_objs_non_moving_space_ array.
+ void InitNonMovingSpaceFirstObjects() REQUIRES_SHARED(Locks::mutator_lock_);
+ // In addition to the first-objects for every post-compact moving space page,
+ // also find offsets within those objects from where the contents should be
+ // copied to the page. The offsets are relative to the moving-space's
+ // beginning. Store the computed first-object and offset in first_objs_moving_space_
+ // and pre_compact_offset_moving_space_ respectively.
+ void InitMovingSpaceFirstObjects(const size_t vec_len) REQUIRES_SHARED(Locks::mutator_lock_);
+
+ // Gather the info related to black allocations from bump-pointer space to
+ // enable concurrent sliding of these pages.
+ void UpdateMovingSpaceBlackAllocations() REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+ // Update first-object info from allocation-stack for non-moving space black
+ // allocations.
+ void UpdateNonMovingSpaceBlackAllocations() REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+
+ // Slides a black page in the moving space (retaining the empty holes, which
+ // are usually part of some in-use TLAB). 'first_obj' is the object that overlaps with
+ // the first byte of the page being slid. pre_compact_page is the pre-compact
+ // address of the page being slid. 'page_idx' is used to fetch the first
+ // allocated chunk's size and next page's first_obj. 'dest' is the kPageSize
+ // sized memory where the contents would be copied.
+ void SlideBlackPage(mirror::Object* first_obj,
+ const size_t page_idx,
+ uint8_t* const pre_compact_page,
+ uint8_t* dest,
+ bool needs_memset_zero) REQUIRES_SHARED(Locks::mutator_lock_);
+
+ // Perform reference-processing and the like before sweeping the non-moving
+ // spaces.
+ void ReclaimPhase() REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Locks::heap_bitmap_lock_);
+
+ // Mark GC-roots (except from immune spaces and thread-stacks) during an STW pause.
+ void ReMarkRoots(Runtime* runtime) REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+ // Concurrently mark GC-roots, except from immune spaces.
+ void MarkRoots(VisitRootFlags flags) REQUIRES_SHARED(Locks::mutator_lock_)
+ REQUIRES(Locks::heap_bitmap_lock_);
+ // Collect thread stack roots via a checkpoint.
+ void MarkRootsCheckpoint(Thread* self, Runtime* runtime) REQUIRES_SHARED(Locks::mutator_lock_)
+ REQUIRES(Locks::heap_bitmap_lock_);
+ // Second round of concurrent marking. Mark all gray objects that got dirtied
+ // since the first round.
+ void PreCleanCards() REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);
+
+ void MarkNonThreadRoots(Runtime* runtime) REQUIRES_SHARED(Locks::mutator_lock_)
+ REQUIRES(Locks::heap_bitmap_lock_);
+ void MarkConcurrentRoots(VisitRootFlags flags, Runtime* runtime)
+ REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);
+
+ // Traverse through the reachable objects and mark them.
+ void MarkReachableObjects() REQUIRES_SHARED(Locks::mutator_lock_)
+ REQUIRES(Locks::heap_bitmap_lock_);
+ // Scan (only) immune spaces looking for references into the garbage collected
+ // spaces.
+ void UpdateAndMarkModUnion() REQUIRES_SHARED(Locks::mutator_lock_)
+ REQUIRES(Locks::heap_bitmap_lock_);
+ // Scan mod-union and card tables, covering all the spaces, to identify dirty objects.
+ // These are in 'minimum age' cards, which is 'kCardAged' in case of concurrent (second round)
+ // marking and kCardDirty during the STW pause.
+ void ScanDirtyObjects(bool paused, uint8_t minimum_age) REQUIRES_SHARED(Locks::mutator_lock_)
+ REQUIRES(Locks::heap_bitmap_lock_);
+ // Recursively mark dirty objects. Invoked both concurrently and in an STW
+ // pause in PausePhase().
+ void RecursiveMarkDirtyObjects(bool paused, uint8_t minimum_age)
+ REQUIRES_SHARED(Locks::mutator_lock_)
+ REQUIRES(Locks::heap_bitmap_lock_);
+ // Go through all the objects in the mark-stack until it's empty.
+ void ProcessMarkStack() override REQUIRES_SHARED(Locks::mutator_lock_)
+ REQUIRES(Locks::heap_bitmap_lock_);
+ void ExpandMarkStack() REQUIRES_SHARED(Locks::mutator_lock_)
+ REQUIRES(Locks::heap_bitmap_lock_);
+
+ // Scan object for references. If kUpdateLiveWords is true then set bits in
+ // the live-words bitmap and add size to chunk-info.
+ template <bool kUpdateLiveWords>
+ void ScanObject(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_)
+ REQUIRES(Locks::heap_bitmap_lock_);
+ // Push objects to the mark-stack right after successfully marking objects.
+ void PushOnMarkStack(mirror::Object* obj)
+ REQUIRES_SHARED(Locks::mutator_lock_)
+ REQUIRES(Locks::heap_bitmap_lock_);
+
+ // Update the live-words bitmap as well as add the object size to the
+ // chunk-info vector. Both are required for computation of post-compact addresses.
+ // Also updates the freed_objects_ counter.
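A simplified sketch of the bookkeeping this comment describes, with the bitmap and vector passed in explicitly (the real member also handles black allocations, the freed_objects_ counter, and objects that straddle chunk boundaries):

    // Illustrative only; assumes the object lies entirely within one chunk.
    template <size_t kAlignment>
    void UpdateLivenessInfoSketch(mirror::Object* obj,
                                  size_t obj_size,
                                  LiveWordsBitmap<kAlignment>* live_words_bitmap,
                                  uint32_t* chunk_info_vec) {
      // Set one bit per kAlignment-sized word occupied by 'obj'.
      uintptr_t bit_idx = live_words_bitmap->SetLiveWords(reinterpret_cast<uintptr_t>(obj),
                                                          obj_size);
      // Credit the object's bytes to its chunk so the later exclusive scan can
      // convert per-chunk live bytes into offsets.
      chunk_info_vec[bit_idx / kBitsPerVectorWord] += obj_size;
    }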
+ void UpdateLivenessInfo(mirror::Object* obj, size_t obj_size) + REQUIRES_SHARED(Locks::mutator_lock_); + + void ProcessReferences(Thread* self) + REQUIRES_SHARED(Locks::mutator_lock_) + REQUIRES(!Locks::heap_bitmap_lock_); + + void MarkObjectNonNull(mirror::Object* obj, + mirror::Object* holder = nullptr, + MemberOffset offset = MemberOffset(0)) + REQUIRES_SHARED(Locks::mutator_lock_) + REQUIRES(Locks::heap_bitmap_lock_); + + void MarkObject(mirror::Object* obj, mirror::Object* holder, MemberOffset offset) + REQUIRES_SHARED(Locks::mutator_lock_) + REQUIRES(Locks::heap_bitmap_lock_); + + template <bool kParallel> + bool MarkObjectNonNullNoPush(mirror::Object* obj, + mirror::Object* holder = nullptr, + MemberOffset offset = MemberOffset(0)) + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES_SHARED(Locks::mutator_lock_); + + void Sweep(bool swap_bitmaps) REQUIRES_SHARED(Locks::mutator_lock_) + REQUIRES(Locks::heap_bitmap_lock_); + void SweepLargeObjects(bool swap_bitmaps) REQUIRES_SHARED(Locks::mutator_lock_) + REQUIRES(Locks::heap_bitmap_lock_); + + // Perform all kernel operations required for concurrent compaction. Includes + // mremap to move pre-compact pages to from-space, followed by userfaultfd + // registration on the moving space and linear-alloc. + void KernelPreparation(); + // Called by KernelPreparation() for every memory range being prepared for + // userfaultfd registration. + void KernelPrepareRangeForUffd(uint8_t* to_addr, + uint8_t* from_addr, + size_t map_size, + int fd, + uint8_t* shadow_addr = nullptr); + + void RegisterUffd(void* addr, size_t size, int mode); + void UnregisterUffd(uint8_t* start, size_t len); + + // Called by thread-pool workers to read uffd_ and process fault events. + template <int kMode> + void ConcurrentCompaction(uint8_t* buf) REQUIRES_SHARED(Locks::mutator_lock_); + // Called by thread-pool workers to compact and copy/map the fault page in + // moving space. + template <int kMode> + void ConcurrentlyProcessMovingPage(uint8_t* fault_page, + uint8_t* buf, + size_t nr_moving_space_used_pages) + REQUIRES_SHARED(Locks::mutator_lock_); + // Called by thread-pool workers to process and copy/map the fault page in + // linear-alloc. + template <int kMode> + void ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool is_minor_fault) + REQUIRES_SHARED(Locks::mutator_lock_); + + // Process concurrently all the pages in linear-alloc. Called by gc-thread. + void ProcessLinearAlloc() REQUIRES_SHARED(Locks::mutator_lock_); + + // Returns true if the moving space can be compacted using uffd's minor-fault + // feature. + bool CanCompactMovingSpaceWithMinorFault(); + + void FreeFromSpacePages(size_t cur_page_idx, int mode) REQUIRES_SHARED(Locks::mutator_lock_); + + // Maps processed pages (from moving space and linear-alloc) for uffd's + // minor-fault feature. We try to 'claim' all processed (and unmapped) pages + // contiguous to 'to_space_start'. + // kFirstPageMapping indicates if the first page is already claimed or not. It + // also indicates that the ioctl must succeed in mapping the first page. + template <bool kFirstPageMapping> + void MapProcessedPages(uint8_t* to_space_start, + Atomic<PageState>* state_arr, + size_t arr_idx, + size_t arr_len) REQUIRES_SHARED(Locks::mutator_lock_); + + bool IsValidFd(int fd) const { return fd >= 0; } + // Add/update <class, obj> pair if class > obj and obj is the lowest address + // object of class. 
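A sketch of the map update described above, using a plain std::map keyed by class pointer for clarity (the actual maps below use ObjReference keys):

    // Illustrative only: remember, per class, the lowest-address object whose
    // class lies at a higher address than the object itself.
    void UpdateClassAfterObjectMapSketch(mirror::Object* obj,
                                         std::map<mirror::Class*, mirror::Object*>* map) {
      mirror::Class* klass = obj->GetClass();
      if (reinterpret_cast<uintptr_t>(klass) <= reinterpret_cast<uintptr_t>(obj)) {
        return;  // Class is compacted before (or with) the object; nothing to track.
      }
      auto it = map->find(klass);
      if (it == map->end() || std::less<mirror::Object*>{}(obj, it->second)) {
        (*map)[klass] = obj;
      }
    }

During compaction this lets the collector delay freeing the from-space pages that hold such a class until its lowest-address object has been compacted.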
+ ALWAYS_INLINE void UpdateClassAfterObjectMap(mirror::Object* obj)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
+ // Updates the 'class_after_obj_map_' map by replacing each key (class) with its
+ // highest-address super-class (obtained from 'super_class_after_class_map_'),
+ // if there is any. This is to ensure we don't free from-space pages before
+ // the lowest-address obj is compacted.
+ void UpdateClassAfterObjMap();
+
+ void MarkZygoteLargeObjects() REQUIRES_SHARED(Locks::mutator_lock_)
+ REQUIRES(Locks::heap_bitmap_lock_);
+
+ void ZeropageIoctl(void* addr, bool tolerate_eexist, bool tolerate_enoent);
+ void CopyIoctl(void* dst, void* buffer);
+ // Called after updating a linear-alloc page to either map a zero-page, if the
+ // page wasn't touched while being updated, or map the page via copy-ioctl. It
+ // then updates the page's state to indicate that the page is mapped.
+ void MapUpdatedLinearAllocPage(uint8_t* page,
+ uint8_t* shadow_page,
+ Atomic<PageState>& state,
+ bool page_touched);
+
+ // For checkpoints
+ Barrier gc_barrier_;
+ // Every object inside the immune spaces is assumed to be marked.
+ ImmuneSpaces immune_spaces_;
+ // Required only when mark-stack is accessed in shared mode, which happens
+ // when collecting thread-stack roots using checkpoint. Otherwise, we use it
+ // to synchronize on updated_roots_ in debug-builds.
+ Mutex lock_;
+ accounting::ObjectStack* mark_stack_;
+ // Special bitmap wherein all the bits corresponding to an object are set.
+ // TODO: make LiveWordsBitmap encapsulated in this class rather than a
+ // pointer. We tend to access its members in performance-sensitive
+ // code-path. Also, use a single MemMap for all the GC's data structures,
+ // which we will clear in the end. This would help in limiting the number of
+ // VMAs that get created in the kernel.
+ std::unique_ptr<LiveWordsBitmap<kAlignment>> live_words_bitmap_;
+ // Track GC-roots updated so far in a GC-cycle. This is to confirm that no
+ // GC-root is updated twice.
+ // TODO: Must be replaced with an efficient mechanism eventually. Or ensure
+ // that double updates don't happen in the first place.
+ std::unique_ptr<std::unordered_set<void*>> updated_roots_ GUARDED_BY(lock_);
+ MemMap from_space_map_;
+ MemMap shadow_to_space_map_;
+ // An array of live-bytes in logical chunks of kOffsetChunkSize size
+ // in the 'to-be-compacted' space.
+ MemMap info_map_;
+ // Set of page-sized buffers used for compaction. The first page is used by
+ // the GC thread. Subsequent pages are used by mutator threads in case of
+ // the SIGBUS feature, and by uffd-worker threads otherwise. In the latter case
+ // the first page is also used for termination of concurrent compaction by
+ // making worker threads terminate the userfaultfd read loop.
+ MemMap compaction_buffers_map_;
+
+ class LessByArenaAddr {
+ public:
+ bool operator()(const TrackedArena* a, const TrackedArena* b) const {
+ return std::less<uint8_t*>{}(a->Begin(), b->Begin());
+ }
+ };
+
+ // Map of arenas allocated in LinearAlloc arena-pool and last non-zero page,
+ // captured during compaction pause for concurrent updates.
+ std::map<const TrackedArena*, uint8_t*, LessByArenaAddr> linear_alloc_arenas_;
+ // Set of PageStatus arrays, one per arena-pool space. It's extremely rare to
+ // have more than one, but this is to be ready for the worst case.
+ class LinearAllocSpaceData { + public: + LinearAllocSpaceData(MemMap&& shadow, + MemMap&& page_status_map, + uint8_t* begin, + uint8_t* end, + bool already_shared) + : shadow_(std::move(shadow)), + page_status_map_(std::move(page_status_map)), + begin_(begin), + end_(end), + already_shared_(already_shared) {} + + MemMap shadow_; + MemMap page_status_map_; + uint8_t* begin_; + uint8_t* end_; + // Indicates if the linear-alloc is already MAP_SHARED. + bool already_shared_; + }; + + std::vector<LinearAllocSpaceData> linear_alloc_spaces_data_; + + class ObjReferenceHash { + public: + uint32_t operator()(const ObjReference& ref) const { + return ref.AsVRegValue() >> kObjectAlignmentShift; + } + }; + + class ObjReferenceEqualFn { + public: + bool operator()(const ObjReference& a, const ObjReference& b) const { + return a.AsMirrorPtr() == b.AsMirrorPtr(); + } + }; + + class LessByObjReference { + public: + bool operator()(const ObjReference& a, const ObjReference& b) const { + return std::less<mirror::Object*>{}(a.AsMirrorPtr(), b.AsMirrorPtr()); + } + }; + + // Data structures used to track objects whose layout information is stored in later + // allocated classes (at higher addresses). We must be careful not to free the + // corresponding from-space pages prematurely. + using ObjObjOrderedMap = std::map<ObjReference, ObjReference, LessByObjReference>; + using ObjObjUnorderedMap = + std::unordered_map<ObjReference, ObjReference, ObjReferenceHash, ObjReferenceEqualFn>; + // Unordered map of <K, S> such that the class K (in moving space) has kClassWalkSuper + // in reference bitmap and S is its highest address super class. + ObjObjUnorderedMap super_class_after_class_hash_map_; + // Unordered map of <K, V> such that the class K (in moving space) is after its objects + // or would require iterating super-class hierarchy when visiting references. And V is + // its lowest address object (in moving space). + ObjObjUnorderedMap class_after_obj_hash_map_; + // Ordered map constructed before starting compaction using the above two maps. Key is a + // class (or super-class) which is higher in address order than some of its object(s) and + // value is the corresponding object with lowest address. + ObjObjOrderedMap class_after_obj_ordered_map_; + // Since the compaction is done in reverse, we use a reverse iterator. It is maintained + // either at the pair whose class is lower than the first page to be freed, or at the + // pair whose object is not yet compacted. + ObjObjOrderedMap::const_reverse_iterator class_after_obj_iter_; + // Cached reference to the last class which has kClassWalkSuper in reference + // bitmap but has all its super classes lower address order than itself. + mirror::Class* walk_super_class_cache_; + // Used by FreeFromSpacePages() for maintaining markers in the moving space for + // how far the pages have been reclaimed/checked. + size_t last_checked_reclaim_page_idx_; + uint8_t* last_reclaimed_page_; + + space::ContinuousSpace* non_moving_space_; + space::BumpPointerSpace* const bump_pointer_space_; + // The main space bitmap + accounting::ContinuousSpaceBitmap* const moving_space_bitmap_; + accounting::ContinuousSpaceBitmap* non_moving_space_bitmap_; + Thread* thread_running_gc_; + // Array of moving-space's pages' compaction status. 
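The per-page status array declared next is what lets the GC thread and the concurrent fault handlers agree on who compacts a given to-space page. The PageState enumerators used here are made up for the example; only the compare-and-exchange claiming pattern is the point:

    #include <atomic>

    enum class PageStateSketch : uint8_t { kUntouched, kProcessing, kProcessed };

    // Illustrative only: the first thread to flip the slot from kUntouched to
    // kProcessing compacts the page; everyone else either waits for kProcessed
    // or moves on to another page.
    bool TryClaimPage(std::atomic<PageStateSketch>* status_arr, size_t page_idx) {
      PageStateSketch expected = PageStateSketch::kUntouched;
      return status_arr[page_idx].compare_exchange_strong(expected,
                                                          PageStateSketch::kProcessing,
                                                          std::memory_order_acq_rel);
    }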
+ Atomic<PageState>* moving_pages_status_;
+ size_t vector_length_;
+ size_t live_stack_freeze_size_;
+
+ uint64_t bytes_scanned_;
+
+ // For every page in the to-space (post-compact heap) we need to know the
+ // first object from which we must compact and/or update references. This is
+ // for both non-moving and moving space. Additionally, for the moving-space,
+ // we also need the offset within the object from where we need to start
+ // copying.
+ // chunk_info_vec_ holds live bytes for chunks during the marking phase. After
+ // marking we perform an exclusive scan to compute the offset for every chunk.
+ uint32_t* chunk_info_vec_;
+ // For pages before black allocations, pre_compact_offset_moving_space_[i]
+ // holds the offset within the space from where the objects need to be copied in
+ // the ith post-compact page.
+ // Otherwise, black_alloc_pages_first_chunk_size_[i] holds the size of the first
+ // non-empty chunk in the ith black-allocations page.
+ union {
+ uint32_t* pre_compact_offset_moving_space_;
+ uint32_t* black_alloc_pages_first_chunk_size_;
+ };
+ // first_objs_moving_space_[i] is the pre-compact address of the object which
+ // would overlap with the starting boundary of the ith post-compact page.
+ ObjReference* first_objs_moving_space_;
+ // First object for every page. It could be greater than the page's start
+ // address, or null if the page is empty.
+ ObjReference* first_objs_non_moving_space_;
+ size_t non_moving_first_objs_count_;
+ // Length of first_objs_moving_space_ and pre_compact_offset_moving_space_
+ // arrays. Also the number of pages which are to be compacted.
+ size_t moving_first_objs_count_;
+ // Number of pages containing black-allocated objects, indicating number of
+ // pages to be slid.
+ size_t black_page_count_;
+
+ uint8_t* from_space_begin_;
+ // Moving-space's end pointer at the marking pause. All allocations beyond
+ // this will be considered black in the current GC cycle. Aligned up to page
+ // size.
+ uint8_t* black_allocations_begin_;
+ // End of compacted space. Used for computing the post-compact addr of black-
+ // allocated objects. Aligned up to page size.
+ uint8_t* post_compact_end_;
+ // Cache (black_allocations_begin_ - post_compact_end_) for post-compact
+ // address computations.
+ ptrdiff_t black_objs_slide_diff_;
+ // Cache (from_space_begin_ - bump_pointer_space_->Begin()) so that we can
+ // compute the from-space address of a given pre-compact addr efficiently.
+ ptrdiff_t from_space_slide_diff_;
+
+ // TODO: Remove once an efficient mechanism to deal with double root updates
+ // is incorporated.
+ void* stack_high_addr_;
+ void* stack_low_addr_;
+
+ uint8_t* conc_compaction_termination_page_;
+
+ PointerSize pointer_size_;
+ // Number of objects freed during this GC in moving space. It is decremented
+ // every time an object is discovered, and the total object count is added to it
+ // in MarkingPause(). It reaches the correct count only once the marking phase
+ // is completed.
+ int32_t freed_objects_;
+ // memfds for moving space for using userfaultfd's minor-fault feature.
+ // Initialized to kFdUnused to indicate that mmap should be MAP_PRIVATE in
+ // KernelPrepareRange().
+ int moving_to_space_fd_;
+ int moving_from_space_fd_;
+ // Userfault file descriptor, accessed only by the GC itself.
+ // kFallbackMode value indicates that we are in the fallback mode.
+ int uffd_;
+ // Number of mutator-threads currently executing SIGBUS handler. When the
+ // GC-thread is done with compaction, it sets the most significant bit to
+ // indicate that. Mutator threads check for the flag when incrementing in the
+ // handler.
+ std::atomic<SigbusCounterType> sigbus_in_progress_count_;
+ // Number of mutator-threads/uffd-workers working on a moving-space page. It
+ // must be 0 before gc-thread can unregister the space after it's done
+ // sequentially compacting all pages of the space.
+ std::atomic<uint16_t> compaction_in_progress_count_;
+ // When using the SIGBUS feature, this counter is used by mutators to claim a page
+ // out of compaction buffers to be used for the entire compaction cycle.
+ std::atomic<uint16_t> compaction_buffer_counter_;
+ // Used to exit from the compaction loop at the end of concurrent compaction.
+ uint8_t thread_pool_counter_;
+ // True while compacting.
+ bool compacting_;
+ // Flag indicating whether one-time uffd initialization has been done. It will
+ // be false on the first GC for non-zygote processes, and always for zygote.
+ // Its purpose is to keep the userfaultfd overhead to a minimum in
+ // Heap::PostForkChildAction(), as it's invoked in the app startup path. With
+ // this, we register the compaction-termination page on the first GC.
+ bool uffd_initialized_;
+ // Flag indicating if userfaultfd supports minor-faults. Set appropriately in
+ // CreateUserfaultfd(), where we get this information from the kernel.
+ const bool uffd_minor_fault_supported_;
+ // Flag indicating if we should use sigbus signals instead of threads to
+ // handle userfaults.
+ const bool use_uffd_sigbus_;
+ // For non-zygote processes this flag indicates if the spaces are ready to
+ // start using userfaultfd's minor-fault feature. This initialization involves
+ // starting to use shmem (memfd_create) for the userfaultfd protected spaces.
+ bool minor_fault_initialized_;
+ // Set to true when linear-alloc can start mapping with MAP_SHARED. Set on
+ // non-zygote processes during the first GC, which sets up everything for using
+ // minor-fault from the next GC.
+ bool map_linear_alloc_shared_;
+
+ class FlipCallback;
+ class ThreadFlipVisitor;
+ class VerifyRootMarkedVisitor;
+ class ScanObjectVisitor;
+ class CheckpointMarkThreadRoots;
+ template<size_t kBufferSize> class ThreadRootsVisitor;
+ class CardModifiedVisitor;
+ class RefFieldsVisitor;
+ template <bool kCheckBegin, bool kCheckEnd> class RefsUpdateVisitor;
+ class ArenaPoolPageUpdater;
+ class ClassLoaderRootsUpdater;
+ class LinearAllocPageUpdater;
+ class ImmuneSpaceUpdateObjVisitor;
+ class ConcurrentCompactionGcTask;
+
+ DISALLOW_IMPLICIT_CONSTRUCTORS(MarkCompact);
+};
+
+std::ostream& operator<<(std::ostream& os, MarkCompact::PageState value);
+
+} // namespace collector
+} // namespace gc
+} // namespace art
+
+#endif // ART_RUNTIME_GC_COLLECTOR_MARK_COMPACT_H_
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index bd5ce37b2c..4fefe6557c 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -340,6 +340,8 @@ void MarkSweep::ReclaimPhase() {
Thread* const self = Thread::Current();
// Process the references concurrently.
ProcessReferences(self);
+ // There is no need to sweep interpreter caches, as this GC doesn't move
+ // objects and hence it would be a no-op.
SweepSystemWeaks(self); Runtime* const runtime = Runtime::Current(); runtime->AllowNewSystemWeaks(); @@ -1127,7 +1129,8 @@ void MarkSweep::VerifySystemWeaks() { TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); // Verify system weaks, uses a special object visitor which returns the input object. VerifySystemWeakVisitor visitor(this); - Runtime::Current()->SweepSystemWeaks(&visitor); + Runtime* runtime = Runtime::Current(); + runtime->SweepSystemWeaks(&visitor); } class MarkSweep::CheckpointMarkThreadRoots : public Closure, public RootVisitor { @@ -1455,6 +1458,8 @@ inline mirror::Object* MarkSweep::IsMarked(mirror::Object* object) { if (current_space_bitmap_->HasAddress(object)) { return current_space_bitmap_->Test(object) ? object : nullptr; } + // This function returns nullptr for objects allocated after marking phase as + // they are not marked in the bitmap. return mark_bitmap_->Test(object) ? object : nullptr; } diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h index 6af7c54600..12fd7f9995 100644 --- a/runtime/gc/collector/mark_sweep.h +++ b/runtime/gc/collector/mark_sweep.h @@ -181,7 +181,7 @@ class MarkSweep : public GarbageCollector { REQUIRES_SHARED(Locks::heap_bitmap_lock_, Locks::mutator_lock_); void VerifySystemWeaks() - REQUIRES_SHARED(Locks::mutator_lock_, Locks::heap_bitmap_lock_); + REQUIRES(Locks::mutator_lock_) REQUIRES_SHARED(Locks::heap_bitmap_lock_); // Verify that an object is live, either in a live bitmap or in the allocation stack. void VerifyIsLive(const mirror::Object* obj) diff --git a/runtime/gc/collector/partial_mark_sweep.cc b/runtime/gc/collector/partial_mark_sweep.cc index f6ca867e69..e283a9583a 100644 --- a/runtime/gc/collector/partial_mark_sweep.cc +++ b/runtime/gc/collector/partial_mark_sweep.cc @@ -18,7 +18,6 @@ #include "gc/heap.h" #include "gc/space/space.h" -#include "partial_mark_sweep.h" #include "thread-current-inl.h" namespace art { diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc index 53b060483f..acd4807a4f 100644 --- a/runtime/gc/collector/semi_space.cc +++ b/runtime/gc/collector/semi_space.cc @@ -500,7 +500,9 @@ void SemiSpace::MarkRoots() { void SemiSpace::SweepSystemWeaks() { TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); - Runtime::Current()->SweepSystemWeaks(this); + Runtime* runtime = Runtime::Current(); + runtime->SweepSystemWeaks(this); + runtime->GetThreadList()->SweepInterpreterCaches(this); } bool SemiSpace::ShouldSweepSpace(space::ContinuousSpace* space) const { diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h index 245ea10558..6d3ac0846e 100644 --- a/runtime/gc/collector/semi_space.h +++ b/runtime/gc/collector/semi_space.h @@ -143,7 +143,7 @@ class SemiSpace : public GarbageCollector { void SweepLargeObjects(bool swap_bitmaps) REQUIRES(Locks::heap_bitmap_lock_); void SweepSystemWeaks() - REQUIRES_SHARED(Locks::heap_bitmap_lock_, Locks::mutator_lock_); + REQUIRES_SHARED(Locks::heap_bitmap_lock_) REQUIRES(Locks::mutator_lock_); void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info) override REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_); diff --git a/runtime/gc/collector_type.h b/runtime/gc/collector_type.h index 9c9996458c..c20e3a7347 100644 --- a/runtime/gc/collector_type.h +++ b/runtime/gc/collector_type.h @@ -30,6 +30,8 @@ enum CollectorType { kCollectorTypeMS, // Concurrent mark-sweep. kCollectorTypeCMS, + // Concurrent mark-compact. 
+ kCollectorTypeCMC, // Semi-space / mark-sweep hybrid, enables compaction. kCollectorTypeSS, // Heap trimming collector, doesn't do any actual collecting. @@ -63,12 +65,13 @@ enum CollectorType { std::ostream& operator<<(std::ostream& os, CollectorType collector_type); static constexpr CollectorType kCollectorTypeDefault = -#if ART_DEFAULT_GC_TYPE_IS_CMS - kCollectorTypeCMS +#if ART_DEFAULT_GC_TYPE_IS_CMC + kCollectorTypeCMC #elif ART_DEFAULT_GC_TYPE_IS_SS kCollectorTypeSS -#else +#elif ART_DEFAULT_GC_TYPE_IS_CMS kCollectorTypeCMS +#else #error "ART default GC type must be set" #endif ; // NOLINT [whitespace/semicolon] [5] diff --git a/runtime/gc/gc_cause.cc b/runtime/gc/gc_cause.cc index b197a99a20..02fe2f975c 100644 --- a/runtime/gc/gc_cause.cc +++ b/runtime/gc/gc_cause.cc @@ -46,7 +46,7 @@ const char* PrettyCause(GcCause cause) { case kGcCauseHprof: return "Hprof"; case kGcCauseGetObjectsAllocated: return "ObjectsAllocated"; case kGcCauseProfileSaver: return "ProfileSaver"; - case kGcCauseRunEmptyCheckpoint: return "RunEmptyCheckpoint"; + case kGcCauseDeletingDexCacheArrays: return "DeletingDexCacheArrays"; } LOG(FATAL) << "Unreachable"; UNREACHABLE(); diff --git a/runtime/gc/gc_cause.h b/runtime/gc/gc_cause.h index 4dae585e4c..5c039b31ee 100644 --- a/runtime/gc/gc_cause.h +++ b/runtime/gc/gc_cause.h @@ -62,8 +62,8 @@ enum GcCause { kGcCauseGetObjectsAllocated, // GC cause for the profile saver. kGcCauseProfileSaver, - // GC cause for running an empty checkpoint. - kGcCauseRunEmptyCheckpoint, + // GC cause for deleting dex cache arrays at startup. + kGcCauseDeletingDexCacheArrays, }; const char* PrettyCause(GcCause cause); diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h index 9e1524e657..922b58870d 100644 --- a/runtime/gc/heap-inl.h +++ b/runtime/gc/heap-inl.h @@ -209,13 +209,12 @@ inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, } // IsGcConcurrent() isn't known at compile time so we can optimize by not checking it for the // BumpPointer or TLAB allocators. This is nice since it allows the entire if statement to be - // optimized out. And for the other allocators, AllocatorMayHaveConcurrentGC is a constant - // since the allocator_type should be constant propagated. - if (AllocatorMayHaveConcurrentGC(allocator) && IsGcConcurrent() - && UNLIKELY(ShouldConcurrentGCForJava(new_num_bytes_allocated))) { + // optimized out. + if (IsGcConcurrent() && UNLIKELY(ShouldConcurrentGCForJava(new_num_bytes_allocated))) { need_gc = true; } GetMetrics()->TotalBytesAllocated()->Add(bytes_tl_bulk_allocated); + GetMetrics()->TotalBytesAllocatedDelta()->Add(bytes_tl_bulk_allocated); } } if (kIsDebugBuild && Runtime::Current()->IsStarted()) { @@ -442,7 +441,7 @@ inline bool Heap::ShouldAllocLargeObject(ObjPtr<mirror::Class> c, size_t byte_co return byte_count >= large_object_threshold_ && (c->IsPrimitiveArray() || c->IsStringClass()); } -inline bool Heap::IsOutOfMemoryOnAllocation(AllocatorType allocator_type, +inline bool Heap::IsOutOfMemoryOnAllocation(AllocatorType allocator_type ATTRIBUTE_UNUSED, size_t alloc_size, bool grow) { size_t old_target = target_footprint_.load(std::memory_order_relaxed); @@ -457,7 +456,7 @@ inline bool Heap::IsOutOfMemoryOnAllocation(AllocatorType allocator_type, return true; } // We are between target_footprint_ and growth_limit_ . 
- if (AllocatorMayHaveConcurrentGC(allocator_type) && IsGcConcurrent()) { + if (IsGcConcurrent()) { return false; } else { if (grow) { diff --git a/runtime/gc/heap-visit-objects-inl.h b/runtime/gc/heap-visit-objects-inl.h index e20d981fa3..a235c44033 100644 --- a/runtime/gc/heap-visit-objects-inl.h +++ b/runtime/gc/heap-visit-objects-inl.h @@ -118,7 +118,7 @@ inline void Heap::VisitObjectsInternal(Visitor&& visitor) { // For speed reasons, only perform it when Rosalloc could possibly be used. // (Disabled for read barriers because it never uses Rosalloc). // (See the DCHECK in RosAllocSpace constructor). - if (!kUseReadBarrier) { + if (!gUseReadBarrier) { // Rosalloc has a race in allocation. Objects can be written into the allocation // stack before their header writes are visible to this thread. // See b/28790624 for more details. diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index 8407ba4376..f27bddb361 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -21,10 +21,6 @@ #if defined(__BIONIC__) || defined(__GLIBC__) #include <malloc.h> // For mallinfo() #endif -#if defined(__BIONIC__) && defined(ART_TARGET) -#include <linux/userfaultfd.h> -#include <sys/ioctl.h> -#endif #include <memory> #include <random> #include <unistd.h> @@ -61,6 +57,7 @@ #include "gc/accounting/remembered_set.h" #include "gc/accounting/space_bitmap-inl.h" #include "gc/collector/concurrent_copying.h" +#include "gc/collector/mark_compact.h" #include "gc/collector/mark_sweep.h" #include "gc/collector/partial_mark_sweep.h" #include "gc/collector/semi_space.h" @@ -106,6 +103,7 @@ #include "runtime.h" #include "javaheapprof/javaheapsampler.h" #include "scoped_thread_state_change-inl.h" +#include "thread-inl.h" #include "thread_list.h" #include "verify_object-inl.h" #include "well_known_classes.h" @@ -339,6 +337,7 @@ Heap::Heap(size_t initial_size, // this one. process_state_update_lock_("process state update lock", kPostMonitorLock), min_foreground_target_footprint_(0), + min_foreground_concurrent_start_bytes_(0), concurrent_start_bytes_(std::numeric_limits<size_t>::max()), total_bytes_freed_ever_(0), total_objects_freed_ever_(0), @@ -410,7 +409,6 @@ Heap::Heap(size_t initial_size, backtrace_lock_(nullptr), seen_backtrace_count_(0u), unique_backtrace_count_(0u), - uffd_(-1), gc_disabled_for_shutdown_(false), dump_region_info_before_gc_(dump_region_info_before_gc), dump_region_info_after_gc_(dump_region_info_after_gc), @@ -421,7 +419,19 @@ Heap::Heap(size_t initial_size, if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) { LOG(INFO) << "Heap() entering"; } - if (kUseReadBarrier) { + + LOG(INFO) << "Using " << foreground_collector_type_ << " GC."; + if (!gUseUserfaultfd) { + // This ensures that userfaultfd syscall is done before any seccomp filter is installed. + // TODO(b/266731037): Remove this when we no longer need to collect metric on userfaultfd + // support. + auto [uffd_supported, minor_fault_supported] = collector::MarkCompact::GetUffdAndMinorFault(); + // The check is just to ensure that compiler doesn't eliminate the function call above. + // Userfaultfd support is certain to be there if its minor-fault feature is supported. 
+ CHECK_IMPLIES(minor_fault_supported, uffd_supported); + } + + if (gUseReadBarrier) { CHECK_EQ(foreground_collector_type_, kCollectorTypeCC); CHECK_EQ(background_collector_type_, kCollectorTypeCCBackground); } else if (background_collector_type_ != gc::kCollectorTypeHomogeneousSpaceCompact) { @@ -448,7 +458,8 @@ Heap::Heap(size_t initial_size, mark_bitmap_.reset(new accounting::HeapBitmap(this)); // We don't have hspace compaction enabled with CC. - if (foreground_collector_type_ == kCollectorTypeCC) { + if (foreground_collector_type_ == kCollectorTypeCC + || foreground_collector_type_ == kCollectorTypeCMC) { use_homogeneous_space_compaction_for_oom_ = false; } bool support_homogeneous_space_compaction = @@ -486,6 +497,7 @@ Heap::Heap(size_t initial_size, runtime->ShouldRelocate(), /*executable=*/ !runtime->IsAotCompiler(), heap_reservation_size, + runtime->AllowInMemoryCompilation(), &boot_image_spaces, &heap_reservation)) { DCHECK_EQ(heap_reservation_size, heap_reservation.IsValid() ? heap_reservation.Size() : 0u); @@ -629,10 +641,14 @@ Heap::Heap(size_t initial_size, std::move(main_mem_map_1)); CHECK(bump_pointer_space_ != nullptr) << "Failed to create bump pointer space"; AddSpace(bump_pointer_space_); - temp_space_ = space::BumpPointerSpace::CreateFromMemMap("Bump pointer space 2", - std::move(main_mem_map_2)); - CHECK(temp_space_ != nullptr) << "Failed to create bump pointer space"; - AddSpace(temp_space_); + // For Concurrent Mark-compact GC we don't need the temp space to be in + // lower 4GB. So its temp space will be created by the GC itself. + if (foreground_collector_type_ != kCollectorTypeCMC) { + temp_space_ = space::BumpPointerSpace::CreateFromMemMap("Bump pointer space 2", + std::move(main_mem_map_2)); + CHECK(temp_space_ != nullptr) << "Failed to create bump pointer space"; + AddSpace(temp_space_); + } CHECK(separate_non_moving_space); } else { CreateMainMallocSpace(std::move(main_mem_map_1), initial_size, growth_limit_, capacity_); @@ -758,6 +774,10 @@ Heap::Heap(size_t initial_size, semi_space_collector_ = new collector::SemiSpace(this); garbage_collectors_.push_back(semi_space_collector_); } + if (MayUseCollector(kCollectorTypeCMC)) { + mark_compact_ = new collector::MarkCompact(this); + garbage_collectors_.push_back(mark_compact_); + } if (MayUseCollector(kCollectorTypeCC)) { concurrent_copying_collector_ = new collector::ConcurrentCopying(this, /*young_gen=*/false, @@ -963,7 +983,6 @@ void Heap::DecrementDisableMovingGC(Thread* self) { void Heap::IncrementDisableThreadFlip(Thread* self) { // Supposed to be called by mutators. If thread_flip_running_ is true, block. Otherwise, go ahead. - CHECK(kUseReadBarrier); bool is_nested = self->GetDisableThreadFlipCount() > 0; self->IncrementDisableThreadFlipCount(); if (is_nested) { @@ -994,10 +1013,23 @@ void Heap::IncrementDisableThreadFlip(Thread* self) { } } +void Heap::EnsureObjectUserfaulted(ObjPtr<mirror::Object> obj) { + if (gUseUserfaultfd) { + // Use volatile to ensure that compiler loads from memory to trigger userfaults, if required. + const uint8_t* start = reinterpret_cast<uint8_t*>(obj.Ptr()); + const uint8_t* end = AlignUp(start + obj->SizeOf(), kPageSize); + // The first page is already touched by SizeOf(). + start += kPageSize; + while (start < end) { + ForceRead(start); + start += kPageSize; + } + } +} + void Heap::DecrementDisableThreadFlip(Thread* self) { // Supposed to be called by mutators. Decrement disable_thread_flip_count_ and potentially wake up // the GC waiting before doing a thread flip. 
- CHECK(kUseReadBarrier); self->DecrementDisableThreadFlipCount(); bool is_outermost = self->GetDisableThreadFlipCount() == 0; if (!is_outermost) { @@ -1017,7 +1049,6 @@ void Heap::DecrementDisableThreadFlip(Thread* self) { void Heap::ThreadFlipBegin(Thread* self) { // Supposed to be called by GC. Set thread_flip_running_ to be true. If disable_thread_flip_count_ // > 0, block. Otherwise, go ahead. - CHECK(kUseReadBarrier); ScopedThreadStateChange tsc(self, ThreadState::kWaitingForGcThreadFlip); MutexLock mu(self, *thread_flip_lock_); thread_flip_cond_->CheckSafeToWait(self); @@ -1043,7 +1074,6 @@ void Heap::ThreadFlipBegin(Thread* self) { void Heap::ThreadFlipEnd(Thread* self) { // Supposed to be called by GC. Set thread_flip_running_ to false and potentially wake up mutators // waiting before doing a JNI critical. - CHECK(kUseReadBarrier); MutexLock mu(self, *thread_flip_lock_); CHECK(thread_flip_running_); thread_flip_running_ = false; @@ -1059,7 +1089,9 @@ void Heap::GrowHeapOnJankPerceptibleSwitch() { min_foreground_target_footprint_, std::memory_order_relaxed); } - min_foreground_target_footprint_ = 0; + if (IsGcConcurrent() && concurrent_start_bytes_ < min_foreground_concurrent_start_bytes_) { + concurrent_start_bytes_ = min_foreground_concurrent_start_bytes_; + } } void Heap::UpdateProcessState(ProcessState old_process_state, ProcessState new_process_state) { @@ -1070,26 +1102,32 @@ void Heap::UpdateProcessState(ProcessState old_process_state, ProcessState new_p RequestCollectorTransition(foreground_collector_type_, 0); GrowHeapOnJankPerceptibleSwitch(); } else { - // Don't delay for debug builds since we may want to stress test the GC. // If background_collector_type_ is kCollectorTypeHomogeneousSpaceCompact then we have // special handling which does a homogenous space compaction once but then doesn't transition // the collector. Similarly, we invoke a full compaction for kCollectorTypeCC but don't // transition the collector. - RequestCollectorTransition(background_collector_type_, - kStressCollectorTransition - ? 0 - : kCollectorTransitionWait); + RequestCollectorTransition(background_collector_type_, 0); } } } -void Heap::CreateThreadPool() { - const size_t num_threads = std::max(parallel_gc_threads_, conc_gc_threads_); +void Heap::CreateThreadPool(size_t num_threads) { + if (num_threads == 0) { + num_threads = std::max(parallel_gc_threads_, conc_gc_threads_); + } if (num_threads != 0) { thread_pool_.reset(new ThreadPool("Heap thread pool", num_threads)); } } +void Heap::WaitForWorkersToBeCreated() { + DCHECK(!Runtime::Current()->IsShuttingDown(Thread::Current())) + << "Cannot create new threads during runtime shutdown"; + if (thread_pool_ != nullptr) { + thread_pool_->WaitForWorkersToBeCreated(); + } +} + void Heap::MarkAllocStackAsLive(accounting::ObjectStack* stack) { space::ContinuousSpace* space1 = main_space_ != nullptr ? main_space_ : non_moving_space_; space::ContinuousSpace* space2 = non_moving_space_; @@ -1451,6 +1489,8 @@ void Heap::ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType Runtime::Current()->GetPreAllocatedOutOfMemoryErrorWhenHandlingStackOverflow()); return; } + // Allow plugins to intercept out of memory errors. 
+ Runtime::Current()->OutOfMemoryErrorHook();
std::ostringstream oss;
size_t total_bytes_free = GetFreeMemory();
@@ -1497,6 +1537,23 @@ void Heap::ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType
void Heap::DoPendingCollectorTransition() {
CollectorType desired_collector_type = desired_collector_type_;
+
+ if (collector_type_ == kCollectorTypeCC || collector_type_ == kCollectorTypeCMC) {
+ // Do TransitionGC when the app goes to background only if the app's allocations
+ // (since the last GC) exceed the threshold below; otherwise skip it.
+ // num_bytes_allocated_since_gc is computed with UnsignedDifference(), so it is
+ // positive or zero even if num_bytes_alive_after_gc_ initially comes from the Zygote.
+ size_t num_bytes_allocated_since_gc =
+ UnsignedDifference(GetBytesAllocated(), num_bytes_alive_after_gc_);
+ if (num_bytes_allocated_since_gc <
+ (UnsignedDifference(target_footprint_.load(std::memory_order_relaxed),
+ num_bytes_alive_after_gc_)/4)
+ && !kStressCollectorTransition
+ && !IsLowMemoryMode()) {
+ return;
+ }
+ }
+
// Launch homogeneous space compaction if it is desired.
if (desired_collector_type == kCollectorTypeHomogeneousSpaceCompact) {
if (!CareAboutPauseTimes()) {
@@ -1504,15 +1561,15 @@
} else {
VLOG(gc) << "Homogeneous compaction ignored due to jank perceptible process state";
}
- } else if (desired_collector_type == kCollectorTypeCCBackground) {
- DCHECK(kUseReadBarrier);
+ } else if (desired_collector_type == kCollectorTypeCCBackground ||
+ desired_collector_type == kCollectorTypeCMC) {
if (!CareAboutPauseTimes()) {
- // Invoke CC full compaction.
+ // Invoke full compaction.
CollectGarbageInternal(collector::kGcTypeFull,
kGcCauseCollectorTransition,
- /*clear_soft_references=*/false, GC_NUM_ANY);
+ /*clear_soft_references=*/false, GetCurrentGcNum() + 1);
} else {
- VLOG(gc) << "CC background compaction ignored due to jank perceptible process state";
+ VLOG(gc) << "background compaction ignored due to jank perceptible process state";
}
} else {
CHECK_EQ(desired_collector_type, collector_type_) << "Unsupported collector transition";
@@ -1761,7 +1818,7 @@ void Heap::VerifyObjectBody(ObjPtr<mirror::Object> obj) {
void Heap::VerifyHeap() {
ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
- auto visitor = [&](mirror::Object* obj) {
+ auto visitor = [&](mirror::Object* obj) NO_THREAD_SAFETY_ANALYSIS {
VerifyObjectBody(obj);
};
// Technically we need the mutator lock here to call Visit. However, VerifyObjectBody is already
@@ -2199,6 +2256,15 @@ void Heap::ChangeCollector(CollectorType collector_type) {
}
break;
}
+ case kCollectorTypeCMC: {
+ gc_plan_.push_back(collector::kGcTypeFull);
+ if (use_tlab_) {
+ ChangeAllocator(kAllocatorTypeTLAB);
+ } else {
+ ChangeAllocator(kAllocatorTypeBumpPointer);
+ }
+ break;
+ }
case kCollectorTypeSS: {
gc_plan_.push_back(collector::kGcTypeFull);
if (use_tlab_) {
@@ -2368,18 +2434,16 @@ void Heap::PreZygoteFork() {
}
// We need to close userfaultfd fd for app/webview zygotes to avoid getattr
// (stat) on the fd during fork.
- if (uffd_ >= 0) {
- close(uffd_);
- uffd_ = -1;
- }
Thread* self = Thread::Current();
MutexLock mu(self, zygote_creation_lock_);
// Try to see if we have any Zygote spaces.
if (HasZygoteSpace()) { return; } - Runtime::Current()->GetInternTable()->AddNewTable(); - Runtime::Current()->GetClassLinker()->MoveClassTableToPreZygote(); + Runtime* runtime = Runtime::Current(); + runtime->GetInternTable()->AddNewTable(); + runtime->GetClassLinker()->MoveClassTableToPreZygote(); + runtime->SetupLinearAllocForPostZygoteFork(self); VLOG(heap) << "Starting PreZygoteFork"; // The end of the non-moving space may be protected, unprotect it so that we can copy the zygote // there. @@ -2488,7 +2552,7 @@ void Heap::PreZygoteFork() { new accounting::ModUnionTableCardCache("zygote space mod-union table", this, zygote_space_); CHECK(mod_union_table != nullptr) << "Failed to create zygote space mod-union table"; - if (collector_type_ != kCollectorTypeCC) { + if (collector_type_ != kCollectorTypeCC && collector_type_ != kCollectorTypeCMC) { // Set all the cards in the mod-union table since we don't know which objects contain references // to large objects. mod_union_table->SetCards(); @@ -2500,10 +2564,10 @@ void Heap::PreZygoteFork() { mod_union_table->ProcessCards(); mod_union_table->ClearTable(); - // For CC we never collect zygote large objects. This means we do not need to set the cards for - // the zygote mod-union table and we can also clear all of the existing image mod-union tables. - // The existing mod-union tables are only for image spaces and may only reference zygote and - // image objects. + // For CC and CMC we never collect zygote large objects. This means we do not need to set the + // cards for the zygote mod-union table and we can also clear all of the existing image + // mod-union tables. The existing mod-union tables are only for image spaces and may only + // reference zygote and image objects. for (auto& pair : mod_union_tables_) { CHECK(pair.first->IsImageSpace()); CHECK(!pair.first->AsImageSpace()->GetImageHeader().IsAppImage()); @@ -2710,6 +2774,9 @@ collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, semi_space_collector_->SetSwapSemiSpaces(true); collector = semi_space_collector_; break; + case kCollectorTypeCMC: + collector = mark_compact_; + break; case kCollectorTypeCC: collector::ConcurrentCopying* active_cc_collector; if (use_generational_cc_) { @@ -2728,7 +2795,9 @@ collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, default: LOG(FATAL) << "Invalid collector type " << static_cast<size_t>(collector_type_); } - if (collector != active_concurrent_copying_collector_.load(std::memory_order_relaxed)) { + // temp_space_ will be null for kCollectorTypeCMC. + if (temp_space_ != nullptr + && collector != active_concurrent_copying_collector_.load(std::memory_order_relaxed)) { temp_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE); if (kIsDebugBuild) { // Try to read each page of the memory map in case mprotect didn't work properly b/19894268. @@ -3561,6 +3630,15 @@ collector::GcType Heap::WaitForGcToCompleteLocked(GcCause cause, Thread* self) { void Heap::DumpForSigQuit(std::ostream& os) { os << "Heap: " << GetPercentFree() << "% free, " << PrettySize(GetBytesAllocated()) << "/" << PrettySize(GetTotalMemory()) << "; " << GetObjectsAllocated() << " objects\n"; + { + os << "Image spaces:\n"; + ScopedObjectAccess soa(Thread::Current()); + for (const auto& space : continuous_spaces_) { + if (space->IsImageSpace()) { + os << space->GetName() << "\n"; + } + } + } DumpGcPerformanceInfo(os); } @@ -3680,7 +3758,9 @@ void Heap::GrowForUtilization(collector::GarbageCollector* collector_ran, // process-state switch. 
min_foreground_target_footprint_ =
(multiplier <= 1.0 && grow_bytes > 0)
- ? bytes_allocated + static_cast<size_t>(grow_bytes * foreground_heap_growth_multiplier_)
+ ? std::min(
+ bytes_allocated + static_cast<size_t>(grow_bytes * foreground_heap_growth_multiplier_),
+ GetMaxMemory())
: 0;
if (IsGcConcurrent()) {
@@ -3712,6 +3792,12 @@
// allocation rate is very high, remaining_bytes could tell us that we should start a GC
// right away.
concurrent_start_bytes_ = std::max(target_footprint - remaining_bytes, bytes_allocated);
+ // Store the concurrent_start_bytes_ value computed with the foreground heap growth
+ // multiplier, so it can be applied when the process state switches to foreground.
+ min_foreground_concurrent_start_bytes_ =
+ min_foreground_target_footprint_ != 0
+ ? std::max(min_foreground_target_footprint_ - remaining_bytes, bytes_allocated)
+ : 0;
}
}
}
@@ -3762,12 +3848,11 @@ void Heap::ClearGrowthLimit() {
void Heap::AddFinalizerReference(Thread* self, ObjPtr<mirror::Object>* object) {
ScopedObjectAccess soa(self);
- ScopedLocalRef<jobject> arg(self->GetJniEnv(), soa.AddLocalReference<jobject>(*object));
- jvalue args[1];
- args[0].l = arg.get();
- InvokeWithJValues(soa, nullptr, WellKnownClasses::java_lang_ref_FinalizerReference_add, args);
- // Restore object in case it gets moved.
- *object = soa.Decode<mirror::Object>(arg.get());
+ StackHandleScope<1u> hs(self);
+ // Use handle wrapper to update the `*object` if the object gets moved.
+ HandleWrapperObjPtr<mirror::Object> h_object = hs.NewHandleWrapper(object);
+ WellKnownClasses::java_lang_ref_FinalizerReference_add->InvokeStatic<'V', 'L'>(
+ self, h_object.Get());
}
void Heap::RequestConcurrentGCAndSaveObject(Thread* self,
@@ -3829,70 +3914,6 @@ bool Heap::RequestConcurrentGC(Thread* self,
return true; // Vacuously.
}
-#if defined(__BIONIC__) && defined(ART_TARGET)
-void Heap::MaybePerformUffdIoctls(GcCause cause, uint32_t requested_gc_num) const {
- if (uffd_ >= 0
- && cause == kGcCauseBackground
- && (requested_gc_num < 5 || requested_gc_num % 5 == 0)) {
- // Attempt to use all userfaultfd ioctls that we intend to use.
- // Register ioctl - { - struct uffdio_register uffd_register; - uffd_register.range.start = 0; - uffd_register.range.len = 0; - uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING; - int ret = ioctl(uffd_, UFFDIO_REGISTER, &uffd_register); - CHECK_EQ(ret, -1); - CHECK_EQ(errno, EINVAL); - } - // Copy ioctl - { - struct uffdio_copy uffd_copy = {.src = 0, .dst = 0, .len = 0, .mode = 0}; - int ret = ioctl(uffd_, UFFDIO_COPY, &uffd_copy); - CHECK_EQ(ret, -1); - CHECK_EQ(errno, EINVAL); - } - // Zeropage ioctl - { - struct uffdio_zeropage uffd_zeropage; - uffd_zeropage.range.start = 0; - uffd_zeropage.range.len = 0; - uffd_zeropage.mode = 0; - int ret = ioctl(uffd_, UFFDIO_ZEROPAGE, &uffd_zeropage); - CHECK_EQ(ret, -1); - CHECK_EQ(errno, EINVAL); - } - // Continue ioctl - { - struct uffdio_continue uffd_continue; - uffd_continue.range.start = 0; - uffd_continue.range.len = 0; - uffd_continue.mode = 0; - int ret = ioctl(uffd_, UFFDIO_CONTINUE, &uffd_continue); - CHECK_EQ(ret, -1); - CHECK_EQ(errno, EINVAL); - } - // Wake ioctl - { - struct uffdio_range uffd_range = {.start = 0, .len = 0}; - int ret = ioctl(uffd_, UFFDIO_WAKE, &uffd_range); - CHECK_EQ(ret, -1); - CHECK_EQ(errno, EINVAL); - } - // Unregister ioctl - { - struct uffdio_range uffd_range = {.start = 0, .len = 0}; - int ret = ioctl(uffd_, UFFDIO_UNREGISTER, &uffd_range); - CHECK_EQ(ret, -1); - CHECK_EQ(errno, EINVAL); - } - } -} -#else -void Heap::MaybePerformUffdIoctls(GcCause cause ATTRIBUTE_UNUSED, - uint32_t requested_gc_num ATTRIBUTE_UNUSED) const {} -#endif - void Heap::ConcurrentGC(Thread* self, GcCause cause, bool force_full, uint32_t requested_gc_num) { if (!Runtime::Current()->IsShuttingDown(self)) { // Wait for any GCs currently running to finish. If this incremented GC number, we're done. @@ -3905,7 +3926,7 @@ void Heap::ConcurrentGC(Thread* self, GcCause cause, bool force_full, uint32_t r } // If we can't run the GC type we wanted to run, find the next appropriate one and try // that instead. E.g. can't do partial, so do full instead. - // We must ensure that we run something that ends up inrementing gcs_completed_. + // We must ensure that we run something that ends up incrementing gcs_completed_. // In the kGcTypePartial case, the initial CollectGarbageInternal call may not have that // effect, but the subsequent KGcTypeFull call will. if (CollectGarbageInternal(next_gc_type, cause, false, requested_gc_num) @@ -3919,12 +3940,9 @@ void Heap::ConcurrentGC(Thread* self, GcCause cause, bool force_full, uint32_t r if (gc_type > next_gc_type && CollectGarbageInternal(gc_type, cause, false, requested_gc_num) != collector::kGcTypeNone) { - MaybePerformUffdIoctls(cause, requested_gc_num); break; } } - } else { - MaybePerformUffdIoctls(cause, requested_gc_num); } } } @@ -3956,16 +3974,6 @@ void Heap::RequestCollectorTransition(CollectorType desired_collector_type, uint // For CC, we invoke a full compaction when going to the background, but the collector type // doesn't change. DCHECK_EQ(desired_collector_type_, kCollectorTypeCCBackground); - // App's allocations (since last GC) more than the threshold then do TransitionGC - // when the app was in background. If not then don't do TransitionGC. 
- size_t num_bytes_allocated_since_gc = GetBytesAllocated() - num_bytes_alive_after_gc_; - if (num_bytes_allocated_since_gc < - (UnsignedDifference(target_footprint_.load(std::memory_order_relaxed), - num_bytes_alive_after_gc_)/4) - && !kStressCollectorTransition - && !IsLowMemoryMode()) { - return; - } } DCHECK_NE(collector_type_, kCollectorTypeCCBackground); CollectorTransitionTask* added_task = nullptr; @@ -4076,12 +4084,6 @@ void Heap::RevokeAllThreadLocalBuffers() { } } -void Heap::RunFinalization(JNIEnv* env, uint64_t timeout) { - env->CallStaticVoidMethod(WellKnownClasses::dalvik_system_VMRuntime, - WellKnownClasses::dalvik_system_VMRuntime_runFinalization, - static_cast<jlong>(timeout)); -} - // For GC triggering purposes, we count old (pre-last-GC) and new native allocations as // different fractions of Java allocations. // For now, we essentially do not count old native allocations at all, so that we can preserve the @@ -4167,7 +4169,7 @@ inline void Heap::CheckGCForNative(Thread* self) { // About kNotifyNativeInterval allocations have occurred. Check whether we should garbage collect. void Heap::NotifyNativeAllocations(JNIEnv* env) { native_objects_notified_.fetch_add(kNotifyNativeInterval, std::memory_order_relaxed); - CheckGCForNative(ThreadForEnv(env)); + CheckGCForNative(Thread::ForEnv(env)); } // Register a native allocation with an explicit size. @@ -4181,7 +4183,7 @@ void Heap::RegisterNativeAllocation(JNIEnv* env, size_t bytes) { native_objects_notified_.fetch_add(1, std::memory_order_relaxed); if (objects_notified % kNotifyNativeInterval == kNotifyNativeInterval - 1 || bytes > kCheckImmediatelyThreshold) { - CheckGCForNative(ThreadForEnv(env)); + CheckGCForNative(Thread::ForEnv(env)); } // Heap profiler treats this as a Java allocation with a null object. JHPCheckNonTlabSampleAllocation(Thread::Current(), nullptr, bytes); @@ -4280,7 +4282,7 @@ void Heap::SweepAllocationRecords(IsMarkedVisitor* visitor) const { } void Heap::AllowNewAllocationRecords() const { - CHECK(!kUseReadBarrier); + CHECK(!gUseReadBarrier); MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_); AllocRecordObjectMap* allocation_records = GetAllocationRecords(); if (allocation_records != nullptr) { @@ -4289,7 +4291,7 @@ void Heap::AllowNewAllocationRecords() const { } void Heap::DisallowNewAllocationRecords() const { - CHECK(!kUseReadBarrier); + CHECK(!gUseReadBarrier); MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_); AllocRecordObjectMap* allocation_records = GetAllocationRecords(); if (allocation_records != nullptr) { @@ -4412,12 +4414,15 @@ void Heap::CheckGcStressMode(Thread* self, ObjPtr<mirror::Object>* obj) { } void Heap::DisableGCForShutdown() { - Thread* const self = Thread::Current(); - CHECK(Runtime::Current()->IsShuttingDown(self)); - MutexLock mu(self, *gc_complete_lock_); + MutexLock mu(Thread::Current(), *gc_complete_lock_); gc_disabled_for_shutdown_ = true; } +bool Heap::IsGCDisabledForShutdown() const { + MutexLock mu(Thread::Current(), *gc_complete_lock_); + return gc_disabled_for_shutdown_; +} + bool Heap::ObjectIsInBootImageSpace(ObjPtr<mirror::Object> obj) const { DCHECK_EQ(IsBootImageAddress(obj.Ptr()), any_of(boot_image_spaces_.begin(), @@ -4494,8 +4499,13 @@ mirror::Object* Heap::AllocWithNewTLAB(Thread* self, DCHECK_LE(alloc_size, self->TlabSize()); } else if (allocator_type == kAllocatorTypeTLAB) { DCHECK(bump_pointer_space_ != nullptr); + // Try to allocate a page-aligned TLAB (not necessary though). 
+ // TODO: for large allocations, which are rare, maybe we should allocate + // that object and return. There is no need to revoke the current TLAB, + // particularly if it's mostly unutilized. + size_t def_pr_tlab_size = RoundDown(alloc_size + kDefaultTLABSize, kPageSize) - alloc_size; size_t next_tlab_size = JHPCalculateNextTlabSize(self, - kDefaultTLABSize, + def_pr_tlab_size, alloc_size, &take_sample, &bytes_until_sample); @@ -4658,42 +4668,33 @@ void Heap::PostForkChildAction(Thread* self) { uint64_t last_adj_time = NanoTime(); next_gc_type_ = NonStickyGcType(); // Always start with a full gc. -#if defined(__BIONIC__) && defined(ART_TARGET) - uffd_ = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY); - if (uffd_ >= 0) { - struct uffdio_api api = {.api = UFFD_API, .features = 0}; - int ret = ioctl(uffd_, UFFDIO_API, &api); - CHECK_EQ(ret, 0) << "ioctl_userfaultfd: API: " << strerror(errno); - } else { - // The syscall should fail only if it doesn't exist in the kernel or if it's - // denied by SELinux. - CHECK(errno == ENOSYS || errno == EACCES) << "userfaultfd: " << strerror(errno); + LOG(INFO) << "Using " << foreground_collector_type_ << " GC."; + if (gUseUserfaultfd) { + DCHECK_NE(mark_compact_, nullptr); + mark_compact_->CreateUserfaultfd(/*post_fork*/true); } -#endif // Temporarily increase target_footprint_ and concurrent_start_bytes_ to // max values to avoid GC during app launch. - if (!IsLowMemoryMode()) { - // Set target_footprint_ to the largest allowed value. - SetIdealFootprint(growth_limit_); - SetDefaultConcurrentStartBytes(); - - // Shrink heap after kPostForkMaxHeapDurationMS, to force a memory hog process to GC. - // This remains high enough that many processes will continue without a GC. - if (initial_heap_size_ < growth_limit_) { - size_t first_shrink_size = std::max(growth_limit_ / 4, initial_heap_size_); - last_adj_time += MsToNs(kPostForkMaxHeapDurationMS); + // Set target_footprint_ to the largest allowed value. + SetIdealFootprint(growth_limit_); + SetDefaultConcurrentStartBytes(); + + // Shrink heap after kPostForkMaxHeapDurationMS, to force a memory hog process to GC. + // This remains high enough that many processes will continue without a GC. + if (initial_heap_size_ < growth_limit_) { + size_t first_shrink_size = std::max(growth_limit_ / 4, initial_heap_size_); + last_adj_time += MsToNs(kPostForkMaxHeapDurationMS); + GetTaskProcessor()->AddTask( + self, new ReduceTargetFootprintTask(last_adj_time, first_shrink_size, starting_gc_num)); + // Shrink to a small value after a substantial time period. This will typically force a + // GC if none has occurred yet. Has no effect if there was a GC before this anyway, which + // is commonly the case, e.g. because of a process transition. + if (initial_heap_size_ < first_shrink_size) { + last_adj_time += MsToNs(4 * kPostForkMaxHeapDurationMS); GetTaskProcessor()->AddTask( - self, new ReduceTargetFootprintTask(last_adj_time, first_shrink_size, starting_gc_num)); - // Shrink to a small value after a substantial time period. This will typically force a - // GC if none has occurred yet. Has no effect if there was a GC before this anyway, which - // is commonly the case, e.g. because of a process transition. 
- if (initial_heap_size_ < first_shrink_size) { - last_adj_time += MsToNs(4 * kPostForkMaxHeapDurationMS); - GetTaskProcessor()->AddTask( - self, - new ReduceTargetFootprintTask(last_adj_time, initial_heap_size_, starting_gc_num)); - } + self, + new ReduceTargetFootprintTask(last_adj_time, initial_heap_size_, starting_gc_num)); } } // Schedule a GC after a substantial period of time. This will become a no-op if another GC is diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h index 232c96b914..31a1b2b6a2 100644 --- a/runtime/gc/heap.h +++ b/runtime/gc/heap.h @@ -34,6 +34,7 @@ #include "base/time_utils.h" #include "gc/collector/gc_type.h" #include "gc/collector/iteration.h" +#include "gc/collector/mark_compact.h" #include "gc/collector_type.h" #include "gc/gc_cause.h" #include "gc/space/large_object_space.h" @@ -150,7 +151,7 @@ class Heap { static constexpr size_t kMinLargeObjectThreshold = 3 * kPageSize; static constexpr size_t kDefaultLargeObjectThreshold = kMinLargeObjectThreshold; // Whether or not parallel GC is enabled. If not, then we never create the thread pool. - static constexpr bool kDefaultEnableParallelGC = false; + static constexpr bool kDefaultEnableParallelGC = true; static uint8_t* const kPreferredAllocSpaceBegin; // Whether or not we use the free list large object space. Only use it if USE_ART_LOW_4G_ALLOCATOR @@ -181,10 +182,8 @@ class Heap { // How often we allow heap trimming to happen (nanoseconds). static constexpr uint64_t kHeapTrimWait = MsToNs(5000); - // How long we wait after a transition request to perform a collector transition (nanoseconds). - static constexpr uint64_t kCollectorTransitionWait = MsToNs(5000); - // Whether the transition-wait applies or not. Zero wait will stress the - // transition code and collector, but increases jank probability. + // Whether the transition-GC heap threshold condition applies or not for non-low memory devices. + // Stressing GC will bypass the heap threshold condition. DECLARE_RUNTIME_DEBUG_FLAG(kStressCollectorTransition); // Create a heap with the requested sizes. The possible empty @@ -385,6 +384,9 @@ class Heap { void ThreadFlipBegin(Thread* self) REQUIRES(!*thread_flip_lock_); void ThreadFlipEnd(Thread* self) REQUIRES(!*thread_flip_lock_); + // Ensures that the obj doesn't cause userfaultfd in JNI critical calls. + void EnsureObjectUserfaulted(ObjPtr<mirror::Object> obj) REQUIRES_SHARED(Locks::mutator_lock_); + // Clear all of the mark bits, doesn't clear bitmaps which have the same live bits as mark bits. // Mutator lock is required for GetContinuousSpaces. void ClearMarkedObjects() @@ -578,6 +580,9 @@ class Heap { return region_space_; } + space::BumpPointerSpace* GetBumpPointerSpace() const { + return bump_pointer_space_; + } // Implements java.lang.Runtime.maxMemory, returning the maximum amount of memory a program can // consume. For a regular VM this would relate to the -Xmx option and would return -1 if no Xmx // were specified. Android apps start with a growth limit (small heap size) which is @@ -661,6 +666,10 @@ class Heap { return live_stack_.get(); } + accounting::ObjectStack* GetAllocationStack() REQUIRES_SHARED(Locks::heap_bitmap_lock_) { + return allocation_stack_.get(); + } + void PreZygoteFork() NO_THREAD_SAFETY_ANALYSIS; // Mark and empty stack. @@ -760,8 +769,10 @@ class Heap { REQUIRES(!*gc_complete_lock_); void ResetGcPerformanceInfo() REQUIRES(!*gc_complete_lock_); - // Thread pool. - void CreateThreadPool(); + // Thread pool. 
Create either the given number of threads, or as per the + // values of conc_gc_threads_ and parallel_gc_threads_. + void CreateThreadPool(size_t num_threads = 0); + void WaitForWorkersToBeCreated(); void DeleteThreadPool(); ThreadPool* GetThreadPool() { return thread_pool_.get(); @@ -812,10 +823,22 @@ class Heap { return active_collector; } - CollectorType CurrentCollectorType() { + collector::MarkCompact* MarkCompactCollector() { + DCHECK(!gUseUserfaultfd || mark_compact_ != nullptr); + return mark_compact_; + } + + bool IsPerformingUffdCompaction() { return gUseUserfaultfd && mark_compact_->IsCompacting(); } + + CollectorType CurrentCollectorType() const { + DCHECK(!gUseUserfaultfd || collector_type_ == kCollectorTypeCMC); return collector_type_; } + bool IsMovingGc() const { return IsMovingGc(CurrentCollectorType()); } + + CollectorType GetForegroundCollectorType() const { return foreground_collector_type_; } + bool IsGcConcurrentAndMoving() const { if (IsGcConcurrent() && IsMovingGc(collector_type_)) { // Assume no transition when a concurrent moving collector is used. @@ -939,6 +962,7 @@ class Heap { REQUIRES(!Locks::alloc_tracker_lock_); void DisableGCForShutdown() REQUIRES(!*gc_complete_lock_); + bool IsGCDisabledForShutdown() const REQUIRES(!*gc_complete_lock_); // Create a new alloc space and compact default alloc space to it. HomogeneousSpaceCompactResult PerformHomogeneousSpaceCompact() @@ -1001,9 +1025,6 @@ class Heap { return main_space_backup_ != nullptr; } - // Attempt to use all the userfaultfd related ioctls. - void MaybePerformUffdIoctls(GcCause cause, uint32_t requested_gc_num) const; - // Size_t saturating arithmetic static ALWAYS_INLINE size_t UnsignedDifference(size_t x, size_t y) { return x > y ? x - y : 0; @@ -1019,19 +1040,11 @@ class Heap { allocator_type != kAllocatorTypeTLAB && allocator_type != kAllocatorTypeRegion; } - static ALWAYS_INLINE bool AllocatorMayHaveConcurrentGC(AllocatorType allocator_type) { - if (kUseReadBarrier) { - // Read barrier may have the TLAB allocator but is always concurrent. TODO: clean this up. - return true; - } - return - allocator_type != kAllocatorTypeTLAB && - allocator_type != kAllocatorTypeBumpPointer; - } static bool IsMovingGc(CollectorType collector_type) { return collector_type == kCollectorTypeCC || collector_type == kCollectorTypeSS || + collector_type == kCollectorTypeCMC || collector_type == kCollectorTypeCCBackground || collector_type == kCollectorTypeHomogeneousSpaceCompact; } @@ -1117,9 +1130,6 @@ class Heap { size_t alloc_size, bool grow); - // Run the finalizers. If timeout is non zero, then we use the VMRuntime version. - void RunFinalization(JNIEnv* env, uint64_t timeout); - // Blocks the caller until the garbage collector becomes idle and returns the type of GC we // waited for. collector::GcType WaitForGcToCompleteLocked(GcCause cause, Thread* self) @@ -1223,6 +1233,7 @@ class Heap { // sweep GC, false for other GC types. bool IsGcConcurrent() const ALWAYS_INLINE { return collector_type_ == kCollectorTypeCC || + collector_type_ == kCollectorTypeCMC || collector_type_ == kCollectorTypeCMS || collector_type_ == kCollectorTypeCCBackground; } @@ -1326,7 +1337,7 @@ class Heap { // The current collector type. CollectorType collector_type_; // Which collector we use when the app is in the foreground. - CollectorType foreground_collector_type_; + const CollectorType foreground_collector_type_; // Which collector we will use when the app is notified of a transition to background. 
CollectorType background_collector_type_; // Desired collector type, heap trimming daemon transitions the heap if it is != collector_type_. @@ -1437,8 +1448,9 @@ class Heap { // Computed with foreground-multiplier in GrowForUtilization() when run in // jank non-perceptible state. On update to process state from background to - // foreground we set target_footprint_ to this value. + // foreground we set target_footprint_ and concurrent_start_bytes_ to the corresponding value. size_t min_foreground_target_footprint_ GUARDED_BY(process_state_update_lock_); + size_t min_foreground_concurrent_start_bytes_ GUARDED_BY(process_state_update_lock_); // When num_bytes_allocated_ exceeds this amount then a concurrent GC should be requested so that // it completes ahead of an allocation failing. @@ -1588,6 +1600,7 @@ class Heap { std::vector<collector::GarbageCollector*> garbage_collectors_; collector::SemiSpace* semi_space_collector_; + collector::MarkCompact* mark_compact_; Atomic<collector::ConcurrentCopying*> active_concurrent_copying_collector_; collector::ConcurrentCopying* young_concurrent_copying_collector_; collector::ConcurrentCopying* concurrent_copying_collector_; @@ -1680,9 +1693,6 @@ class Heap { // Stack trace hashes that we already saw, std::unordered_set<uint64_t> seen_backtraces_ GUARDED_BY(backtrace_lock_); - // Userfaultfd file descriptor. - // TODO (lokeshgidra): remove this when the userfaultfd-based GC is in use. - int uffd_; // We disable GC when we are shutting down the runtime in case there are daemon threads still // allocating. bool gc_disabled_for_shutdown_ GUARDED_BY(gc_complete_lock_); @@ -1712,6 +1722,7 @@ class Heap { friend class CollectorTransitionTask; friend class collector::GarbageCollector; friend class collector::ConcurrentCopying; + friend class collector::MarkCompact; friend class collector::MarkSweep; friend class collector::SemiSpace; friend class GCCriticalSection; diff --git a/runtime/gc/heap_test.cc b/runtime/gc/heap_test.cc index 5e8c1e368a..b569241bdc 100644 --- a/runtime/gc/heap_test.cc +++ b/runtime/gc/heap_test.cc @@ -14,6 +14,9 @@ * limitations under the License. */ +#include <algorithm> + +#include "base/metrics/metrics.h" #include "class_linker-inl.h" #include "common_runtime_test.h" #include "gc/accounting/card_table-inl.h" @@ -30,6 +33,10 @@ namespace gc { class HeapTest : public CommonRuntimeTest { public: + HeapTest() { + use_boot_image_ = true; // Make the Runtime creation cheaper. + } + void SetUp() override { MemMap::Init(); std::string error_msg; @@ -99,7 +106,160 @@ TEST_F(HeapTest, DumpGCPerformanceOnShutdown) { Runtime::Current()->SetDumpGCPerformanceOnShutdown(true); } +bool AnyIsFalse(bool x, bool y) { return !x || !y; } + +TEST_F(HeapTest, GCMetrics) { + // Allocate a few string objects (to be collected), then trigger garbage + // collection, and check that GC metrics are updated (where applicable). + { + constexpr const size_t kNumObj = 128; + ScopedObjectAccess soa(Thread::Current()); + StackHandleScope<kNumObj> hs(soa.Self()); + for (size_t i = 0u; i < kNumObj; ++i) { + Handle<mirror::String> string [[maybe_unused]] ( + hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), "test"))); + } + } + Heap* heap = Runtime::Current()->GetHeap(); + heap->CollectGarbage(/* clear_soft_references= */ false); + + // ART Metrics. + metrics::ArtMetrics* metrics = Runtime::Current()->GetMetrics(); + // ART full-heap GC metrics. 
+ metrics::MetricsBase<int64_t>* full_gc_collection_time = metrics->FullGcCollectionTime();
+ metrics::MetricsBase<uint64_t>* full_gc_count = metrics->FullGcCount();
+ metrics::MetricsBase<uint64_t>* full_gc_count_delta = metrics->FullGcCountDelta();
+ metrics::MetricsBase<int64_t>* full_gc_throughput = metrics->FullGcThroughput();
+ metrics::MetricsBase<int64_t>* full_gc_tracing_throughput = metrics->FullGcTracingThroughput();
+ metrics::MetricsBase<uint64_t>* full_gc_throughput_avg = metrics->FullGcThroughputAvg();
+ metrics::MetricsBase<uint64_t>* full_gc_tracing_throughput_avg =
+ metrics->FullGcTracingThroughputAvg();
+ metrics::MetricsBase<uint64_t>* full_gc_scanned_bytes = metrics->FullGcScannedBytes();
+ metrics::MetricsBase<uint64_t>* full_gc_scanned_bytes_delta = metrics->FullGcScannedBytesDelta();
+ metrics::MetricsBase<uint64_t>* full_gc_freed_bytes = metrics->FullGcFreedBytes();
+ metrics::MetricsBase<uint64_t>* full_gc_freed_bytes_delta = metrics->FullGcFreedBytesDelta();
+ metrics::MetricsBase<uint64_t>* full_gc_duration = metrics->FullGcDuration();
+ metrics::MetricsBase<uint64_t>* full_gc_duration_delta = metrics->FullGcDurationDelta();
+ // ART young-generation GC metrics.
+ metrics::MetricsBase<int64_t>* young_gc_collection_time = metrics->YoungGcCollectionTime();
+ metrics::MetricsBase<uint64_t>* young_gc_count = metrics->YoungGcCount();
+ metrics::MetricsBase<uint64_t>* young_gc_count_delta = metrics->YoungGcCountDelta();
+ metrics::MetricsBase<int64_t>* young_gc_throughput = metrics->YoungGcThroughput();
+ metrics::MetricsBase<int64_t>* young_gc_tracing_throughput = metrics->YoungGcTracingThroughput();
+ metrics::MetricsBase<uint64_t>* young_gc_throughput_avg = metrics->YoungGcThroughputAvg();
+ metrics::MetricsBase<uint64_t>* young_gc_tracing_throughput_avg =
+ metrics->YoungGcTracingThroughputAvg();
+ metrics::MetricsBase<uint64_t>* young_gc_scanned_bytes = metrics->YoungGcScannedBytes();
+ metrics::MetricsBase<uint64_t>* young_gc_scanned_bytes_delta =
+ metrics->YoungGcScannedBytesDelta();
+ metrics::MetricsBase<uint64_t>* young_gc_freed_bytes = metrics->YoungGcFreedBytes();
+ metrics::MetricsBase<uint64_t>* young_gc_freed_bytes_delta = metrics->YoungGcFreedBytesDelta();
+ metrics::MetricsBase<uint64_t>* young_gc_duration = metrics->YoungGcDuration();
+ metrics::MetricsBase<uint64_t>* young_gc_duration_delta = metrics->YoungGcDurationDelta();
+
+ CollectorType fg_collector_type = heap->GetForegroundCollectorType();
+ if (fg_collector_type == kCollectorTypeCC || fg_collector_type == kCollectorTypeCMC) {
+ // Only the Concurrent Copying and Concurrent Mark-Compact collectors enable
+ // GC metrics at the moment.
+ if (heap->GetUseGenerationalCC()) {
+ // Check that full-heap and/or young-generation GC metrics are non-null
+ // after triggering the collection.
+ EXPECT_PRED2(
+ AnyIsFalse, full_gc_collection_time->IsNull(), young_gc_collection_time->IsNull());
+ EXPECT_PRED2(AnyIsFalse, full_gc_count->IsNull(), young_gc_count->IsNull());
+ EXPECT_PRED2(AnyIsFalse, full_gc_count_delta->IsNull(), young_gc_count_delta->IsNull());
+ EXPECT_PRED2(AnyIsFalse, full_gc_throughput->IsNull(), young_gc_throughput->IsNull());
+ EXPECT_PRED2(
+ AnyIsFalse, full_gc_tracing_throughput->IsNull(), young_gc_tracing_throughput->IsNull());
+ EXPECT_PRED2(AnyIsFalse, full_gc_throughput_avg->IsNull(), young_gc_throughput_avg->IsNull());
+ EXPECT_PRED2(AnyIsFalse,
+ full_gc_tracing_throughput_avg->IsNull(),
+ young_gc_tracing_throughput_avg->IsNull());
+ EXPECT_PRED2(AnyIsFalse, full_gc_scanned_bytes->IsNull(), young_gc_scanned_bytes->IsNull());
+ EXPECT_PRED2(AnyIsFalse,
+ full_gc_scanned_bytes_delta->IsNull(),
+ young_gc_scanned_bytes_delta->IsNull());
+ EXPECT_PRED2(AnyIsFalse, full_gc_freed_bytes->IsNull(), young_gc_freed_bytes->IsNull());
+ EXPECT_PRED2(
+ AnyIsFalse, full_gc_freed_bytes_delta->IsNull(), young_gc_freed_bytes_delta->IsNull());
+ // We have observed that sometimes the GC duration (both for full-heap and
+ // young-generation collections) is null (b/271112044). Temporarily
+ // suspend the following checks while we investigate.
+ //
+ // TODO(b/271112044): Investigate and adjust these expectations and/or the
+ // corresponding metric logic.
+#if 0
+ EXPECT_PRED2(AnyIsFalse, full_gc_duration->IsNull(), young_gc_duration->IsNull());
+ EXPECT_PRED2(AnyIsFalse, full_gc_duration_delta->IsNull(), young_gc_duration_delta->IsNull());
+#endif
+ } else {
+ // Check that only full-heap GC metrics are non-null after triggering the collection.
+ EXPECT_FALSE(full_gc_collection_time->IsNull());
+ EXPECT_FALSE(full_gc_count->IsNull());
+ EXPECT_FALSE(full_gc_count_delta->IsNull());
+ EXPECT_FALSE(full_gc_throughput->IsNull());
+ EXPECT_FALSE(full_gc_tracing_throughput->IsNull());
+ EXPECT_FALSE(full_gc_throughput_avg->IsNull());
+ EXPECT_FALSE(full_gc_tracing_throughput_avg->IsNull());
+ EXPECT_FALSE(full_gc_scanned_bytes->IsNull());
+ EXPECT_FALSE(full_gc_scanned_bytes_delta->IsNull());
+ EXPECT_FALSE(full_gc_freed_bytes->IsNull());
+ EXPECT_FALSE(full_gc_freed_bytes_delta->IsNull());
+ EXPECT_FALSE(full_gc_duration->IsNull());
+ EXPECT_FALSE(full_gc_duration_delta->IsNull());
+
+ EXPECT_TRUE(young_gc_collection_time->IsNull());
+ EXPECT_TRUE(young_gc_count->IsNull());
+ EXPECT_TRUE(young_gc_count_delta->IsNull());
+ EXPECT_TRUE(young_gc_throughput->IsNull());
+ EXPECT_TRUE(young_gc_tracing_throughput->IsNull());
+ EXPECT_TRUE(young_gc_throughput_avg->IsNull());
+ EXPECT_TRUE(young_gc_tracing_throughput_avg->IsNull());
+ EXPECT_TRUE(young_gc_scanned_bytes->IsNull());
+ EXPECT_TRUE(young_gc_scanned_bytes_delta->IsNull());
+ EXPECT_TRUE(young_gc_freed_bytes->IsNull());
+ EXPECT_TRUE(young_gc_freed_bytes_delta->IsNull());
+ EXPECT_TRUE(young_gc_duration->IsNull());
+ EXPECT_TRUE(young_gc_duration_delta->IsNull());
+ }
+ } else {
+ // Check that all metrics are null after triggering the collection.
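On the EXPECT_PRED2 checks in the generational branch above: EXPECT_PRED2(AnyIsFalse, a, b) passes when at least one of the two IsNull() results is false, i.e. at least one of the full-heap/young-generation counters was populated by the collection, without requiring both. A tiny stand-alone illustration with plain asserts (the values are hypothetical, not taken from a test run):

#include <cassert>

bool AnyIsFalse(bool x, bool y) { return !x || !y; }

int main() {
  // Hypothetical outcome: only the young-generation metrics were reported.
  bool full_is_null = true;
  bool young_is_null = false;
  assert(AnyIsFalse(full_is_null, young_is_null));  // At least one metric was populated.

  // If neither metric had been populated, the expectation would fail.
  assert(!AnyIsFalse(/*x=*/true, /*y=*/true));
  return 0;
}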
+ EXPECT_TRUE(full_gc_collection_time->IsNull()); + EXPECT_TRUE(full_gc_count->IsNull()); + EXPECT_TRUE(full_gc_count_delta->IsNull()); + EXPECT_TRUE(full_gc_throughput->IsNull()); + EXPECT_TRUE(full_gc_tracing_throughput->IsNull()); + EXPECT_TRUE(full_gc_throughput_avg->IsNull()); + EXPECT_TRUE(full_gc_tracing_throughput_avg->IsNull()); + EXPECT_TRUE(full_gc_scanned_bytes->IsNull()); + EXPECT_TRUE(full_gc_scanned_bytes_delta->IsNull()); + EXPECT_TRUE(full_gc_freed_bytes->IsNull()); + EXPECT_TRUE(full_gc_freed_bytes_delta->IsNull()); + EXPECT_TRUE(full_gc_duration->IsNull()); + EXPECT_TRUE(full_gc_duration_delta->IsNull()); + + EXPECT_TRUE(young_gc_collection_time->IsNull()); + EXPECT_TRUE(young_gc_count->IsNull()); + EXPECT_TRUE(young_gc_count_delta->IsNull()); + EXPECT_TRUE(young_gc_throughput->IsNull()); + EXPECT_TRUE(young_gc_tracing_throughput->IsNull()); + EXPECT_TRUE(young_gc_throughput_avg->IsNull()); + EXPECT_TRUE(young_gc_tracing_throughput_avg->IsNull()); + EXPECT_TRUE(young_gc_scanned_bytes->IsNull()); + EXPECT_TRUE(young_gc_scanned_bytes_delta->IsNull()); + EXPECT_TRUE(young_gc_freed_bytes->IsNull()); + EXPECT_TRUE(young_gc_freed_bytes_delta->IsNull()); + EXPECT_TRUE(young_gc_duration->IsNull()); + EXPECT_TRUE(young_gc_duration_delta->IsNull()); + } +} + class ZygoteHeapTest : public CommonRuntimeTest { + public: + ZygoteHeapTest() { + use_boot_image_ = true; // Make the Runtime creation cheaper. + } + void SetUpRuntimeOptions(RuntimeOptions* options) override { CommonRuntimeTest::SetUpRuntimeOptions(options); options->push_back(std::make_pair("-Xzygote", nullptr)); diff --git a/runtime/gc/heap_verification_test.cc b/runtime/gc/heap_verification_test.cc index ca6a30b11d..a7583fe7f1 100644 --- a/runtime/gc/heap_verification_test.cc +++ b/runtime/gc/heap_verification_test.cc @@ -26,14 +26,16 @@ #include "mirror/string.h" #include "runtime.h" #include "scoped_thread_state_change-inl.h" -#include "verification.h" +#include "verification-inl.h" namespace art { namespace gc { class VerificationTest : public CommonRuntimeTest { protected: - VerificationTest() {} + VerificationTest() { + use_boot_image_ = true; // Make the Runtime creation cheaper. 
+ } template <class T> ObjPtr<mirror::ObjectArray<T>> AllocObjectArray(Thread* self, size_t length) @@ -76,11 +78,11 @@ TEST_F(VerificationTest, IsValidClassOrNotInHeap) { Handle<mirror::String> string( hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), "test"))); const Verification* const v = Runtime::Current()->GetHeap()->GetVerification(); - EXPECT_FALSE(v->IsValidClass(reinterpret_cast<const void*>(1))); - EXPECT_FALSE(v->IsValidClass(reinterpret_cast<const void*>(4))); + EXPECT_FALSE(v->IsValidClass(reinterpret_cast<mirror::Class*>(1))); + EXPECT_FALSE(v->IsValidClass(reinterpret_cast<mirror::Class*>(4))); EXPECT_FALSE(v->IsValidClass(nullptr)); EXPECT_TRUE(v->IsValidClass(string->GetClass())); - EXPECT_FALSE(v->IsValidClass(string.Get())); + EXPECT_FALSE(v->IsValidClass(reinterpret_cast<mirror::Class*>(string.Get()))); } TEST_F(VerificationTest, IsValidClassInHeap) { @@ -95,9 +97,9 @@ TEST_F(VerificationTest, IsValidClassInHeap) { Handle<mirror::String> string( hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), "test"))); const Verification* const v = Runtime::Current()->GetHeap()->GetVerification(); - const uintptr_t uint_klass = reinterpret_cast<uintptr_t>(string->GetClass()); - EXPECT_FALSE(v->IsValidClass(reinterpret_cast<const void*>(uint_klass - kObjectAlignment))); - EXPECT_FALSE(v->IsValidClass(reinterpret_cast<const void*>(&uint_klass))); + uintptr_t uint_klass = reinterpret_cast<uintptr_t>(string->GetClass()); + EXPECT_FALSE(v->IsValidClass(reinterpret_cast<mirror::Class*>(uint_klass - kObjectAlignment))); + EXPECT_FALSE(v->IsValidClass(reinterpret_cast<mirror::Class*>(&uint_klass))); } TEST_F(VerificationTest, DumpInvalidObjectInfo) { diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc index 5e41ee4ef8..f24c94279c 100644 --- a/runtime/gc/reference_processor.cc +++ b/runtime/gc/reference_processor.cc @@ -90,7 +90,7 @@ void ReferenceProcessor::BroadcastForSlowPath(Thread* self) { ObjPtr<mirror::Object> ReferenceProcessor::GetReferent(Thread* self, ObjPtr<mirror::Reference> reference) { auto slow_path_required = [this, self]() REQUIRES_SHARED(Locks::mutator_lock_) { - return kUseReadBarrier ? !self->GetWeakRefAccessEnabled() : SlowPathEnabled(); + return gUseReadBarrier ? !self->GetWeakRefAccessEnabled() : SlowPathEnabled(); }; if (!slow_path_required()) { return reference->GetReferent(); @@ -118,10 +118,10 @@ ObjPtr<mirror::Object> ReferenceProcessor::GetReferent(Thread* self, // Keeping reference_processor_lock_ blocks the broadcast when we try to reenable the fast path. while (slow_path_required()) { DCHECK(collector_ != nullptr); - constexpr bool kOtherReadBarrier = kUseReadBarrier && !kUseBakerReadBarrier; + const bool other_read_barrier = !kUseBakerReadBarrier && gUseReadBarrier; if (UNLIKELY(reference->IsFinalizerReferenceInstance() || rp_state_ == RpState::kStarting /* too early to determine mark state */ - || (kOtherReadBarrier && reference->IsPhantomReferenceInstance()))) { + || (other_read_barrier && reference->IsPhantomReferenceInstance()))) { // Odd cases in which it doesn't hurt to just wait, or the wait is likely to be very brief. 
// Check and run the empty checkpoint before blocking so the empty checkpoint will work in the @@ -210,7 +210,7 @@ void ReferenceProcessor::ProcessReferences(Thread* self, TimingLogger* timings) } { MutexLock mu(self, *Locks::reference_processor_lock_); - if (!kUseReadBarrier) { + if (!gUseReadBarrier) { CHECK_EQ(SlowPathEnabled(), concurrent_) << "Slow path must be enabled iff concurrent"; } else { // Weak ref access is enabled at Zygote compaction by SemiSpace (concurrent_ == false). @@ -305,7 +305,7 @@ void ReferenceProcessor::ProcessReferences(Thread* self, TimingLogger* timings) // could result in a stale is_marked_callback_ being called before the reference processing // starts since there is a small window of time where slow_path_enabled_ is enabled but the // callback isn't yet set. - if (!kUseReadBarrier && concurrent_) { + if (!gUseReadBarrier && concurrent_) { // Done processing, disable the slow path and broadcast to the waiters. DisableSlowPath(self); } @@ -363,9 +363,8 @@ class ClearedReferenceTask : public HeapTask { } void Run(Thread* thread) override { ScopedObjectAccess soa(thread); - jvalue args[1]; - args[0].l = cleared_references_; - InvokeWithJValues(soa, nullptr, WellKnownClasses::java_lang_ref_ReferenceQueue_add, args); + WellKnownClasses::java_lang_ref_ReferenceQueue_add->InvokeStatic<'V', 'L'>( + thread, soa.Decode<mirror::Object>(cleared_references_)); soa.Env()->DeleteGlobalRef(cleared_references_); } @@ -418,8 +417,8 @@ void ReferenceProcessor::ClearReferent(ObjPtr<mirror::Reference> ref) { void ReferenceProcessor::WaitUntilDoneProcessingReferences(Thread* self) { // Wait until we are done processing reference. - while ((!kUseReadBarrier && SlowPathEnabled()) || - (kUseReadBarrier && !self->GetWeakRefAccessEnabled())) { + while ((!gUseReadBarrier && SlowPathEnabled()) || + (gUseReadBarrier && !self->GetWeakRefAccessEnabled())) { // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the // presence of threads blocking for weak ref access. self->CheckEmptyCheckpointFromWeakRefAccess(Locks::reference_processor_lock_); diff --git a/runtime/gc/reference_queue_test.cc b/runtime/gc/reference_queue_test.cc index c680fb5781..c8e71b02ac 100644 --- a/runtime/gc/reference_queue_test.cc +++ b/runtime/gc/reference_queue_test.cc @@ -26,7 +26,12 @@ namespace art { namespace gc { -class ReferenceQueueTest : public CommonRuntimeTest {}; +class ReferenceQueueTest : public CommonRuntimeTest { + protected: + ReferenceQueueTest() { + use_boot_image_ = true; // Make the Runtime creation cheaper. 
+ } +}; TEST_F(ReferenceQueueTest, EnqueueDequeue) { Thread* self = Thread::Current(); diff --git a/runtime/gc/scoped_gc_critical_section.cc b/runtime/gc/scoped_gc_critical_section.cc index eaede43e79..7a0a6e8736 100644 --- a/runtime/gc/scoped_gc_critical_section.cc +++ b/runtime/gc/scoped_gc_critical_section.cc @@ -58,17 +58,5 @@ ScopedGCCriticalSection::~ScopedGCCriticalSection() { critical_section_.Exit(old_no_suspend_reason_); } -ScopedInterruptibleGCCriticalSection::ScopedInterruptibleGCCriticalSection( - Thread* self, - GcCause cause, - CollectorType type) : self_(self) { - DCHECK(self != nullptr); - Runtime::Current()->GetHeap()->StartGC(self_, cause, type); -} - -ScopedInterruptibleGCCriticalSection::~ScopedInterruptibleGCCriticalSection() { - Runtime::Current()->GetHeap()->FinishGC(self_, collector::kGcTypeNone); -} - } // namespace gc } // namespace art diff --git a/runtime/gc/scoped_gc_critical_section.h b/runtime/gc/scoped_gc_critical_section.h index b3a897c76b..8ad01580c2 100644 --- a/runtime/gc/scoped_gc_critical_section.h +++ b/runtime/gc/scoped_gc_critical_section.h @@ -59,19 +59,6 @@ class ScopedGCCriticalSection { const char* old_no_suspend_reason_; }; -// The use of ScopedGCCriticalSection should be preferred whenever possible. -// This class allows thread suspension but should never be used with allocations because of the -// deadlock risk. TODO: Add a new thread role for "no allocations" that still allows suspension. -class ScopedInterruptibleGCCriticalSection { - public: - ScopedInterruptibleGCCriticalSection(Thread* self, GcCause cause, CollectorType type); - ~ScopedInterruptibleGCCriticalSection(); - - private: - Thread* const self_; -}; - - } // namespace gc } // namespace art diff --git a/runtime/gc/space/bump_pointer_space-inl.h b/runtime/gc/space/bump_pointer_space-inl.h index 20f7a93eb1..2774b9e71c 100644 --- a/runtime/gc/space/bump_pointer_space-inl.h +++ b/runtime/gc/space/bump_pointer_space-inl.h @@ -20,6 +20,7 @@ #include "bump_pointer_space.h" #include "base/bit_utils.h" +#include "mirror/object-inl.h" namespace art { namespace gc { @@ -89,6 +90,11 @@ inline mirror::Object* BumpPointerSpace::AllocNonvirtual(size_t num_bytes) { return ret; } +inline mirror::Object* BumpPointerSpace::GetNextObject(mirror::Object* obj) { + const uintptr_t position = reinterpret_cast<uintptr_t>(obj) + obj->SizeOf(); + return reinterpret_cast<mirror::Object*>(RoundUp(position, kAlignment)); +} + } // namespace space } // namespace gc } // namespace art diff --git a/runtime/gc/space/bump_pointer_space-walk-inl.h b/runtime/gc/space/bump_pointer_space-walk-inl.h index 5d05ea2d65..a978f62c61 100644 --- a/runtime/gc/space/bump_pointer_space-walk-inl.h +++ b/runtime/gc/space/bump_pointer_space-walk-inl.h @@ -17,12 +17,14 @@ #ifndef ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_WALK_INL_H_ #define ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_WALK_INL_H_ -#include "bump_pointer_space.h" +#include "bump_pointer_space-inl.h" #include "base/bit_utils.h" #include "mirror/object-inl.h" #include "thread-current-inl.h" +#include <memory> + namespace art { namespace gc { namespace space { @@ -32,6 +34,7 @@ inline void BumpPointerSpace::Walk(Visitor&& visitor) { uint8_t* pos = Begin(); uint8_t* end = End(); uint8_t* main_end = pos; + std::unique_ptr<std::vector<size_t>> block_sizes_copy; // Internal indirection w/ NO_THREAD_SAFETY_ANALYSIS. 
Optimally, we'd like to have an annotation // like // REQUIRES_AS(visitor.operator(mirror::Object*)) @@ -49,15 +52,17 @@ inline void BumpPointerSpace::Walk(Visitor&& visitor) { MutexLock mu(Thread::Current(), block_lock_); // If we have 0 blocks then we need to update the main header since we have bump pointer style // allocation into an unbounded region (actually bounded by Capacity()). - if (num_blocks_ == 0) { + if (block_sizes_.empty()) { UpdateMainBlock(); } main_end = Begin() + main_block_size_; - if (num_blocks_ == 0) { + if (block_sizes_.empty()) { // We don't have any other blocks, this means someone else may be allocating into the main // block. In this case, we don't want to try and visit the other blocks after the main block // since these could actually be part of the main block. end = main_end; + } else { + block_sizes_copy.reset(new std::vector<size_t>(block_sizes_.begin(), block_sizes_.end())); } } // Walk all of the objects in the main block first. @@ -66,31 +71,33 @@ inline void BumpPointerSpace::Walk(Visitor&& visitor) { // No read barrier because obj may not be a valid object. if (obj->GetClass<kDefaultVerifyFlags, kWithoutReadBarrier>() == nullptr) { // There is a race condition where a thread has just allocated an object but not set the - // class. We can't know the size of this object, so we don't visit it and exit the function - // since there is guaranteed to be not other blocks. - return; + // class. We can't know the size of this object, so we don't visit it and break the loop + pos = main_end; + break; } else { no_thread_safety_analysis_visit(obj); pos = reinterpret_cast<uint8_t*>(GetNextObject(obj)); } } // Walk the other blocks (currently only TLABs). - while (pos < end) { - BlockHeader* header = reinterpret_cast<BlockHeader*>(pos); - size_t block_size = header->size_; - pos += sizeof(BlockHeader); // Skip the header so that we know where the objects - mirror::Object* obj = reinterpret_cast<mirror::Object*>(pos); - const mirror::Object* end_obj = reinterpret_cast<const mirror::Object*>(pos + block_size); - CHECK_LE(reinterpret_cast<const uint8_t*>(end_obj), End()); - // We don't know how many objects are allocated in the current block. When we hit a null class - // assume its the end. TODO: Have a thread update the header when it flushes the block? - // No read barrier because obj may not be a valid object. - while (obj < end_obj && obj->GetClass<kDefaultVerifyFlags, kWithoutReadBarrier>() != nullptr) { - no_thread_safety_analysis_visit(obj); - obj = GetNextObject(obj); + if (block_sizes_copy != nullptr) { + for (size_t block_size : *block_sizes_copy) { + mirror::Object* obj = reinterpret_cast<mirror::Object*>(pos); + const mirror::Object* end_obj = reinterpret_cast<const mirror::Object*>(pos + block_size); + CHECK_LE(reinterpret_cast<const uint8_t*>(end_obj), End()); + // We don't know how many objects are allocated in the current block. When we hit a null class + // assume it's the end. TODO: Have a thread update the header when it flushes the block? + // No read barrier because obj may not be a valid object. 
+ while (obj < end_obj && obj->GetClass<kDefaultVerifyFlags, kWithoutReadBarrier>() != nullptr) { + no_thread_safety_analysis_visit(obj); + obj = GetNextObject(obj); + } + pos += block_size; } - pos += block_size; + } else { + CHECK_EQ(end, main_end); } + CHECK_EQ(pos, end); } } // namespace space diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc index 3a0155a278..7753f73ca4 100644 --- a/runtime/gc/space/bump_pointer_space.cc +++ b/runtime/gc/space/bump_pointer_space.cc @@ -54,8 +54,9 @@ BumpPointerSpace::BumpPointerSpace(const std::string& name, uint8_t* begin, uint growth_end_(limit), objects_allocated_(0), bytes_allocated_(0), block_lock_("Block lock"), - main_block_size_(0), - num_blocks_(0) { + main_block_size_(0) { + // This constructor gets called only from Heap::PreZygoteFork(), which + // doesn't require a mark_bitmap. } BumpPointerSpace::BumpPointerSpace(const std::string& name, MemMap&& mem_map) @@ -68,8 +69,11 @@ BumpPointerSpace::BumpPointerSpace(const std::string& name, MemMap&& mem_map) growth_end_(mem_map_.End()), objects_allocated_(0), bytes_allocated_(0), block_lock_("Block lock", kBumpPointerSpaceBlockLock), - main_block_size_(0), - num_blocks_(0) { + main_block_size_(0) { + mark_bitmap_ = + accounting::ContinuousSpaceBitmap::Create("bump-pointer space live bitmap", + Begin(), + Capacity()); } void BumpPointerSpace::Clear() { @@ -86,7 +90,7 @@ void BumpPointerSpace::Clear() { growth_end_ = Limit(); { MutexLock mu(Thread::Current(), block_lock_); - num_blocks_ = 0; + block_sizes_.clear(); main_block_size_ = 0; } } @@ -97,11 +101,6 @@ void BumpPointerSpace::Dump(std::ostream& os) const { << reinterpret_cast<void*>(Limit()); } -mirror::Object* BumpPointerSpace::GetNextObject(mirror::Object* obj) { - const uintptr_t position = reinterpret_cast<uintptr_t>(obj) + obj->SizeOf(); - return reinterpret_cast<mirror::Object*>(RoundUp(position, kAlignment)); -} - size_t BumpPointerSpace::RevokeThreadLocalBuffers(Thread* thread) { MutexLock mu(Thread::Current(), block_lock_); RevokeThreadLocalBuffersLocked(thread); @@ -141,23 +140,19 @@ void BumpPointerSpace::AssertAllThreadLocalBuffersAreRevoked() { } void BumpPointerSpace::UpdateMainBlock() { - DCHECK_EQ(num_blocks_, 0U); + DCHECK(block_sizes_.empty()); main_block_size_ = Size(); } // Returns the start of the storage. uint8_t* BumpPointerSpace::AllocBlock(size_t bytes) { bytes = RoundUp(bytes, kAlignment); - if (!num_blocks_) { + if (block_sizes_.empty()) { UpdateMainBlock(); } - uint8_t* storage = reinterpret_cast<uint8_t*>( - AllocNonvirtualWithoutAccounting(bytes + sizeof(BlockHeader))); + uint8_t* storage = reinterpret_cast<uint8_t*>(AllocNonvirtualWithoutAccounting(bytes)); if (LIKELY(storage != nullptr)) { - BlockHeader* header = reinterpret_cast<BlockHeader*>(storage); - header->size_ = bytes; // Write out the block header. - storage += sizeof(BlockHeader); - ++num_blocks_; + block_sizes_.push_back(bytes); } return storage; } @@ -177,7 +172,7 @@ uint64_t BumpPointerSpace::GetBytesAllocated() { MutexLock mu3(Thread::Current(), block_lock_); // If we don't have any blocks, we don't have any thread local buffers. This check is required // since there can exist multiple bump pointer spaces which exist at the same time. 
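The rewritten Walk() above iterates the TLAB blocks from a snapshot of block_sizes_ instead of reading a BlockHeader stored in the space itself; a null class still marks the unallocated tail of a block. A simplified, self-contained sketch of that iteration pattern (FakeObject, its layout and the visitor are stand-ins; locking and read barriers are omitted):

#include <cstddef>
#include <cstdint>
#include <vector>

// Stand-in for an object whose size can be read once its class is set.
struct FakeObject {
  void* klass;   // nullptr marks the unused tail of a block.
  size_t size;   // Byte size of this object, already aligned.
};

template <typename Visitor>
void WalkBlocks(uint8_t* pos, const std::vector<size_t>& block_sizes, Visitor&& visit) {
  for (size_t block_size : block_sizes) {
    uint8_t* block_end = pos + block_size;
    uint8_t* obj = pos;
    // Objects were bump-allocated into the block; a null class means the rest
    // of the block is unallocated, so stop early.
    while (obj < block_end) {
      FakeObject* o = reinterpret_cast<FakeObject*>(obj);
      if (o->klass == nullptr) {
        break;
      }
      visit(o);
      obj += o->size;
    }
    pos = block_end;  // The next block starts right after this one; no header to skip.
  }
}

Keeping the sizes off-heap also means a sliding compactor can later merge or shrink blocks by editing the size list alone, without rewriting headers inside the space.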
- if (num_blocks_ > 0) { + if (!block_sizes_.empty()) { for (Thread* thread : thread_list) { total += thread->GetThreadLocalBytesAllocated(); } @@ -195,7 +190,7 @@ uint64_t BumpPointerSpace::GetObjectsAllocated() { MutexLock mu3(Thread::Current(), block_lock_); // If we don't have any blocks, we don't have any thread local buffers. This check is required // since there can exist multiple bump pointer spaces which exist at the same time. - if (num_blocks_ > 0) { + if (!block_sizes_.empty()) { for (Thread* thread : thread_list) { total += thread->GetThreadLocalObjectsAllocated(); } @@ -240,6 +235,52 @@ size_t BumpPointerSpace::AllocationSizeNonvirtual(mirror::Object* obj, size_t* u return num_bytes; } +uint8_t* BumpPointerSpace::AlignEnd(Thread* self, size_t alignment) { + Locks::mutator_lock_->AssertExclusiveHeld(self); + DCHECK(IsAligned<kAlignment>(alignment)); + uint8_t* end = end_.load(std::memory_order_relaxed); + uint8_t* aligned_end = AlignUp(end, alignment); + ptrdiff_t diff = aligned_end - end; + if (diff > 0) { + end_.store(aligned_end, std::memory_order_relaxed); + // If we have blocks after the main one. Then just add the diff to the last + // block. + MutexLock mu(self, block_lock_); + if (!block_sizes_.empty()) { + block_sizes_.back() += diff; + } + } + return end; +} + +std::vector<size_t>* BumpPointerSpace::GetBlockSizes(Thread* self, size_t* main_block_size) { + std::vector<size_t>* block_sizes = nullptr; + MutexLock mu(self, block_lock_); + if (!block_sizes_.empty()) { + block_sizes = new std::vector<size_t>(block_sizes_.begin(), block_sizes_.end()); + } else { + UpdateMainBlock(); + } + *main_block_size = main_block_size_; + return block_sizes; +} + +void BumpPointerSpace::SetBlockSizes(Thread* self, + const size_t main_block_size, + const size_t first_valid_idx) { + MutexLock mu(self, block_lock_); + main_block_size_ = main_block_size; + if (!block_sizes_.empty()) { + block_sizes_.erase(block_sizes_.begin(), block_sizes_.begin() + first_valid_idx); + } + size_t size = main_block_size; + for (size_t block_size : block_sizes_) { + size += block_size; + } + DCHECK(IsAligned<kAlignment>(size)); + end_.store(Begin() + size, std::memory_order_relaxed); +} + } // namespace space } // namespace gc } // namespace art diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h index 08ed503b5f..bba171109d 100644 --- a/runtime/gc/space/bump_pointer_space.h +++ b/runtime/gc/space/bump_pointer_space.h @@ -17,9 +17,10 @@ #ifndef ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_H_ #define ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_H_ +#include "base/mutex.h" #include "space.h" -#include "base/mutex.h" +#include <deque> namespace art { @@ -30,6 +31,7 @@ class Object; namespace gc { namespace collector { +class MarkCompact; class MarkSweep; } // namespace collector @@ -39,7 +41,7 @@ namespace space { // implementation as its intended to be evacuated. class BumpPointerSpace final : public ContinuousMemMapAllocSpace { public: - typedef void(*WalkCallback)(void *start, void *end, size_t num_bytes, void* callback_arg); + using WalkCallback = void (*)(void *, void *, int, void *); SpaceType GetType() const override { return kSpaceTypeBumpPointerSpace; @@ -100,10 +102,6 @@ class BumpPointerSpace final : public ContinuousMemMapAllocSpace { return nullptr; } - accounting::ContinuousSpaceBitmap* GetMarkBitmap() override { - return nullptr; - } - // Reset the space to empty. 
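To make the SetBlockSizes() bookkeeping above concrete: the compactor passes a new main-block size that absorbs the first first_valid_idx TLAB blocks, the absorbed entries are erased, and end_ is recomputed as Begin() plus the main block plus whatever blocks remain. A small stand-alone model of that arithmetic with made-up sizes:

#include <cassert>
#include <cstddef>
#include <deque>

// Stand-alone model of the block bookkeeping; all values are illustrative.
struct BlockModel {
  size_t main_block_size = 0;
  std::deque<size_t> block_sizes;  // TLAB block sizes after the main block.

  // Mirrors SetBlockSizes(): the new main block absorbs the first
  // `first_valid_idx` blocks; the end offset is main block + remaining blocks.
  size_t SetBlockSizes(size_t new_main_block_size, size_t first_valid_idx) {
    main_block_size = new_main_block_size;
    block_sizes.erase(block_sizes.begin(), block_sizes.begin() + first_valid_idx);
    size_t end_offset = main_block_size;
    for (size_t s : block_sizes) {
      end_offset += s;
    }
    return end_offset;  // The real code stores Begin() + end_offset into end_.
  }
};

int main() {
  BlockModel space;
  space.main_block_size = 64 * 1024;
  space.block_sizes = {8 * 1024, 4 * 1024, 16 * 1024};
  // Compaction folded the first two TLAB blocks into the main block, which in
  // this example grew by exactly their combined size.
  size_t end_offset = space.SetBlockSizes(/*new_main_block_size=*/76 * 1024,
                                          /*first_valid_idx=*/2);
  assert(space.block_sizes.size() == 1);
  assert(end_offset == (76 + 16) * 1024);
  return 0;
}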
void Clear() override REQUIRES(!block_lock_); @@ -120,6 +118,11 @@ class BumpPointerSpace final : public ContinuousMemMapAllocSpace { REQUIRES(!*Locks::runtime_shutdown_lock_, !*Locks::thread_list_lock_, !block_lock_); uint64_t GetObjectsAllocated() override REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!*Locks::runtime_shutdown_lock_, !*Locks::thread_list_lock_, !block_lock_); + // Return the pre-determined allocated object count. This could be beneficial + // when we know that all the TLABs are revoked. + int32_t GetAccumulatedObjectsAllocated() REQUIRES_SHARED(Locks::mutator_lock_) { + return objects_allocated_.load(std::memory_order_relaxed); + } bool IsEmpty() const { return Begin() == End(); } @@ -128,18 +131,9 @@ class BumpPointerSpace final : public ContinuousMemMapAllocSpace { return true; } - bool Contains(const mirror::Object* obj) const override { - const uint8_t* byte_obj = reinterpret_cast<const uint8_t*>(obj); - return byte_obj >= Begin() && byte_obj < End(); - } - // TODO: Change this? Mainly used for compacting to a particular region of memory. BumpPointerSpace(const std::string& name, uint8_t* begin, uint8_t* limit); - // Return the object which comes after obj, while ensuring alignment. - static mirror::Object* GetNextObject(mirror::Object* obj) - REQUIRES_SHARED(Locks::mutator_lock_); - // Allocate a new TLAB, returns false if the allocation failed. bool AllocNewTlab(Thread* self, size_t bytes) REQUIRES(!block_lock_); @@ -165,7 +159,7 @@ class BumpPointerSpace final : public ContinuousMemMapAllocSpace { REQUIRES_SHARED(Locks::mutator_lock_); // Object alignment within the space. - static constexpr size_t kAlignment = 8; + static constexpr size_t kAlignment = kObjectAlignment; protected: BumpPointerSpace(const std::string& name, MemMap&& mem_map); @@ -183,23 +177,40 @@ class BumpPointerSpace final : public ContinuousMemMapAllocSpace { AtomicInteger objects_allocated_; // Accumulated from revoked thread local regions. AtomicInteger bytes_allocated_; // Accumulated from revoked thread local regions. Mutex block_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; - // The objects at the start of the space are stored in the main block. The main block doesn't - // have a header, this lets us walk empty spaces which are mprotected. + // The objects at the start of the space are stored in the main block. size_t main_block_size_ GUARDED_BY(block_lock_); - // The number of blocks in the space, if it is 0 then the space has one long continuous block - // which doesn't have an updated header. - size_t num_blocks_ GUARDED_BY(block_lock_); + // List of block sizes (in bytes) after the main-block. Needed for Walk(). + // If empty then the space has only one long continuous block. Each TLAB + // allocation has one entry in this deque. + // Keeping block-sizes off-heap simplifies sliding compaction algorithms. + // The compaction algorithm should ideally compact all objects into the main + // block, thereby enabling erasing corresponding entries from here. + std::deque<size_t> block_sizes_ GUARDED_BY(block_lock_); private: - struct BlockHeader { - size_t size_; // Size of the block in bytes, does not include the header. - size_t unused_; // Ensures alignment of kAlignment. - }; + // Return the object which comes after obj, while ensuring alignment. + static mirror::Object* GetNextObject(mirror::Object* obj) + REQUIRES_SHARED(Locks::mutator_lock_); + + // Return a vector of block sizes on the space. Required by MarkCompact GC for + // walking black objects allocated after marking phase. 
+ std::vector<size_t>* GetBlockSizes(Thread* self, size_t* main_block_size) REQUIRES(!block_lock_); + + // Once the MarkCompact decides the post-compact layout of the space in the + // pre-compaction pause, it calls this function to update the block sizes. It is + // done by passing the new main-block size, which consumes a bunch of blocks + // into itself, and the index of first unconsumed block. This works as all the + // block sizes are ordered. Also updates 'end_' to reflect the change. + void SetBlockSizes(Thread* self, const size_t main_block_size, const size_t first_valid_idx) + REQUIRES(!block_lock_, Locks::mutator_lock_); - static_assert(sizeof(BlockHeader) % kAlignment == 0, - "continuous block must be kAlignment aligned"); + // Align end to the given alignment. This is done in MarkCompact GC when + // mutators are suspended so that upcoming TLAB allocations start with a new + // page. Returns the pre-alignment end. + uint8_t* AlignEnd(Thread* self, size_t alignment) REQUIRES(Locks::mutator_lock_); friend class collector::MarkSweep; + friend class collector::MarkCompact; DISALLOW_COPY_AND_ASSIGN(BumpPointerSpace); }; diff --git a/runtime/gc/space/dlmalloc_space-inl.h b/runtime/gc/space/dlmalloc_space-inl.h index 4fc4adac91..6041fd02af 100644 --- a/runtime/gc/space/dlmalloc_space-inl.h +++ b/runtime/gc/space/dlmalloc_space-inl.h @@ -18,7 +18,7 @@ #define ART_RUNTIME_GC_SPACE_DLMALLOC_SPACE_INL_H_ #include "dlmalloc_space.h" -#include "gc/allocator/dlmalloc.h" +#include "gc/allocator/art-dlmalloc.h" #include "thread.h" namespace art { diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc index 25cac7efde..1edcdbdf91 100644 --- a/runtime/gc/space/dlmalloc_space.cc +++ b/runtime/gc/space/dlmalloc_space.cc @@ -350,11 +350,18 @@ void DlMallocSpace::CheckMoreCoreForPrecondition() { } #endif +struct MspaceCbArgs { + size_t max_contiguous; + size_t used; +}; + static void MSpaceChunkCallback(void* start, void* end, size_t used_bytes, void* arg) { size_t chunk_size = reinterpret_cast<uint8_t*>(end) - reinterpret_cast<uint8_t*>(start); + MspaceCbArgs* mspace_cb_args = reinterpret_cast<MspaceCbArgs*>(arg); + mspace_cb_args->used += used_bytes; if (used_bytes < chunk_size) { size_t chunk_free_bytes = chunk_size - used_bytes; - size_t& max_contiguous_allocation = *reinterpret_cast<size_t*>(arg); + size_t& max_contiguous_allocation = mspace_cb_args->max_contiguous; max_contiguous_allocation = std::max(max_contiguous_allocation, chunk_free_bytes); } } @@ -362,16 +369,17 @@ static void MSpaceChunkCallback(void* start, void* end, size_t used_bytes, void* bool DlMallocSpace::LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) { Thread* const self = Thread::Current(); - size_t max_contiguous_allocation = 0; + MspaceCbArgs mspace_cb_args = {0, 0}; // To allow the Walk/InspectAll() to exclusively-lock the mutator // lock, temporarily release the shared access to the mutator // lock here by transitioning to the suspended state. 
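The MSpaceChunkCallback() above is invoked once per dlmalloc chunk with the chunk bounds and its used byte count; the change accumulates total used bytes alongside the largest free run so the fragmentation log line can report both. A simplified, self-contained version of the same accumulation over a fabricated chunk layout (the real chunks come from dlmalloc's inspection walk):

#include <algorithm>
#include <cstddef>
#include <cstdio>

struct MspaceCbArgs {
  size_t max_contiguous = 0;
  size_t used = 0;
};

// Same shape as the inspection callback: [start, end) is one chunk and
// used_bytes is how much of it is allocated.
void ChunkCallback(char* start, char* end, size_t used_bytes, void* arg) {
  size_t chunk_size = static_cast<size_t>(end - start);
  MspaceCbArgs* out = static_cast<MspaceCbArgs*>(arg);
  out->used += used_bytes;
  if (used_bytes < chunk_size) {
    out->max_contiguous = std::max(out->max_contiguous, chunk_size - used_bytes);
  }
}

int main() {
  // Fabricated chunks: {chunk size, used bytes}.
  char heap[4096];
  size_t layout[][2] = {{1024, 1024}, {2048, 512}, {1024, 896}};
  MspaceCbArgs args;
  char* pos = heap;
  for (auto& c : layout) {
    ChunkCallback(pos, pos + c[0], c[1], &args);
    pos += c[0];
  }
  // Largest free run is 2048 - 512 = 1536 bytes; 2432 bytes are in use overall.
  std::printf("max_contiguous=%zu used=%zu\n", args.max_contiguous, args.used);
  return 0;
}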
Locks::mutator_lock_->AssertSharedHeld(self); ScopedThreadSuspension sts(self, ThreadState::kSuspended); - Walk(MSpaceChunkCallback, &max_contiguous_allocation); - if (failed_alloc_bytes > max_contiguous_allocation) { - os << "; failed due to fragmentation (largest possible contiguous allocation " - << max_contiguous_allocation << " bytes)"; + Walk(MSpaceChunkCallback, &mspace_cb_args); + if (failed_alloc_bytes > mspace_cb_args.max_contiguous) { + os << "; failed due to malloc_space fragmentation (largest possible contiguous allocation " + << mspace_cb_args.max_contiguous << " bytes, space in use " << mspace_cb_args.used + << " bytes, capacity = " << Capacity() << ")"; return true; } return false; diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc index 6afd63e4a5..13966d8d97 100644 --- a/runtime/gc/space/image_space.cc +++ b/runtime/gc/space/image_space.cc @@ -23,7 +23,9 @@ #include <memory> #include <random> #include <string> +#include <vector> +#include "android-base/logging.h" #include "android-base/stringprintf.h" #include "android-base/strings.h" #include "android-base/unique_fd.h" @@ -49,6 +51,7 @@ #include "dex/art_dex_file_loader.h" #include "dex/dex_file_loader.h" #include "exec_utils.h" +#include "fmt/format.h" #include "gc/accounting/space_bitmap-inl.h" #include "gc/task_processor.h" #include "image-inl.h" @@ -69,14 +72,20 @@ namespace art { namespace gc { namespace space { -using android::base::Join; -using android::base::StringAppendF; -using android::base::StringPrintf; +namespace { + +using ::android::base::Join; +using ::android::base::StringAppendF; +using ::android::base::StringPrintf; + +using ::fmt::literals::operator""_format; // NOLINT // We do not allow the boot image and extensions to take more than 1GiB. They are // supposed to be much smaller and allocating more that this would likely fail anyway. static constexpr size_t kMaxTotalImageReservationSize = 1 * GB; +} // namespace + Atomic<uint32_t> ImageSpace::bitmap_index_(0); ImageSpace::ImageSpace(const std::string& image_filename, @@ -198,7 +207,6 @@ void ImageSpace::VerifyImageAllocations() { // Helper class for relocating from one range of memory to another. class RelocationRange { public: - RelocationRange() = default; RelocationRange(const RelocationRange&) = default; RelocationRange(uintptr_t source, uintptr_t dest, uintptr_t length) : source_(source), @@ -372,6 +380,64 @@ class ImageSpace::PatchObjectVisitor final { const {} void VisitRoot(mirror::CompressedReference<mirror::Object>* root ATTRIBUTE_UNUSED) const {} + template <typename T> void VisitNativeDexCacheArray(mirror::NativeArray<T>* array) + REQUIRES_SHARED(Locks::mutator_lock_) { + if (array == nullptr) { + return; + } + DCHECK_ALIGNED(array, static_cast<size_t>(kPointerSize)); + uint32_t size = (kPointerSize == PointerSize::k32) + ? 
reinterpret_cast<uint32_t*>(array)[-1] + : dchecked_integral_cast<uint32_t>(reinterpret_cast<uint64_t*>(array)[-1]); + for (uint32_t i = 0; i < size; ++i) { + PatchNativePointer(array->GetPtrEntryPtrSize(i, kPointerSize)); + } + } + + template <typename T> void VisitGcRootDexCacheArray(mirror::GcRootArray<T>* array) + REQUIRES_SHARED(Locks::mutator_lock_) { + if (array == nullptr) { + return; + } + DCHECK_ALIGNED(array, sizeof(GcRoot<T>)); + static_assert(sizeof(GcRoot<T>) == sizeof(uint32_t)); + uint32_t size = reinterpret_cast<uint32_t*>(array)[-1]; + for (uint32_t i = 0; i < size; ++i) { + PatchGcRoot(array->GetGcRootAddress(i)); + } + } + + void VisitDexCacheArrays(ObjPtr<mirror::DexCache> dex_cache) + REQUIRES_SHARED(Locks::mutator_lock_) { + mirror::NativeArray<ArtMethod>* old_resolved_methods = dex_cache->GetResolvedMethodsArray(); + if (old_resolved_methods != nullptr) { + mirror::NativeArray<ArtMethod>* resolved_methods = native_visitor_(old_resolved_methods); + dex_cache->SetResolvedMethodsArray(resolved_methods); + VisitNativeDexCacheArray(resolved_methods); + } + + mirror::NativeArray<ArtField>* old_resolved_fields = dex_cache->GetResolvedFieldsArray(); + if (old_resolved_fields != nullptr) { + mirror::NativeArray<ArtField>* resolved_fields = native_visitor_(old_resolved_fields); + dex_cache->SetResolvedFieldsArray(resolved_fields); + VisitNativeDexCacheArray(resolved_fields); + } + + mirror::GcRootArray<mirror::String>* old_strings = dex_cache->GetStringsArray(); + if (old_strings != nullptr) { + mirror::GcRootArray<mirror::String>* strings = native_visitor_(old_strings); + dex_cache->SetStringsArray(strings); + VisitGcRootDexCacheArray(strings); + } + + mirror::GcRootArray<mirror::Class>* old_types = dex_cache->GetResolvedTypesArray(); + if (old_types != nullptr) { + mirror::GcRootArray<mirror::Class>* types = native_visitor_(old_types); + dex_cache->SetResolvedTypesArray(types); + VisitGcRootDexCacheArray(types); + } + } + template <bool kMayBeNull = true, typename T> ALWAYS_INLINE void PatchGcRoot(/*inout*/GcRoot<T>* root) const REQUIRES_SHARED(Locks::mutator_lock_) { @@ -513,7 +579,8 @@ class ImageSpace::Loader { // Check the oat file checksum. const uint32_t oat_checksum = oat_file->GetOatHeader().GetChecksum(); const uint32_t image_oat_checksum = image_header.GetOatChecksum(); - if (oat_checksum != image_oat_checksum) { + // Note image_oat_checksum is 0 for images generated by the runtime. + if (image_oat_checksum != 0u && oat_checksum != image_oat_checksum) { *error_msg = StringPrintf("Oat checksum 0x%x does not match the image one 0x%x in image %s", oat_checksum, image_oat_checksum, @@ -1299,6 +1366,16 @@ class ImageSpace::Loader { image_header->RelocateImageReferences(app_image_objects.Delta()); image_header->RelocateBootImageReferences(boot_image.Delta()); CHECK_EQ(image_header->GetImageBegin(), target_base); + + // Fix up dex cache arrays. + ObjPtr<mirror::ObjectArray<mirror::DexCache>> dex_caches = + image_header->GetImageRoot<kWithoutReadBarrier>(ImageHeader::kDexCaches) + ->AsObjectArray<mirror::DexCache, kVerifyNone>(); + for (int32_t i = 0, count = dex_caches->GetLength(); i < count; ++i) { + ObjPtr<mirror::DexCache> dex_cache = + dex_caches->GetWithoutChecks<kVerifyNone, kWithoutReadBarrier>(i); + patch_object_visitor.VisitDexCacheArrays(dex_cache); + } } { // Only touches objects in the app image, no need for mutator lock. 
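The dex-cache visitors above depend on the native and GC-root arrays storing their element count in the slot immediately before element 0, which is why the length is read through index [-1] before each entry is patched. A stripped-down sketch of that layout and the patch loop; Relocate(), AllocNativeArray() and the fixed delta are hypothetical stand-ins for the loader's native_visitor_/PatchNativePointer machinery:

#include <cstdint>
#include <cstdlib>

// Hypothetical relocation: shift a non-null native pointer by a fixed delta.
static intptr_t gDelta = 0;
template <typename T>
T* Relocate(T* ptr) {
  return ptr == nullptr
             ? nullptr
             : reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(ptr) + gDelta);
}

// Length-prefixed pointer array: the count lives in the slot just before
// element 0, mirroring the [-1] access in VisitNativeDexCacheArray().
template <typename T>
T** AllocNativeArray(uint32_t length) {
  void* storage = std::calloc(1, sizeof(uint64_t) + length * sizeof(T*));
  T** array = reinterpret_cast<T**>(static_cast<uint8_t*>(storage) + sizeof(uint64_t));
  reinterpret_cast<uint32_t*>(array)[-1] = length;
  return array;
}

template <typename T>
void PatchNativeArray(T** array) {
  if (array == nullptr) {
    return;
  }
  uint32_t size = reinterpret_cast<uint32_t*>(array)[-1];
  for (uint32_t i = 0; i < size; ++i) {
    array[i] = Relocate(array[i]);
  }
}

int main() {
  gDelta = 0x1000;  // Pretend the mapped image moved by 4 KiB.
  int** methods = AllocNativeArray<int>(2);
  methods[0] = reinterpret_cast<int*>(0x7000);
  methods[1] = nullptr;
  PatchNativeArray(methods);  // methods[0] becomes 0x8000; null entries stay null.
  return 0;
}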
@@ -1366,9 +1443,9 @@ class ImageSpace::Loader { } }; -static void AppendImageChecksum(uint32_t component_count, - uint32_t checksum, - /*inout*/std::string* checksums) { +void ImageSpace::AppendImageChecksum(uint32_t component_count, + uint32_t checksum, + /*inout*/ std::string* checksums) { static_assert(ImageSpace::kImageChecksumPrefix == 'i', "Format prefix check."); StringAppendF(checksums, "i;%u/%08x", component_count, checksum); } @@ -1378,7 +1455,7 @@ static bool CheckAndRemoveImageChecksum(uint32_t component_count, /*inout*/std::string_view* oat_checksums, /*out*/std::string* error_msg) { std::string image_checksum; - AppendImageChecksum(component_count, checksum, &image_checksum); + ImageSpace::AppendImageChecksum(component_count, checksum, &image_checksum); if (!StartsWith(*oat_checksums, image_checksum)) { *error_msg = StringPrintf("Image checksum mismatch, expected %s to start with %s", std::string(*oat_checksums).c_str(), @@ -1389,182 +1466,6 @@ static bool CheckAndRemoveImageChecksum(uint32_t component_count, return true; } -// Helper class to find the primary boot image and boot image extensions -// and determine the boot image layout. -class ImageSpace::BootImageLayout { - public: - // Description of a "chunk" of the boot image, i.e. either primary boot image - // or a boot image extension, used in conjunction with the boot class path to - // load boot image components. - struct ImageChunk { - std::string base_location; - std::string base_filename; - std::vector<std::string> profile_files; - size_t start_index; - uint32_t component_count; - uint32_t image_space_count; - uint32_t reservation_size; - uint32_t checksum; - uint32_t boot_image_component_count; - uint32_t boot_image_checksum; - uint32_t boot_image_size; - - // The following file descriptors hold the memfd files for extensions compiled - // in memory and described by the above fields. We want to use them to mmap() - // the contents and then close them while treating the ImageChunk description - // as immutable (const), so make these fields explicitly mutable. 
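For a concrete picture of the checksum strings handled here: the "i;%u/%08x" format written by AppendImageChecksum() encodes the component count and the 8-hex-digit checksum of one image chunk, so 3 components with checksum 0x1234abcd become "i;3/1234abcd". A minimal sketch using only the printf-style formatting shown above:

#include <cstdint>
#include <cstdio>
#include <string>

// Mirrors the StringAppendF(checksums, "i;%u/%08x", ...) call above.
void AppendImageChecksum(uint32_t component_count, uint32_t checksum, std::string* checksums) {
  char buf[32];
  std::snprintf(buf, sizeof(buf), "i;%u/%08x", component_count, checksum);
  *checksums += buf;
}

int main() {
  std::string checksums;
  AppendImageChecksum(3u, 0x1234abcdu, &checksums);
  std::printf("%s\n", checksums.c_str());  // Prints "i;3/1234abcd".
  return 0;
}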
- mutable android::base::unique_fd art_fd; - mutable android::base::unique_fd vdex_fd; - mutable android::base::unique_fd oat_fd; - }; - - BootImageLayout(ArrayRef<const std::string> image_locations, - ArrayRef<const std::string> boot_class_path, - ArrayRef<const std::string> boot_class_path_locations, - ArrayRef<const int> boot_class_path_fds, - ArrayRef<const int> boot_class_path_image_fds, - ArrayRef<const int> boot_class_path_vdex_fds, - ArrayRef<const int> boot_class_path_oat_fds) - : image_locations_(image_locations), - boot_class_path_(boot_class_path), - boot_class_path_locations_(boot_class_path_locations), - boot_class_path_fds_(boot_class_path_fds), - boot_class_path_image_fds_(boot_class_path_image_fds), - boot_class_path_vdex_fds_(boot_class_path_vdex_fds), - boot_class_path_oat_fds_(boot_class_path_oat_fds) {} - - std::string GetPrimaryImageLocation(); - - bool LoadFromSystem(InstructionSet image_isa, /*out*/std::string* error_msg) { - return LoadOrValidateFromSystem(image_isa, /*oat_checksums=*/ nullptr, error_msg); - } - - bool ValidateFromSystem(InstructionSet image_isa, - /*inout*/std::string_view* oat_checksums, - /*out*/std::string* error_msg) { - DCHECK(oat_checksums != nullptr); - return LoadOrValidateFromSystem(image_isa, oat_checksums, error_msg); - } - - ArrayRef<const ImageChunk> GetChunks() const { - return ArrayRef<const ImageChunk>(chunks_); - } - - uint32_t GetBaseAddress() const { - return base_address_; - } - - size_t GetNextBcpIndex() const { - return next_bcp_index_; - } - - size_t GetTotalComponentCount() const { - return total_component_count_; - } - - size_t GetTotalReservationSize() const { - return total_reservation_size_; - } - - private: - struct NamedComponentLocation { - std::string base_location; - size_t bcp_index; - std::vector<std::string> profile_filenames; - }; - - std::string ExpandLocationImpl(const std::string& location, - size_t bcp_index, - bool boot_image_extension) { - std::vector<std::string> expanded = ExpandMultiImageLocations( - ArrayRef<const std::string>(boot_class_path_).SubArray(bcp_index, 1u), - location, - boot_image_extension); - DCHECK_EQ(expanded.size(), 1u); - return expanded[0]; - } - - std::string ExpandLocation(const std::string& location, size_t bcp_index) { - if (bcp_index == 0u) { - DCHECK_EQ(location, ExpandLocationImpl(location, bcp_index, /*boot_image_extension=*/ false)); - return location; - } else { - return ExpandLocationImpl(location, bcp_index, /*boot_image_extension=*/ true); - } - } - - std::string GetBcpComponentPath(size_t bcp_index) { - DCHECK_LE(bcp_index, boot_class_path_.size()); - size_t bcp_slash_pos = boot_class_path_[bcp_index].rfind('/'); - DCHECK_NE(bcp_slash_pos, std::string::npos); - return boot_class_path_[bcp_index].substr(0u, bcp_slash_pos + 1u); - } - - bool VerifyImageLocation(ArrayRef<const std::string> components, - /*out*/size_t* named_components_count, - /*out*/std::string* error_msg); - - bool MatchNamedComponents( - ArrayRef<const std::string> named_components, - /*out*/std::vector<NamedComponentLocation>* named_component_locations, - /*out*/std::string* error_msg); - - bool ValidateBootImageChecksum(const char* file_description, - const ImageHeader& header, - /*out*/std::string* error_msg); - - bool ValidateHeader(const ImageHeader& header, - size_t bcp_index, - const char* file_description, - /*out*/std::string* error_msg); - - bool ValidateOatFile(const std::string& base_location, - const std::string& base_filename, - size_t bcp_index, - size_t component_count, - 
/*out*/std::string* error_msg); - - bool ReadHeader(const std::string& base_location, - const std::string& base_filename, - size_t bcp_index, - /*out*/std::string* error_msg); - - // Compiles a consecutive subsequence of bootclasspath dex files, whose contents are included in - // the profiles specified by `profile_filenames`, starting from `bcp_index`. - bool CompileBootclasspathElements(const std::string& base_location, - const std::string& base_filename, - size_t bcp_index, - const std::vector<std::string>& profile_filenames, - ArrayRef<const std::string> dependencies, - /*out*/std::string* error_msg); - - bool CheckAndRemoveLastChunkChecksum(/*inout*/std::string_view* oat_checksums, - /*out*/std::string* error_msg); - - template <typename FilenameFn> - bool LoadOrValidate(FilenameFn&& filename_fn, - /*inout*/std::string_view* oat_checksums, - /*out*/std::string* error_msg); - - bool LoadOrValidateFromSystem(InstructionSet image_isa, - /*inout*/std::string_view* oat_checksums, - /*out*/std::string* error_msg); - - ArrayRef<const std::string> image_locations_; - ArrayRef<const std::string> boot_class_path_; - ArrayRef<const std::string> boot_class_path_locations_; - ArrayRef<const int> boot_class_path_fds_; - ArrayRef<const int> boot_class_path_image_fds_; - ArrayRef<const int> boot_class_path_vdex_fds_; - ArrayRef<const int> boot_class_path_oat_fds_; - - std::vector<ImageChunk> chunks_; - uint32_t base_address_ = 0u; - size_t next_bcp_index_ = 0u; - size_t total_component_count_ = 0u; - size_t total_reservation_size_ = 0u; -}; - std::string ImageSpace::BootImageLayout::GetPrimaryImageLocation() { DCHECK(!image_locations_.empty()); std::string location = image_locations_[0]; @@ -1886,7 +1787,7 @@ bool ImageSpace::BootImageLayout::ValidateOatFile( error_msg->c_str()); return false; } - if (!ImageSpace::ValidateOatFile(*oat_file, error_msg, dex_filenames, dex_fds)) { + if (!ImageSpace::ValidateOatFile(*oat_file, error_msg, dex_filenames, dex_fds, apex_versions_)) { return false; } return true; @@ -2151,48 +2052,12 @@ bool ImageSpace::BootImageLayout::CompileBootclasspathElements( return true; } -bool ImageSpace::BootImageLayout::CheckAndRemoveLastChunkChecksum( - /*inout*/std::string_view* oat_checksums, - /*out*/std::string* error_msg) { - DCHECK(oat_checksums != nullptr); - DCHECK(!chunks_.empty()); - const ImageChunk& chunk = chunks_.back(); - size_t component_count = chunk.component_count; - size_t checksum = chunk.checksum; - if (!CheckAndRemoveImageChecksum(component_count, checksum, oat_checksums, error_msg)) { - DCHECK(!error_msg->empty()); - return false; - } - if (oat_checksums->empty()) { - if (next_bcp_index_ != boot_class_path_.size()) { - *error_msg = StringPrintf("Checksum too short, missing %zu components.", - boot_class_path_.size() - next_bcp_index_); - return false; - } - return true; - } - if (!StartsWith(*oat_checksums, ":")) { - *error_msg = StringPrintf("Missing ':' separator at start of %s", - std::string(*oat_checksums).c_str()); - return false; - } - oat_checksums->remove_prefix(1u); - if (oat_checksums->empty()) { - *error_msg = "Missing checksums after the ':' separator."; - return false; - } - return true; -} - template <typename FilenameFn> -bool ImageSpace::BootImageLayout::LoadOrValidate(FilenameFn&& filename_fn, - /*inout*/std::string_view* oat_checksums, - /*out*/std::string* error_msg) { +bool ImageSpace::BootImageLayout::Load(FilenameFn&& filename_fn, + bool allow_in_memory_compilation, + /*out*/ std::string* error_msg) { DCHECK(GetChunks().empty()); 
DCHECK_EQ(GetBaseAddress(), 0u); - bool validate = (oat_checksums != nullptr); - static_assert(ImageSpace::kImageChecksumPrefix == 'i', "Format prefix check."); - DCHECK_IMPLIES(validate, StartsWith(*oat_checksums, "i")); ArrayRef<const std::string> components = image_locations_; size_t named_components_count = 0u; @@ -2223,24 +2088,21 @@ bool ImageSpace::BootImageLayout::LoadOrValidate(FilenameFn&& filename_fn, LOG(ERROR) << "Named image component already covered by previous image: " << base_location; continue; } - if (validate && bcp_index > bcp_pos) { - *error_msg = StringPrintf("End of contiguous boot class path images, remaining checksum: %s", - std::string(*oat_checksums).c_str()); - return false; - } std::string local_error_msg; - std::string* err_msg = validate ? error_msg : &local_error_msg; std::string base_filename; - if (!filename_fn(base_location, &base_filename, err_msg) || - !ReadHeader(base_location, base_filename, bcp_index, err_msg)) { - if (validate) { - return false; - } + if (!filename_fn(base_location, &base_filename, &local_error_msg) || + !ReadHeader(base_location, base_filename, bcp_index, &local_error_msg)) { LOG(ERROR) << "Error reading named image component header for " << base_location << ", error: " << local_error_msg; // If the primary boot image is invalid, we generate a single full image. This is faster than // generating the primary boot image and the extension separately. if (bcp_index == 0) { + if (!allow_in_memory_compilation) { + // The boot image is unusable and we can't continue by generating a boot image in memory. + // All we can do is to return. + *error_msg = std::move(local_error_msg); + return false; + } // We must at least have profiles for the core libraries. if (profile_filenames.empty()) { *error_msg = "Full boot image cannot be compiled because no profile is provided."; @@ -2264,14 +2126,15 @@ bool ImageSpace::BootImageLayout::LoadOrValidate(FilenameFn&& filename_fn, // No extensions are needed. return true; } - if (profile_filenames.empty() || + bool should_compile_extension = allow_in_memory_compilation && !profile_filenames.empty(); + if (!should_compile_extension || !CompileBootclasspathElements(base_location, base_filename, bcp_index, profile_filenames, components.SubArray(/*pos=*/ 0, /*length=*/ 1), &local_error_msg)) { - if (!profile_filenames.empty()) { + if (should_compile_extension) { LOG(ERROR) << "Error compiling boot image extension for " << boot_class_path_[bcp_index] << ", error: " << local_error_msg; } @@ -2280,14 +2143,6 @@ bool ImageSpace::BootImageLayout::LoadOrValidate(FilenameFn&& filename_fn, continue; } } - if (validate) { - if (!CheckAndRemoveLastChunkChecksum(oat_checksums, error_msg)) { - return false; - } - if (oat_checksums->empty() || !StartsWith(*oat_checksums, "i")) { - return true; // Let the caller deal with the dex file checksums if any. - } - } bcp_pos = GetNextBcpIndex(); } @@ -2320,24 +2175,10 @@ bool ImageSpace::BootImageLayout::LoadOrValidate(FilenameFn&& filename_fn, VLOG(image) << "Found image extension for " << ExpandLocation(base_location, bcp_pos); bcp_pos = GetNextBcpIndex(); found = true; - if (validate) { - if (!CheckAndRemoveLastChunkChecksum(oat_checksums, error_msg)) { - return false; - } - if (oat_checksums->empty() || !StartsWith(*oat_checksums, "i")) { - return true; // Let the caller deal with the dex file checksums if any. 
- } - } break; } } if (!found) { - if (validate) { - *error_msg = StringPrintf("Missing extension for %s, remaining checksum: %s", - bcp_component.c_str(), - std::string(*oat_checksums).c_str()); - return false; - } ++bcp_pos; } } @@ -2346,16 +2187,16 @@ bool ImageSpace::BootImageLayout::LoadOrValidate(FilenameFn&& filename_fn, return true; } -bool ImageSpace::BootImageLayout::LoadOrValidateFromSystem(InstructionSet image_isa, - /*inout*/std::string_view* oat_checksums, - /*out*/std::string* error_msg) { +bool ImageSpace::BootImageLayout::LoadFromSystem(InstructionSet image_isa, + bool allow_in_memory_compilation, + /*out*/ std::string* error_msg) { auto filename_fn = [image_isa](const std::string& location, /*out*/std::string* filename, /*out*/std::string* err_msg ATTRIBUTE_UNUSED) { *filename = GetSystemImageFilename(location.c_str(), image_isa); return true; }; - return LoadOrValidate(filename_fn, oat_checksums, error_msg); + return Load(filename_fn, allow_in_memory_compilation, error_msg); } class ImageSpace::BootImageLoader { @@ -2403,6 +2244,7 @@ class ImageSpace::BootImageLoader { bool HasSystem() const { return has_system_; } bool LoadFromSystem(size_t extra_reservation_size, + bool allow_in_memory_compilation, /*out*/std::vector<std::unique_ptr<ImageSpace>>* boot_image_spaces, /*out*/MemMap* extra_reservation, /*out*/std::string* error_msg) REQUIRES_SHARED(Locks::mutator_lock_); @@ -2697,7 +2539,8 @@ class ImageSpace::BootImageLoader { int32_t class_roots_index = enum_cast<int32_t>(ImageHeader::kClassRoots); DCHECK_LT(class_roots_index, image_roots->GetLength<kVerifyNone>()); class_roots = ObjPtr<mirror::ObjectArray<mirror::Class>>::DownCast(base_relocate_visitor( - image_roots->GetWithoutChecks<kVerifyNone>(class_roots_index).Ptr())); + image_roots->GetWithoutChecks<kVerifyNone, + kWithoutReadBarrier>(class_roots_index).Ptr())); if (kExtension) { // Class roots must have been visited if we relocated the primary boot image. DCHECK(base_diff == 0 || patched_objects->Test(class_roots.Ptr())); @@ -2863,6 +2706,14 @@ class ImageSpace::BootImageLoader { DCHECK_EQ(base_diff64, 0); } + // While `Thread::Current()` is null, the `ScopedDebugDisallowReadBarriers` + // cannot be used but the class `ReadBarrier` shall not allow read barriers anyway. + // For some gtests we actually have an initialized `Thread:Current()`. + std::optional<ScopedDebugDisallowReadBarriers> sddrb(std::nullopt); + if (kCheckDebugDisallowReadBarrierCount && Thread::Current() != nullptr) { + sddrb.emplace(Thread::Current()); + } + ArrayRef<const std::unique_ptr<ImageSpace>> spaces_ref(spaces); PointerSize pointer_size = first_space_header.GetPointerSize(); if (pointer_size == PointerSize::k64) { @@ -3111,8 +2962,24 @@ class ImageSpace::BootImageLoader { return false; } } + + // As an optimization, madvise the oat file into memory if it's being used + // for execution with an active runtime. This can significantly improve + // ZygoteInit class preload performance. 
+ if (executable_) { + Runtime* runtime = Runtime::Current(); + if (runtime != nullptr) { + Runtime::MadviseFileForRange(runtime->GetMadviseWillNeedSizeOdex(), + oat_file->Size(), + oat_file->Begin(), + oat_file->End(), + oat_file->GetLocation()); + } + } + space->oat_file_ = std::move(oat_file); space->oat_file_non_owned_ = space->oat_file_.get(); + return true; } @@ -3345,6 +3212,7 @@ class ImageSpace::BootImageLoader { bool ImageSpace::BootImageLoader::LoadFromSystem( size_t extra_reservation_size, + bool allow_in_memory_compilation, /*out*/std::vector<std::unique_ptr<ImageSpace>>* boot_image_spaces, /*out*/MemMap* extra_reservation, /*out*/std::string* error_msg) { @@ -3357,7 +3225,7 @@ bool ImageSpace::BootImageLoader::LoadFromSystem( boot_class_path_image_fds_, boot_class_path_vdex_fds_, boot_class_path_oat_fds_); - if (!layout.LoadFromSystem(image_isa_, error_msg)) { + if (!layout.LoadFromSystem(image_isa_, allow_in_memory_compilation, error_msg)) { return false; } @@ -3420,6 +3288,7 @@ bool ImageSpace::LoadBootImage( bool relocate, bool executable, size_t extra_reservation_size, + bool allow_in_memory_compilation, /*out*/std::vector<std::unique_ptr<ImageSpace>>* boot_image_spaces, /*out*/MemMap* extra_reservation) { ScopedTrace trace(__FUNCTION__); @@ -3450,8 +3319,11 @@ bool ImageSpace::LoadBootImage( std::vector<std::string> error_msgs; std::string error_msg; - if (loader.LoadFromSystem( - extra_reservation_size, boot_image_spaces, extra_reservation, &error_msg)) { + if (loader.LoadFromSystem(extra_reservation_size, + allow_in_memory_compilation, + boot_image_spaces, + extra_reservation, + &error_msg)) { return true; } error_msgs.push_back(error_msg); @@ -3519,54 +3391,66 @@ void ImageSpace::Dump(std::ostream& os) const { << ",name=\"" << GetName() << "\"]"; } -bool ImageSpace::ValidateApexVersions(const OatFile& oat_file, std::string* error_msg) { +bool ImageSpace::ValidateApexVersions(const OatHeader& oat_header, + const std::string& apex_versions, + const std::string& file_location, + std::string* error_msg) { // For a boot image, the key value store only exists in the first OAT file. Skip other OAT files. - if (oat_file.GetOatHeader().GetKeyValueStoreSize() == 0) { - return true; - } - - // The OAT files in the ART APEX is built on host, so they don't have the right APEX versions. It - // is safe to assume that they are always up-to-date because they are shipped along with the - // runtime and the dex files. - if (kIsTargetAndroid && android::base::StartsWith(oat_file.GetLocation(), GetArtRoot())) { + if (oat_header.GetKeyValueStoreSize() == 0) { return true; } - const char* oat_apex_versions = - oat_file.GetOatHeader().GetStoreValueByKey(OatHeader::kApexVersionsKey); + const char* oat_apex_versions = oat_header.GetStoreValueByKey(OatHeader::kApexVersionsKey); if (oat_apex_versions == nullptr) { *error_msg = StringPrintf("ValidateApexVersions failed to get APEX versions from oat file '%s'", - oat_file.GetLocation().c_str()); + file_location.c_str()); return false; } // For a boot image, it can be generated from a subset of the bootclasspath. // For an app image, some dex files get compiled with a subset of the bootclasspath. // For such cases, the OAT APEX versions will be a prefix of the runtime APEX versions. 
- if (!android::base::StartsWith(Runtime::Current()->GetApexVersions(), oat_apex_versions)) { + if (!android::base::StartsWith(apex_versions, oat_apex_versions)) { *error_msg = StringPrintf( "ValidateApexVersions found APEX versions mismatch between oat file '%s' and the runtime " "(Oat file: '%s', Runtime: '%s')", - oat_file.GetLocation().c_str(), + file_location.c_str(), oat_apex_versions, - Runtime::Current()->GetApexVersions().c_str()); + apex_versions.c_str()); return false; } return true; } bool ImageSpace::ValidateOatFile(const OatFile& oat_file, std::string* error_msg) { - return ValidateOatFile(oat_file, error_msg, ArrayRef<const std::string>(), ArrayRef<const int>()); + DCHECK(Runtime::Current() != nullptr); + return ValidateOatFile(oat_file, + error_msg, + ArrayRef<const std::string>(), + ArrayRef<const int>(), + Runtime::Current()->GetApexVersions()); } bool ImageSpace::ValidateOatFile(const OatFile& oat_file, std::string* error_msg, ArrayRef<const std::string> dex_filenames, - ArrayRef<const int> dex_fds) { - if (!ValidateApexVersions(oat_file, error_msg)) { + ArrayRef<const int> dex_fds, + const std::string& apex_versions) { + if (!ValidateApexVersions(oat_file.GetOatHeader(), + apex_versions, + oat_file.GetLocation(), + error_msg)) { + return false; + } + + // For a boot image, the key value store only exists in the first OAT file. Skip other OAT files. + if (oat_file.GetOatHeader().GetKeyValueStoreSize() != 0 && + oat_file.GetOatHeader().IsConcurrentCopying() != gUseReadBarrier) { + *error_msg = + "ValidateOatFile found read barrier state mismatch (oat file: {}, runtime: {})"_format( + oat_file.GetOatHeader().IsConcurrentCopying(), gUseReadBarrier); return false; } - const ArtDexFileLoader dex_file_loader; size_t dex_file_index = 0; for (const OatDexFile* oat_dex_file : oat_file.GetOatDexFiles()) { // Skip multidex locations - These will be checked when we visit their @@ -3583,7 +3467,7 @@ bool ImageSpace::ValidateOatFile(const OatFile& oat_file, std::vector<uint32_t> checksums; std::vector<std::string> dex_locations_ignored; - if (!dex_file_loader.GetMultiDexChecksums( + if (!ArtDexFileLoader::GetMultiDexChecksums( dex_file_location.c_str(), &checksums, &dex_locations_ignored, error_msg, dex_fd)) { *error_msg = StringPrintf("ValidateOatFile failed to get checksums of dex file '%s' " "referenced by oat file %s: %s", @@ -3695,7 +3579,7 @@ size_t ImageSpace::GetNumberOfComponents(ArrayRef<ImageSpace* const> image_space return n; } -static size_t CheckAndCountBCPComponents(std::string_view oat_boot_class_path, +size_t ImageSpace::CheckAndCountBCPComponents(std::string_view oat_boot_class_path, ArrayRef<const std::string> boot_class_path, /*out*/std::string* error_msg) { // Check that the oat BCP is a prefix of current BCP locations and count components. @@ -3727,110 +3611,6 @@ static size_t CheckAndCountBCPComponents(std::string_view oat_boot_class_path, return component_count; } -bool ImageSpace::VerifyBootClassPathChecksums(std::string_view oat_checksums, - std::string_view oat_boot_class_path, - ArrayRef<const std::string> image_locations, - ArrayRef<const std::string> boot_class_path_locations, - ArrayRef<const std::string> boot_class_path, - ArrayRef<const int> boot_class_path_fds, - InstructionSet image_isa, - /*out*/std::string* error_msg) { - if (oat_checksums.empty() || oat_boot_class_path.empty()) { - *error_msg = oat_checksums.empty() ? "Empty checksums." 
: "Empty boot class path."; - return false; - } - - DCHECK_EQ(boot_class_path_locations.size(), boot_class_path.size()); - size_t bcp_size = - CheckAndCountBCPComponents(oat_boot_class_path, boot_class_path_locations, error_msg); - if (bcp_size == static_cast<size_t>(-1)) { - DCHECK(!error_msg->empty()); - return false; - } - - size_t bcp_pos = 0u; - if (StartsWith(oat_checksums, "i")) { - // Use only the matching part of the BCP for validation. FDs are optional, so only pass the - // sub-array if provided. - ArrayRef<const int> bcp_fds = boot_class_path_fds.empty() - ? ArrayRef<const int>() - : boot_class_path_fds.SubArray(/*pos=*/ 0u, bcp_size); - BootImageLayout layout(image_locations, - boot_class_path.SubArray(/*pos=*/ 0u, bcp_size), - boot_class_path_locations.SubArray(/*pos=*/ 0u, bcp_size), - bcp_fds, - /*boot_class_path_image_fds=*/ ArrayRef<const int>(), - /*boot_class_path_vdex_fds=*/ ArrayRef<const int>(), - /*boot_class_path_oat_fds=*/ ArrayRef<const int>()); - std::string primary_image_location = layout.GetPrimaryImageLocation(); - std::string system_filename; - bool has_system = false; - if (!FindImageFilename(primary_image_location.c_str(), - image_isa, - &system_filename, - &has_system)) { - *error_msg = StringPrintf("Unable to find image file for %s and %s", - android::base::Join(image_locations, kComponentSeparator).c_str(), - GetInstructionSetString(image_isa)); - return false; - } - - DCHECK(has_system); - if (!layout.ValidateFromSystem(image_isa, &oat_checksums, error_msg)) { - return false; - } - bcp_pos = layout.GetNextBcpIndex(); - } - - for ( ; bcp_pos != bcp_size; ++bcp_pos) { - static_assert(ImageSpace::kDexFileChecksumPrefix == 'd', "Format prefix check."); - if (!StartsWith(oat_checksums, "d")) { - *error_msg = StringPrintf("Missing dex checksums, expected %s to start with 'd'", - std::string(oat_checksums).c_str()); - return false; - } - oat_checksums.remove_prefix(1u); - - const std::string& bcp_filename = boot_class_path[bcp_pos]; - std::vector<uint32_t> checksums; - std::vector<std::string> dex_locations; - const ArtDexFileLoader dex_file_loader; - if (!dex_file_loader.GetMultiDexChecksums(bcp_filename.c_str(), - &checksums, - &dex_locations, - error_msg)) { - return false; - } - DCHECK(!checksums.empty()); - for (uint32_t checksum : checksums) { - std::string dex_file_checksum = StringPrintf("/%08x", checksum); - if (!StartsWith(oat_checksums, dex_file_checksum)) { - *error_msg = StringPrintf( - "Dex checksum mismatch for bootclasspath file %s, expected %s to start with %s", - bcp_filename.c_str(), - std::string(oat_checksums).c_str(), - dex_file_checksum.c_str()); - return false; - } - oat_checksums.remove_prefix(dex_file_checksum.size()); - } - if (bcp_pos + 1u != bcp_size) { - if (!StartsWith(oat_checksums, ":")) { - *error_msg = StringPrintf("Missing ':' separator at start of %s", - std::string(oat_checksums).c_str()); - return false; - } - oat_checksums.remove_prefix(1u); - } - } - if (!oat_checksums.empty()) { - *error_msg = StringPrintf("Checksum too long, unexpected tail %s", - std::string(oat_checksums).c_str()); - return false; - } - return true; -} - bool ImageSpace::VerifyBootClassPathChecksums( std::string_view oat_checksums, std::string_view oat_boot_class_path, diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h index 8a93f2bad1..1a85456961 100644 --- a/runtime/gc/space/image_space.h +++ b/runtime/gc/space/image_space.h @@ -17,16 +17,19 @@ #ifndef ART_RUNTIME_GC_SPACE_IMAGE_SPACE_H_ #define 
ART_RUNTIME_GC_SPACE_IMAGE_SPACE_H_ +#include "android-base/unique_fd.h" +#include "base/array_ref.h" #include "gc/accounting/space_bitmap.h" #include "image.h" +#include "runtime.h" #include "space.h" namespace art { -template <typename T> class ArrayRef; class DexFile; enum class InstructionSet; class OatFile; +class OatHeader; namespace gc { namespace space { @@ -142,6 +145,7 @@ class ImageSpace : public MemMapSpace { bool relocate, bool executable, size_t extra_reservation_size, + bool allow_in_memory_compilation, /*out*/std::vector<std::unique_ptr<ImageSpace>>* boot_image_spaces, /*out*/MemMap* extra_reservation) REQUIRES_SHARED(Locks::mutator_lock_); @@ -239,18 +243,6 @@ class ImageSpace : public MemMapSpace { // Returns the total number of components (jar files) associated with the image spaces. static size_t GetNumberOfComponents(ArrayRef<gc::space::ImageSpace* const> image_spaces); - // Returns whether the checksums are valid for the given boot class path, - // image location and ISA (may differ from the ISA of an initialized Runtime). - // The boot image and dex files do not need to be loaded in memory. - static bool VerifyBootClassPathChecksums(std::string_view oat_checksums, - std::string_view oat_boot_class_path, - ArrayRef<const std::string> image_locations, - ArrayRef<const std::string> boot_class_path_locations, - ArrayRef<const std::string> boot_class_path, - ArrayRef<const int> boot_class_path_fds, - InstructionSet image_isa, - /*out*/std::string* error_msg); - // Returns whether the oat checksums and boot class path description are valid // for the given boot image spaces and boot class path. Used for boot image extensions. static bool VerifyBootClassPathChecksums( @@ -267,8 +259,11 @@ class ImageSpace : public MemMapSpace { const std::string& image_location, bool boot_image_extension = false); - // Returns true if the APEX versions in the OAT file match the current APEX versions. - static bool ValidateApexVersions(const OatFile& oat_file, std::string* error_msg); + // Returns true if the APEX versions in the OAT header match the given APEX versions. + static bool ValidateApexVersions(const OatHeader& oat_header, + const std::string& apex_versions, + const std::string& file_location, + std::string* error_msg); // Returns true if the dex checksums in the given oat file match the // checksums of the original dex files on disk. This is intended to be used @@ -279,17 +274,23 @@ class ImageSpace : public MemMapSpace { // oat and odex file. // // This function is exposed for testing purposes. + // + // Calling this function requires an active runtime. static bool ValidateOatFile(const OatFile& oat_file, std::string* error_msg); // Same as above, but allows to use `dex_filenames` and `dex_fds` to find the dex files instead of - // using the dex filenames in the header of the oat file. This overload is useful when the actual - // dex filenames are different from what's in the header (e.g., when we run dex2oat on host), or - // when the runtime can only access files through FDs (e.g., when we run dex2oat on target in a - // restricted SELinux domain). + // using the dex filenames in the header of the oat file, and also takes `apex_versions` from the + // input. This overload is useful when the actual dex filenames are different from what's in the + // header (e.g., when we run dex2oat on host), when the runtime can only access files through FDs + // (e.g., when we run dex2oat on target in a restricted SELinux domain), or when there is no + // active runtime. 
+ // + // Calling this function does not require an active runtime. static bool ValidateOatFile(const OatFile& oat_file, std::string* error_msg, ArrayRef<const std::string> dex_filenames, - ArrayRef<const int> dex_fds); + ArrayRef<const int> dex_fds, + const std::string& apex_versions); // Return the end of the image which includes non-heap objects such as ArtMethods and ArtFields. uint8_t* GetImageEnd() const { @@ -303,6 +304,182 @@ class ImageSpace : public MemMapSpace { void ReleaseMetadata() REQUIRES_SHARED(Locks::mutator_lock_); + static void AppendImageChecksum(uint32_t component_count, + uint32_t checksum, + /*inout*/ std::string* checksums); + + static size_t CheckAndCountBCPComponents(std::string_view oat_boot_class_path, + ArrayRef<const std::string> boot_class_path, + /*out*/ std::string* error_msg); + + // Helper class to find the primary boot image and boot image extensions + // and determine the boot image layout. + class BootImageLayout { + public: + // Description of a "chunk" of the boot image, i.e. either primary boot image + // or a boot image extension, used in conjunction with the boot class path to + // load boot image components. + struct ImageChunk { + std::string base_location; + std::string base_filename; + std::vector<std::string> profile_files; + size_t start_index; + uint32_t component_count; + uint32_t image_space_count; + uint32_t reservation_size; + uint32_t checksum; + uint32_t boot_image_component_count; + uint32_t boot_image_checksum; + uint32_t boot_image_size; + + // The following file descriptors hold the memfd files for extensions compiled + // in memory and described by the above fields. We want to use them to mmap() + // the contents and then close them while treating the ImageChunk description + // as immutable (const), so make these fields explicitly mutable. 
+ mutable android::base::unique_fd art_fd; + mutable android::base::unique_fd vdex_fd; + mutable android::base::unique_fd oat_fd; + }; + + BootImageLayout(ArrayRef<const std::string> image_locations, + ArrayRef<const std::string> boot_class_path, + ArrayRef<const std::string> boot_class_path_locations, + ArrayRef<const int> boot_class_path_fds, + ArrayRef<const int> boot_class_path_image_fds, + ArrayRef<const int> boot_class_path_vdex_fds, + ArrayRef<const int> boot_class_path_oat_fds, + const std::string* apex_versions = nullptr) + : image_locations_(image_locations), + boot_class_path_(boot_class_path), + boot_class_path_locations_(boot_class_path_locations), + boot_class_path_fds_(boot_class_path_fds), + boot_class_path_image_fds_(boot_class_path_image_fds), + boot_class_path_vdex_fds_(boot_class_path_vdex_fds), + boot_class_path_oat_fds_(boot_class_path_oat_fds), + apex_versions_(GetApexVersions(apex_versions)) {} + + std::string GetPrimaryImageLocation(); + + bool LoadFromSystem(InstructionSet image_isa, + bool allow_in_memory_compilation, + /*out*/ std::string* error_msg); + + ArrayRef<const ImageChunk> GetChunks() const { return ArrayRef<const ImageChunk>(chunks_); } + + uint32_t GetBaseAddress() const { return base_address_; } + + size_t GetNextBcpIndex() const { return next_bcp_index_; } + + size_t GetTotalComponentCount() const { return total_component_count_; } + + size_t GetTotalReservationSize() const { return total_reservation_size_; } + + private: + struct NamedComponentLocation { + std::string base_location; + size_t bcp_index; + std::vector<std::string> profile_filenames; + }; + + std::string ExpandLocationImpl(const std::string& location, + size_t bcp_index, + bool boot_image_extension) { + std::vector<std::string> expanded = ExpandMultiImageLocations( + ArrayRef<const std::string>(boot_class_path_).SubArray(bcp_index, 1u), + location, + boot_image_extension); + DCHECK_EQ(expanded.size(), 1u); + return expanded[0]; + } + + std::string ExpandLocation(const std::string& location, size_t bcp_index) { + if (bcp_index == 0u) { + DCHECK_EQ(location, + ExpandLocationImpl(location, bcp_index, /*boot_image_extension=*/false)); + return location; + } else { + return ExpandLocationImpl(location, bcp_index, /*boot_image_extension=*/true); + } + } + + std::string GetBcpComponentPath(size_t bcp_index) { + DCHECK_LE(bcp_index, boot_class_path_.size()); + size_t bcp_slash_pos = boot_class_path_[bcp_index].rfind('/'); + DCHECK_NE(bcp_slash_pos, std::string::npos); + return boot_class_path_[bcp_index].substr(0u, bcp_slash_pos + 1u); + } + + bool VerifyImageLocation(ArrayRef<const std::string> components, + /*out*/ size_t* named_components_count, + /*out*/ std::string* error_msg); + + bool MatchNamedComponents( + ArrayRef<const std::string> named_components, + /*out*/ std::vector<NamedComponentLocation>* named_component_locations, + /*out*/ std::string* error_msg); + + bool ValidateBootImageChecksum(const char* file_description, + const ImageHeader& header, + /*out*/ std::string* error_msg); + + bool ValidateHeader(const ImageHeader& header, + size_t bcp_index, + const char* file_description, + /*out*/ std::string* error_msg); + + bool ValidateOatFile(const std::string& base_location, + const std::string& base_filename, + size_t bcp_index, + size_t component_count, + /*out*/ std::string* error_msg); + + bool ReadHeader(const std::string& base_location, + const std::string& base_filename, + size_t bcp_index, + /*out*/ std::string* error_msg); + + // Compiles a consecutive subsequence of 
bootclasspath dex files, whose contents are included in + // the profiles specified by `profile_filenames`, starting from `bcp_index`. + bool CompileBootclasspathElements(const std::string& base_location, + const std::string& base_filename, + size_t bcp_index, + const std::vector<std::string>& profile_filenames, + ArrayRef<const std::string> dependencies, + /*out*/ std::string* error_msg); + + // Returns true if a least one chuck has been loaded. + template <typename FilenameFn> + bool Load(FilenameFn&& filename_fn, + bool allow_in_memory_compilation, + /*out*/ std::string* error_msg); + + // This function prefers taking APEX versions from the input instead of from the runtime if + // possible. If the input is present, `ValidateFromSystem` can work without an active runtime. + static const std::string& GetApexVersions(const std::string* apex_versions) { + if (apex_versions == nullptr) { + DCHECK(Runtime::Current() != nullptr); + return Runtime::Current()->GetApexVersions(); + } else { + return *apex_versions; + } + } + + ArrayRef<const std::string> image_locations_; + ArrayRef<const std::string> boot_class_path_; + ArrayRef<const std::string> boot_class_path_locations_; + ArrayRef<const int> boot_class_path_fds_; + ArrayRef<const int> boot_class_path_image_fds_; + ArrayRef<const int> boot_class_path_vdex_fds_; + ArrayRef<const int> boot_class_path_oat_fds_; + + std::vector<ImageChunk> chunks_; + uint32_t base_address_ = 0u; + size_t next_bcp_index_ = 0u; + size_t total_component_count_ = 0u; + size_t total_reservation_size_ = 0u; + const std::string& apex_versions_; + }; + protected: // Tries to initialize an ImageSpace from the given image path, returning null on error. // @@ -342,7 +519,6 @@ class ImageSpace : public MemMapSpace { friend class Space; private: - class BootImageLayout; class BootImageLoader; template <typename ReferenceVisitor> class ClassTableVisitor; diff --git a/runtime/gc/space/image_space_test.cc b/runtime/gc/space/image_space_test.cc index 3a6d0e12e2..d6bb86b11b 100644 --- a/runtime/gc/space/image_space_test.cc +++ b/runtime/gc/space/image_space_test.cc @@ -50,7 +50,7 @@ class ImageSpaceTest : public CommonRuntimeTest { }; TEST_F(ImageSpaceTest, StringDeduplication) { - const char* const kBaseNames[] = { "Extension1", "Extension2" }; + const char* const kBaseNames[] = {"Extension1", "Extension2"}; ScratchDir scratch; const std::string& scratch_dir = scratch.GetPath(); @@ -77,7 +77,7 @@ TEST_F(ImageSpaceTest, StringDeduplication) { std::vector<std::string> extension_image_locations; for (const char* base_name : kBaseNames) { std::string jar_name = GetTestDexFileName(base_name); - ArrayRef<const std::string> dex_files(&jar_name, /*size=*/ 1u); + ArrayRef<const std::string> dex_files(&jar_name, /*size=*/1u); ScratchFile profile_file; GenerateBootProfile(dex_files, profile_file.GetFile()); std::vector<std::string> extra_args = { @@ -94,8 +94,8 @@ TEST_F(ImageSpaceTest, StringDeduplication) { ASSERT_TRUE(success) << error_msg; bcp.push_back(jar_name); bcp_locations.push_back(jar_name); - extension_image_locations.push_back( - scratch_dir + prefix + '-' + GetFilenameBase(jar_name) + ".art"); + extension_image_locations.push_back(scratch_dir + prefix + '-' + GetFilenameBase(jar_name) + + ".art"); } // Also compile the second extension as an app with app image. 
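As a usage sketch (not part of the change): the BootImageLayout class moved into image_space.h above can be constructed with explicit APEX versions and driven without an active runtime. The helper below is illustrative only; its name, arguments, and include list are assumptions, while the constructor and LoadFromSystem() signatures follow the declarations above.

#include <string>
#include <vector>
#include "arch/instruction_set.h"
#include "base/array_ref.h"
#include "gc/space/image_space.h"

// Sketch: describe the boot image on /system without Runtime::Current().
// `apex_versions` must outlive `layout`, since the layout stores a reference.
bool DescribeBootImage(const std::vector<std::string>& image_locations,
                       const std::vector<std::string>& boot_class_path,
                       const std::vector<std::string>& boot_class_path_locations,
                       const std::string& apex_versions,
                       art::InstructionSet isa,
                       /*out*/ std::string* error_msg) {
  using art::ArrayRef;
  using art::gc::space::ImageSpace;
  ImageSpace::BootImageLayout layout(ArrayRef<const std::string>(image_locations),
                                     ArrayRef<const std::string>(boot_class_path),
                                     ArrayRef<const std::string>(boot_class_path_locations),
                                     /*boot_class_path_fds=*/ ArrayRef<const int>(),
                                     /*boot_class_path_image_fds=*/ ArrayRef<const int>(),
                                     /*boot_class_path_vdex_fds=*/ ArrayRef<const int>(),
                                     /*boot_class_path_oat_fds=*/ ArrayRef<const int>(),
                                     &apex_versions);
  // Passing explicit APEX versions avoids the Runtime::Current() fallback in
  // GetApexVersions(), and disallowing in-memory compilation means only images
  // already present on /system are reported.
  if (!layout.LoadFromSystem(isa, /*allow_in_memory_compilation=*/ false, error_msg)) {
    return false;
  }
  return !layout.GetChunks().empty();
}

With allow_in_memory_compilation set to false, Load() simply reports what is already installed instead of compiling a full boot image or an extension in memory.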
@@ -104,26 +104,27 @@ TEST_F(ImageSpaceTest, StringDeduplication) { std::string app_odex_name = scratch_dir + app_base_name + ".odex"; std::string app_image_name = scratch_dir + app_base_name + ".art"; { - ArrayRef<const std::string> dex_files(&app_jar_name, /*size=*/ 1u); + ArrayRef<const std::string> dex_files(&app_jar_name, /*size=*/1u); ScratchFile profile_file; GenerateProfile(dex_files, profile_file.GetFile()); std::vector<std::string> argv; std::string error_msg; - bool success = StartDex2OatCommandLine(&argv, &error_msg, /*use_runtime_bcp_and_image=*/ false); + bool success = StartDex2OatCommandLine(&argv, &error_msg, /*use_runtime_bcp_and_image=*/false); ASSERT_TRUE(success) << error_msg; - argv.insert(argv.end(), { - "--profile-file=" + profile_file.GetFilename(), - "--runtime-arg", - "-Xbootclasspath:" + base_bcp_string, - "--runtime-arg", - "-Xbootclasspath-locations:" + base_bcp_locations_string, - "--boot-image=" + base_image_location, - "--dex-file=" + app_jar_name, - "--dex-location=" + app_jar_name, - "--oat-file=" + app_odex_name, - "--app-image-file=" + app_image_name, - "--initialize-app-image-classes=true", - }); + argv.insert(argv.end(), + { + "--profile-file=" + profile_file.GetFilename(), + "--runtime-arg", + "-Xbootclasspath:" + base_bcp_string, + "--runtime-arg", + "-Xbootclasspath-locations:" + base_bcp_locations_string, + "--boot-image=" + base_image_location, + "--dex-file=" + app_jar_name, + "--dex-location=" + app_jar_name, + "--oat-file=" + app_odex_name, + "--app-image-file=" + app_image_name, + "--initialize-app-image-classes=true", + }); success = RunDex2Oat(argv, &error_msg); ASSERT_TRUE(success) << error_msg; } @@ -136,15 +137,16 @@ TEST_F(ImageSpaceTest, StringDeduplication) { extra_reservation = MemMap::Invalid(); return ImageSpace::LoadBootImage(bcp, bcp_locations, - /*boot_class_path_fds=*/ std::vector<int>(), - /*boot_class_path_image_fds=*/ std::vector<int>(), - /*boot_class_path_vdex_fds=*/ std::vector<int>(), - /*boot_class_path_oat_fds=*/ std::vector<int>(), + /*boot_class_path_fds=*/std::vector<int>(), + /*boot_class_path_image_fds=*/std::vector<int>(), + /*boot_class_path_vdex_fds=*/std::vector<int>(), + /*boot_class_path_oat_fds=*/std::vector<int>(), full_image_locations, kRuntimeISA, - /*relocate=*/ false, - /*executable=*/ true, - /*extra_reservation_size=*/ 0u, + /*relocate=*/false, + /*executable=*/true, + /*extra_reservation_size=*/0u, + /*allow_in_memory_compilation=*/false, &boot_image_spaces, &extra_reservation); }; @@ -153,13 +155,13 @@ TEST_F(ImageSpaceTest, StringDeduplication) { size_t test_string_length = std::size(test_string) - 1u; // Equals UTF-16 length. 
uint32_t hash = InternTable::Utf8String::Hash(test_string_length, test_string); InternTable::Utf8String utf8_test_string(test_string_length, test_string); - auto contains_test_string = [utf8_test_string, hash](ImageSpace* space) - REQUIRES_SHARED(Locks::mutator_lock_) { + auto contains_test_string = [utf8_test_string, + hash](ImageSpace* space) REQUIRES_SHARED(Locks::mutator_lock_) { const ImageHeader& image_header = space->GetImageHeader(); if (image_header.GetInternedStringsSection().Size() != 0u) { const uint8_t* data = space->Begin() + image_header.GetInternedStringsSection().Offset(); size_t read_count; - InternTable::UnorderedSet temp_set(data, /*make_copy_of_data=*/ false, &read_count); + InternTable::UnorderedSet temp_set(data, /*make_copy_of_data=*/false, &read_count); return temp_set.FindWithHash(utf8_test_string, hash) != temp_set.end(); } else { return false; @@ -170,8 +172,7 @@ TEST_F(ImageSpaceTest, StringDeduplication) { ScopedObjectAccess soa(Thread::Current()); ASSERT_EQ(2u, extension_image_locations.size()); full_image_locations = { - base_image_location, extension_image_locations[0], extension_image_locations[1] - }; + base_image_location, extension_image_locations[0], extension_image_locations[1]}; bool success = load_boot_image(); ASSERT_TRUE(success); ASSERT_EQ(bcp.size(), boot_image_spaces.size()); @@ -183,8 +184,7 @@ TEST_F(ImageSpaceTest, StringDeduplication) { std::swap(bcp[bcp.size() - 2u], bcp[bcp.size() - 1u]); std::swap(bcp_locations[bcp_locations.size() - 2u], bcp_locations[bcp_locations.size() - 1u]); full_image_locations = { - base_image_location, extension_image_locations[1], extension_image_locations[0] - }; + base_image_location, extension_image_locations[1], extension_image_locations[0]}; success = load_boot_image(); ASSERT_TRUE(success); ASSERT_EQ(bcp.size(), boot_image_spaces.size()); @@ -203,21 +203,21 @@ TEST_F(ImageSpaceTest, StringDeduplication) { // Load the app odex file and app image. std::string error_msg; - std::unique_ptr<OatFile> odex_file(OatFile::Open(/*zip_fd=*/ -1, - app_odex_name.c_str(), - app_odex_name.c_str(), - /*executable=*/ false, - /*low_4gb=*/ false, + std::unique_ptr<OatFile> odex_file(OatFile::Open(/*zip_fd=*/-1, + app_odex_name, + app_odex_name, + /*executable=*/false, + /*low_4gb=*/false, app_jar_name, &error_msg)); ASSERT_TRUE(odex_file != nullptr) << error_msg; std::vector<ImageSpace*> non_owning_boot_image_spaces = MakeNonOwningPointerVector(boot_image_spaces); - std::unique_ptr<ImageSpace> app_image_space = ImageSpace::CreateFromAppImage( - app_image_name.c_str(), - odex_file.get(), - ArrayRef<ImageSpace* const>(non_owning_boot_image_spaces), - &error_msg); + std::unique_ptr<ImageSpace> app_image_space = + ImageSpace::CreateFromAppImage(app_image_name.c_str(), + odex_file.get(), + ArrayRef<ImageSpace* const>(non_owning_boot_image_spaces), + &error_msg); ASSERT_TRUE(app_image_space != nullptr) << error_msg; // The string in the app image should be replaced and removed from interned string section. 
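A similar sketch for the new five-argument ValidateOatFile() overload, which takes the APEX versions from the caller instead of from Runtime::Current(). The helper name and paths are placeholders; the OatFile::Open() call mirrors the one used in the DexoptTest hunk below.

#include <memory>
#include <string>
#include <vector>
#include "base/array_ref.h"
#include "gc/space/image_space.h"
#include "oat_file.h"

// Sketch: validate an odex file against explicit dex files and APEX versions,
// e.g. from host-side tooling where no runtime is initialized.
bool ValidateOdexAgainstDex(const std::string& oat_path,
                            const std::string& dex_path,
                            const std::string& apex_versions,
                            /*out*/ std::string* error_msg) {
  std::unique_ptr<art::OatFile> oat_file(art::OatFile::Open(/*zip_fd=*/ -1,
                                                            oat_path,
                                                            oat_path,
                                                            /*executable=*/ false,
                                                            /*low_4gb=*/ false,
                                                            error_msg));
  if (oat_file == nullptr) {
    return false;
  }
  std::vector<std::string> dex_filenames{dex_path};
  // The five-argument overload does not require an active runtime.
  return art::gc::space::ImageSpace::ValidateOatFile(
      *oat_file,
      error_msg,
      art::ArrayRef<const std::string>(dex_filenames),
      /*dex_fds=*/ art::ArrayRef<const int>(),
      apex_versions);
}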
@@ -242,25 +242,25 @@ TEST_F(DexoptTest, ValidateOatFile) { args.push_back("--oat-file=" + oat_location); ASSERT_TRUE(Dex2Oat(args, &error_msg)) << error_msg; - std::unique_ptr<OatFile> oat(OatFile::Open(/*zip_fd=*/ -1, - oat_location.c_str(), - oat_location.c_str(), - /*executable=*/ false, - /*low_4gb=*/ false, + std::unique_ptr<OatFile> oat(OatFile::Open(/*zip_fd=*/-1, + oat_location, + oat_location, + /*executable=*/false, + /*low_4gb=*/false, &error_msg)); ASSERT_TRUE(oat != nullptr) << error_msg; { // Test opening the oat file also with explicit dex filenames. - std::vector<std::string> dex_filenames{ dex1, multidex1, dex2 }; - std::unique_ptr<OatFile> oat2(OatFile::Open(/*zip_fd=*/ -1, - oat_location.c_str(), - oat_location.c_str(), - /*executable=*/ false, - /*low_4gb=*/ false, + std::vector<std::string> dex_filenames{dex1, multidex1, dex2}; + std::unique_ptr<OatFile> oat2(OatFile::Open(/*zip_fd=*/-1, + oat_location, + oat_location, + /*executable=*/false, + /*low_4gb=*/false, ArrayRef<const std::string>(dex_filenames), - /*dex_fds=*/ ArrayRef<const int>(), - /*reservation=*/ nullptr, + /*dex_fds=*/ArrayRef<const int>(), + /*reservation=*/nullptr, &error_msg)); ASSERT_TRUE(oat2 != nullptr) << error_msg; } @@ -321,56 +321,6 @@ TEST_F(DexoptTest, ValidateOatFile) { EXPECT_FALSE(ImageSpace::ValidateOatFile(*oat, &error_msg)); } -TEST_F(DexoptTest, Checksums) { - Runtime* runtime = Runtime::Current(); - ASSERT_TRUE(runtime != nullptr); - ASSERT_FALSE(runtime->GetHeap()->GetBootImageSpaces().empty()); - - std::vector<std::string> bcp = runtime->GetBootClassPath(); - std::vector<std::string> bcp_locations = runtime->GetBootClassPathLocations(); - std::vector<const DexFile*> dex_files = runtime->GetClassLinker()->GetBootClassPath(); - - std::string error_msg; - auto create_and_verify = [&]() { - std::string checksums = gc::space::ImageSpace::GetBootClassPathChecksums( - ArrayRef<gc::space::ImageSpace* const>(runtime->GetHeap()->GetBootImageSpaces()), - ArrayRef<const DexFile* const>(dex_files)); - return gc::space::ImageSpace::VerifyBootClassPathChecksums( - checksums, - android::base::Join(bcp_locations, ':'), - ArrayRef<const std::string>(runtime->GetImageLocations()), - ArrayRef<const std::string>(bcp_locations), - ArrayRef<const std::string>(bcp), - /*boot_class_path_fds=*/ ArrayRef<const int>(), - kRuntimeISA, - &error_msg); - }; - - ASSERT_TRUE(create_and_verify()) << error_msg; - - std::vector<std::unique_ptr<const DexFile>> opened_dex_files; - for (const std::string& src : { GetDexSrc1(), GetDexSrc2() }) { - std::vector<std::unique_ptr<const DexFile>> new_dex_files; - const ArtDexFileLoader dex_file_loader; - ASSERT_TRUE(dex_file_loader.Open(src.c_str(), - src, - /*verify=*/ true, - /*verify_checksum=*/ false, - &error_msg, - &new_dex_files)) - << error_msg; - - bcp.push_back(src); - bcp_locations.push_back(src); - for (std::unique_ptr<const DexFile>& df : new_dex_files) { - dex_files.push_back(df.get()); - opened_dex_files.push_back(std::move(df)); - } - - ASSERT_TRUE(create_and_verify()) << error_msg; - } -} - template <bool kImage, bool kRelocate> class ImageSpaceLoadingTest : public CommonRuntimeTest { protected: @@ -380,6 +330,7 @@ class ImageSpaceLoadingTest : public CommonRuntimeTest { options->emplace_back(android::base::StringPrintf("-Ximage:%s", image_location.c_str()), nullptr); options->emplace_back(kRelocate ? 
"-Xrelocate" : "-Xnorelocate", nullptr); + options->emplace_back("-Xallowinmemorycompilation", nullptr); // We want to test the relocation behavior of ImageSpace. As such, don't pretend we're a // compiler. diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc index 2d17a18a36..f1df45f19a 100644 --- a/runtime/gc/space/large_object_space.cc +++ b/runtime/gc/space/large_object_space.cc @@ -336,7 +336,7 @@ class AllocationInfo { size_t FreeListSpace::GetSlotIndexForAllocationInfo(const AllocationInfo* info) const { DCHECK_GE(info, allocation_info_); - DCHECK_LT(info, reinterpret_cast<AllocationInfo*>(allocation_info_map_.End())); + DCHECK_LE(info, reinterpret_cast<AllocationInfo*>(allocation_info_map_.End())); return info - allocation_info_; } @@ -457,6 +457,10 @@ size_t FreeListSpace::Free(Thread* self, mirror::Object* obj) { // The previous allocation info must not be free since we are supposed to always coalesce. DCHECK_EQ(info->GetPrevFreeBytes(), 0U) << "Previous allocation was free"; } + // NOTE: next_info could be pointing right after the allocation_info_map_ + // when freeing object in the very end of the space. But that's safe + // as we don't dereference it in that case. We only use it to calculate + // next_addr using offset within the map. uintptr_t next_addr = GetAddressForAllocationInfo(next_info); if (next_addr >= free_end_start) { // Easy case, the next chunk is the end free region. diff --git a/runtime/gc/space/malloc_space.h b/runtime/gc/space/malloc_space.h index 50006568ca..59ab3f3214 100644 --- a/runtime/gc/space/malloc_space.h +++ b/runtime/gc/space/malloc_space.h @@ -38,7 +38,7 @@ class ZygoteSpace; // A common parent of DlMallocSpace and RosAllocSpace. class MallocSpace : public ContinuousMemMapAllocSpace { public: - typedef void(*WalkCallback)(void *start, void *end, size_t num_bytes, void* callback_arg); + using WalkCallback = void (*)(void *start, void *end, size_t num_bytes, void* callback_arg); SpaceType GetType() const override { return kSpaceTypeMallocSpace; diff --git a/runtime/gc/space/region_space-inl.h b/runtime/gc/space/region_space-inl.h index 901568e546..1026f42c27 100644 --- a/runtime/gc/space/region_space-inl.h +++ b/runtime/gc/space/region_space-inl.h @@ -17,8 +17,6 @@ #ifndef ART_RUNTIME_GC_SPACE_REGION_SPACE_INL_H_ #define ART_RUNTIME_GC_SPACE_REGION_SPACE_INL_H_ -#include "region_space.h" - #include "base/mutex-inl.h" #include "mirror/object-inl.h" #include "region_space.h" diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc index 171c5cdebc..60141d656b 100644 --- a/runtime/gc/space/region_space.cc +++ b/runtime/gc/space/region_space.cc @@ -36,7 +36,7 @@ static constexpr uint kEvacuateLivePercentThreshold = 75U; static constexpr bool kProtectClearedRegions = kIsDebugBuild; // Wether we poison memory areas occupied by dead objects in unevacuated regions. -static constexpr bool kPoisonDeadObjectsInUnevacuatedRegions = true; +static constexpr bool kPoisonDeadObjectsInUnevacuatedRegions = kIsDebugBuild; // Special 32-bit value used to poison memory areas occupied by dead // objects in unevacuated regions. Dereferencing this value is expected @@ -741,10 +741,19 @@ bool RegionSpace::LogFragmentationAllocFailure(std::ostream& os, max_contiguous_allocation = std::min(max_contiguous_allocation, regions_free_for_alloc * kRegionSize); if (failed_alloc_bytes > max_contiguous_allocation) { + // Region space does not normally fragment in the conventional sense. 
However we can run out + // of region space prematurely if we have many threads, each with a partially committed TLAB. + // The whole TLAB uses up region address space, but we only count the section that was + // actually given to the thread so far as allocated. For unlikely allocation request sequences + // involving largish objects that don't qualify for large objects space, we may also be unable + // to fully utilize entire TLABs, and thus generate enough actual fragmentation to get + // here. This appears less likely, since we usually reuse sufficiently large TLAB "tails" + // that are no longer needed. os << "; failed due to fragmentation (largest possible contiguous allocation " - << max_contiguous_allocation << " bytes). Number of " - << PrettySize(kRegionSize) - << " sized free regions are: " << regions_free_for_alloc; + << max_contiguous_allocation << " bytes). Number of " << PrettySize(kRegionSize) + << " sized free regions are: " << regions_free_for_alloc + << ". Likely cause: (1) Too much memory in use, and " + << "(2) many threads or many larger objects of the wrong kind"; return true; } // Caller's job to print failed_alloc_bytes. diff --git a/runtime/gc/space/region_space.h b/runtime/gc/space/region_space.h index 1463eb7d2a..27b9e9c367 100644 --- a/runtime/gc/space/region_space.h +++ b/runtime/gc/space/region_space.h @@ -46,7 +46,7 @@ static constexpr bool kCyclicRegionAllocation = kIsDebugBuild; // A space that consists of equal-sized regions. class RegionSpace final : public ContinuousMemMapAllocSpace { public: - typedef void(*WalkCallback)(void *start, void *end, size_t num_bytes, void* callback_arg); + using WalkCallback = void (*)(void *start, void *end, size_t num_bytes, void* callback_arg); enum EvacMode { kEvacModeNewlyAllocated, diff --git a/runtime/gc/system_weak.h b/runtime/gc/system_weak.h index ef85b3942f..77b9548211 100644 --- a/runtime/gc/system_weak.h +++ b/runtime/gc/system_weak.h @@ -48,7 +48,7 @@ class SystemWeakHolder : public AbstractSystemWeakHolder { void Allow() override REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!allow_disallow_lock_) { - CHECK(!kUseReadBarrier); + CHECK(!gUseReadBarrier); MutexLock mu(Thread::Current(), allow_disallow_lock_); allow_new_system_weak_ = true; new_weak_condition_.Broadcast(Thread::Current()); @@ -57,7 +57,7 @@ class SystemWeakHolder : public AbstractSystemWeakHolder { void Disallow() override REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!allow_disallow_lock_) { - CHECK(!kUseReadBarrier); + CHECK(!gUseReadBarrier); MutexLock mu(Thread::Current(), allow_disallow_lock_); allow_new_system_weak_ = false; } @@ -78,8 +78,8 @@ class SystemWeakHolder : public AbstractSystemWeakHolder { REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(allow_disallow_lock_) { // Wait for GC's sweeping to complete and allow new records - while (UNLIKELY((!kUseReadBarrier && !allow_new_system_weak_) || - (kUseReadBarrier && !self->GetWeakRefAccessEnabled()))) { + while (UNLIKELY((!gUseReadBarrier && !allow_new_system_weak_) || + (gUseReadBarrier && !self->GetWeakRefAccessEnabled()))) { // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the // presence of threads blocking for weak ref access. 
self->CheckEmptyCheckpointFromWeakRefAccess(&allow_disallow_lock_);
diff --git a/runtime/gc/system_weak_test.cc b/runtime/gc/system_weak_test.cc
index ca112972c2..dd936538e5 100644
--- a/runtime/gc/system_weak_test.cc
+++ b/runtime/gc/system_weak_test.cc
@@ -35,6 +35,10 @@ namespace art {
namespace gc {
class SystemWeakTest : public CommonRuntimeTest {
+ protected:
+ SystemWeakTest() {
+ use_boot_image_ = true; // Make the Runtime creation cheaper.
+ }
};
struct CountingSystemWeakHolder : public SystemWeakHolder {
@@ -111,6 +115,7 @@ static bool CollectorDoesAllowOrBroadcast() {
CollectorType type = Runtime::Current()->GetHeap()->CurrentCollectorType();
switch (type) {
case CollectorType::kCollectorTypeCMS:
+ case CollectorType::kCollectorTypeCMC:
case CollectorType::kCollectorTypeCC:
case CollectorType::kCollectorTypeSS:
return true;
@@ -124,6 +129,7 @@ static bool CollectorDoesDisallow() {
CollectorType type = Runtime::Current()->GetHeap()->CurrentCollectorType();
switch (type) {
case CollectorType::kCollectorTypeCMS:
+ case CollectorType::kCollectorTypeCMC:
return true;
default:
@@ -149,7 +155,12 @@ TEST_F(SystemWeakTest, Keep) {
// Expect the holder to have been called.
EXPECT_EQ(CollectorDoesAllowOrBroadcast() ? 1U : 0U, cswh.allow_count_);
EXPECT_EQ(CollectorDoesDisallow() ? 1U : 0U, cswh.disallow_count_);
- EXPECT_EQ(1U, cswh.sweep_count_);
+ // Userfaultfd GC also uses SweepSystemWeaks for concurrent updates.
+ // TODO: Explore whether this can be reverted back to unconditionally comparing with 1
+ // once concurrent updating of native roots is fully implemented in the userfaultfd
+ // GC.
+ size_t expected_sweep_count = gUseUserfaultfd ? 2U : 1U;
+ EXPECT_EQ(expected_sweep_count, cswh.sweep_count_);
// Expect the weak to not be cleared.
EXPECT_FALSE(cswh.Get().IsNull());
@@ -170,7 +181,12 @@ TEST_F(SystemWeakTest, Discard) {
// Expect the holder to have been called.
EXPECT_EQ(CollectorDoesAllowOrBroadcast() ? 1U : 0U, cswh.allow_count_);
EXPECT_EQ(CollectorDoesDisallow() ? 1U : 0U, cswh.disallow_count_);
- EXPECT_EQ(1U, cswh.sweep_count_);
+ // Userfaultfd GC also uses SweepSystemWeaks for concurrent updates.
+ // TODO: Explore whether this can be reverted back to unconditionally comparing with 1
+ // once concurrent updating of native roots is fully implemented in the userfaultfd
+ // GC.
+ size_t expected_sweep_count = gUseUserfaultfd ? 2U : 1U;
+ EXPECT_EQ(expected_sweep_count, cswh.sweep_count_);
// Expect the weak to be cleared.
EXPECT_TRUE(cswh.Get().IsNull());
@@ -194,7 +210,12 @@ TEST_F(SystemWeakTest, Remove) {
// Expect the holder to have been called.
ASSERT_EQ(CollectorDoesAllowOrBroadcast() ? 1U : 0U, cswh.allow_count_);
ASSERT_EQ(CollectorDoesDisallow() ? 1U : 0U, cswh.disallow_count_);
- ASSERT_EQ(1U, cswh.sweep_count_);
+ // Userfaultfd GC also uses SweepSystemWeaks for concurrent updates.
+ // TODO: Explore whether this can be reverted back to unconditionally comparing with 1
+ // once concurrent updating of native roots is fully implemented in the userfaultfd
+ // GC.
+ size_t expected_sweep_count = gUseUserfaultfd ? 2U : 1U;
+ EXPECT_EQ(expected_sweep_count, cswh.sweep_count_);
// Expect the weak to not be cleared.
ASSERT_FALSE(cswh.Get().IsNull());
@@ -209,7 +230,7 @@ TEST_F(SystemWeakTest, Remove) {
// Expectation: no change in the numbers.
EXPECT_EQ(CollectorDoesAllowOrBroadcast() ? 1U : 0U, cswh.allow_count_);
EXPECT_EQ(CollectorDoesDisallow() ?
1U : 0U, cswh.disallow_count_); - EXPECT_EQ(1U, cswh.sweep_count_); + EXPECT_EQ(expected_sweep_count, cswh.sweep_count_); } } // namespace gc diff --git a/runtime/gc/verification-inl.h b/runtime/gc/verification-inl.h new file mode 100644 index 0000000000..1ef96e2954 --- /dev/null +++ b/runtime/gc/verification-inl.h @@ -0,0 +1,63 @@ +/* + * Copyright 2021 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_RUNTIME_GC_VERIFICATION_INL_H_ +#define ART_RUNTIME_GC_VERIFICATION_INL_H_ + +#include "verification.h" + +#include "mirror/class-inl.h" + +namespace art { +namespace gc { + +template <ReadBarrierOption kReadBarrierOption> +bool Verification::IsValidClassUnchecked(mirror::Class* klass) const { + mirror::Class* k1 = klass->GetClass<kVerifyNone, kReadBarrierOption>(); + if (!IsValidHeapObjectAddress(k1)) { + return false; + } + // `k1` should be class class, take the class again to verify. + // Note that this check may not be valid for the no image space + // since the class class might move around from moving GC. + mirror::Class* k2 = k1->GetClass<kVerifyNone, kReadBarrierOption>(); + if (!IsValidHeapObjectAddress(k2)) { + return false; + } + return k1 == k2; +} + +template <ReadBarrierOption kReadBarrierOption> +bool Verification::IsValidClass(mirror::Class* klass) const { + if (!IsValidHeapObjectAddress(klass)) { + return false; + } + return IsValidClassUnchecked<kReadBarrierOption>(klass); +} + +template <ReadBarrierOption kReadBarrierOption> +bool Verification::IsValidObject(mirror::Object* obj) const { + if (!IsValidHeapObjectAddress(obj)) { + return false; + } + mirror::Class* klass = obj->GetClass<kVerifyNone, kReadBarrierOption>(); + return IsValidClass(klass); +} + +} // namespace gc +} // namespace art + +#endif // ART_RUNTIME_GC_VERIFICATION_INL_H_ diff --git a/runtime/gc/verification.cc b/runtime/gc/verification.cc index 9e0b8a2ff1..195986f04d 100644 --- a/runtime/gc/verification.cc +++ b/runtime/gc/verification.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "verification.h" +#include "verification-inl.h" #include <iomanip> #include <sstream> @@ -29,23 +29,16 @@ namespace art { namespace gc { std::string Verification::DumpRAMAroundAddress(uintptr_t addr, uintptr_t bytes) const { - const uintptr_t dump_start = addr - bytes; - const uintptr_t dump_end = addr + bytes; + uintptr_t* dump_start = reinterpret_cast<uintptr_t*>(addr - bytes); + uintptr_t* dump_end = reinterpret_cast<uintptr_t*>(addr + bytes); std::ostringstream oss; - if (dump_start < dump_end && - IsAddressInHeapSpace(reinterpret_cast<const void*>(dump_start)) && - IsAddressInHeapSpace(reinterpret_cast<const void*>(dump_end - 1))) { - oss << " adjacent_ram="; - for (uintptr_t p = dump_start; p < dump_end; ++p) { - if (p == addr) { - // Marker of where the address is. 
- oss << "|"; - } - uint8_t* ptr = reinterpret_cast<uint8_t*>(p); - oss << std::hex << std::setfill('0') << std::setw(2) << static_cast<uintptr_t>(*ptr); + oss << " adjacent_ram="; + for (const uintptr_t* p = dump_start; p < dump_end; ++p) { + if (p == reinterpret_cast<uintptr_t*>(addr)) { + // Marker of where the address is. + oss << "|"; } - } else { - oss << " <invalid address>"; + oss << std::hex << std::setfill('0') << std::setw(sizeof(uintptr_t) * 2) << *p << " "; } return oss.str(); } @@ -93,7 +86,7 @@ void Verification::LogHeapCorruption(ObjPtr<mirror::Object> holder, std::ostringstream oss; oss << "GC tried to mark invalid reference " << ref << std::endl; oss << DumpObjectInfo(ref, "ref") << "\n"; - oss << DumpObjectInfo(holder.Ptr(), "holder"); + oss << DumpObjectInfo(holder.Ptr(), "holder") << "\n"; if (holder != nullptr) { mirror::Class* holder_klass = holder->GetClass<kVerifyNone, kWithoutReadBarrier>(); if (IsValidClass(holder_klass)) { @@ -132,25 +125,6 @@ bool Verification::IsValidHeapObjectAddress(const void* addr, space::Space** out return IsAligned<kObjectAlignment>(addr) && IsAddressInHeapSpace(addr, out_space); } -bool Verification::IsValidClass(const void* addr) const { - if (!IsValidHeapObjectAddress(addr)) { - return false; - } - mirror::Class* klass = reinterpret_cast<mirror::Class*>(const_cast<void*>(addr)); - mirror::Class* k1 = klass->GetClass<kVerifyNone, kWithoutReadBarrier>(); - if (!IsValidHeapObjectAddress(k1)) { - return false; - } - // `k1` should be class class, take the class again to verify. - // Note that this check may not be valid for the no image space since the class class might move - // around from moving GC. - mirror::Class* k2 = k1->GetClass<kVerifyNone, kWithoutReadBarrier>(); - if (!IsValidHeapObjectAddress(k2)) { - return false; - } - return k1 == k2; -} - using ObjectSet = std::set<mirror::Object*>; using WorkQueue = std::deque<std::pair<mirror::Object*, std::string>>; diff --git a/runtime/gc/verification.h b/runtime/gc/verification.h index 6b456fd349..7a5d01a40a 100644 --- a/runtime/gc/verification.h +++ b/runtime/gc/verification.h @@ -19,6 +19,7 @@ #include "obj_ptr.h" #include "offsets.h" +#include "read_barrier_option.h" namespace art { @@ -50,7 +51,16 @@ class Verification { bool fatal) const REQUIRES_SHARED(Locks::mutator_lock_); // Return true if the klass is likely to be a valid mirror::Class. - bool IsValidClass(const void* klass) const REQUIRES_SHARED(Locks::mutator_lock_); + // Returns true if the class is a valid mirror::Class or possibly spuriously. + template <ReadBarrierOption kReadBarrierOption = kWithoutReadBarrier> + bool IsValidClassUnchecked(mirror::Class* klass) const + REQUIRES_SHARED(Locks::mutator_lock_); + // Return true if the klass is likely to be a valid mirror::Class. + template <ReadBarrierOption kReadBarrierOption = kWithoutReadBarrier> + bool IsValidClass(mirror::Class* klass) const REQUIRES_SHARED(Locks::mutator_lock_); + // Return true if the obj is likely to be a valid obj with valid mirror::Class. + template <ReadBarrierOption kReadBarrierOption = kWithoutReadBarrier> + bool IsValidObject(mirror::Object* obj) const REQUIRES_SHARED(Locks::mutator_lock_); // Does not allow null, checks alignment. bool IsValidHeapObjectAddress(const void* addr, space::Space** out_space = nullptr) const |
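Finally, a sketch of how the templated validity checks added in verification-inl.h might be used from a debug path. It assumes a Heap::GetVerification() accessor and the LogHeapCorruption() signature shown above; the function itself and its name are illustrative, and callers would need to hold the mutator lock.

#include "gc/heap.h"
#include "gc/verification-inl.h"
#include "mirror/object.h"
#include "runtime.h"

// Sketch: reject a corrupt reference from a GC debug path. The default
// kWithoutReadBarrier template argument keeps this usable while read barriers
// are disallowed, e.g. during userfaultfd/CMC compaction.
void DebugCheckReference(art::mirror::Object* holder,
                         art::MemberOffset offset,
                         art::mirror::Object* ref) {
  const art::gc::Verification* v =
      art::Runtime::Current()->GetHeap()->GetVerification();
  if (ref != nullptr && !v->IsValidObject<art::kWithoutReadBarrier>(ref)) {
    v->LogHeapCorruption(holder, offset, ref, /*fatal=*/ true);
  }
}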