author      2022-11-30 06:45:28 +0000
committer   2022-12-05 18:17:40 +0000
commit      49f034785005e53ca51d2af7985e41e4be3d802d
tree        6961a925bcfdf1a89fa2b22201a7bdd16bff70a7
parent      5ac8b698c560e631b0a0e38aaed1445d488da826
Maintain pre-zygote fork linear-alloc pages as shared-clean
Userfaultfd tends to dirty all the pages of the space it is used on.
However, we want to maintain the shared-clean trait of the pages
allocated in the zygote process prior to the first fork. This CL
separates the pre-zygote-fork arenas from the userfaultfd-visited ones,
thereby retaining the former's shared-clean trait.
Bug: 160737021
Test: module install and enable uffd GC
Change-Id: Iddffb2c8d2d234ce7b20c069d86341dda5443a9b
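
For context, the release-side policy this change introduces in
TrackedArena::Release() can be summarized with a small standalone sketch (the
helper below is illustrative, not part of ART): userfaultfd-visited arenas live
in MAP_SHARED (memfd-backed) mappings, where only MADV_REMOVE actually drops
the pages, while pre-zygote-fork arenas are private anonymous and are released
with MADV_DONTNEED so their pages are never dirtied.

#include <sys/mman.h>
#include <cerrno>
#include <cstddef>
#include <cstdint>

// Illustrative helper (not an ART API) mirroring the madvise choice made in
// TrackedArena::Release() after this change.
void ReleaseArenaPages(uint8_t* begin, size_t size,
                       bool use_userfaultfd_gc, bool pre_zygote_fork) {
  if (use_userfaultfd_gc && !pre_zygote_fork) {
    // MAP_SHARED memfd mapping: MADV_REMOVE punches the pages out of the
    // page cache. It fails with EINVAL on anonymous mappings, e.g. if the
    // arena is released before the GC has switched to memfd.
    if (madvise(begin, size, MADV_REMOVE) == 0 || errno != EINVAL) {
      return;
    }
  }
  // Private anonymous mapping (or MADV_REMOVE not applicable): MADV_DONTNEED
  // releases the pages without dirtying anything, so the pages inherited from
  // the zygote keep their shared-clean trait.
  madvise(begin, size, MADV_DONTNEED);
}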
 libartbase/base/arena_allocator.cc    |  7
 libartbase/base/arena_allocator.h     |  3
 runtime/base/gc_visited_arena_pool.cc | 60
 runtime/base/gc_visited_arena_pool.h  | 31
 runtime/gc/collector/mark_compact.cc  | 40
 runtime/gc/heap.cc                    |  6
 runtime/linear_alloc-inl.h            |  6
 runtime/linear_alloc.h                |  3
 runtime/runtime.cc                    | 38
 runtime/runtime.h                     |  6
 10 files changed, 170 insertions(+), 30 deletions(-)
diff --git a/libartbase/base/arena_allocator.cc b/libartbase/base/arena_allocator.cc
index e5f254288e..69c8d0b992 100644
--- a/libartbase/base/arena_allocator.cc
+++ b/libartbase/base/arena_allocator.cc
@@ -265,6 +265,13 @@ ArenaAllocator::~ArenaAllocator() {
   pool_->FreeArenaChain(arena_head_);
 }
 
+void ArenaAllocator::ResetCurrentArena() {
+  UpdateBytesAllocated();
+  begin_ = nullptr;
+  ptr_ = nullptr;
+  end_ = nullptr;
+}
+
 uint8_t* ArenaAllocator::AllocFromNewArena(size_t bytes) {
   Arena* new_arena = pool_->AllocArena(std::max(arena_allocator::kArenaDefaultSize, bytes));
   DCHECK(new_arena != nullptr);
diff --git a/libartbase/base/arena_allocator.h b/libartbase/base/arena_allocator.h
index 49c14614a3..3dfeebecba 100644
--- a/libartbase/base/arena_allocator.h
+++ b/libartbase/base/arena_allocator.h
@@ -366,6 +366,9 @@ class ArenaAllocator
     DCHECK_LE(ptr_, end_);
     return end_ - ptr_;
   }
+  // Resets the current arena in use, which will force us to get a new arena
+  // on next allocation.
+  void ResetCurrentArena();
 
   bool Contains(const void* ptr) const;
 
diff --git a/runtime/base/gc_visited_arena_pool.cc b/runtime/base/gc_visited_arena_pool.cc
index 938dcfaebe..0fb30e226f 100644
--- a/runtime/base/gc_visited_arena_pool.cc
+++ b/runtime/base/gc_visited_arena_pool.cc
@@ -27,7 +27,8 @@
 
 namespace art {
 
-TrackedArena::TrackedArena(uint8_t* start, size_t size) : Arena(), first_obj_array_(nullptr) {
+TrackedArena::TrackedArena(uint8_t* start, size_t size, bool pre_zygote_fork)
+    : Arena(), first_obj_array_(nullptr), pre_zygote_fork_(pre_zygote_fork) {
   static_assert(ArenaAllocator::kArenaAlignment <= kPageSize,
                 "Arena should not need stronger alignment than kPageSize.");
   DCHECK_ALIGNED(size, kPageSize);
@@ -41,10 +42,13 @@ TrackedArena::TrackedArena(uint8_t* start, size_t size) : Arena(), first_obj_arr
 
 void TrackedArena::Release() {
   if (bytes_allocated_ > 0) {
-    // Userfaultfd GC uses memfd mappings for linear-alloc and therefore
+    // Userfaultfd GC uses MAP_SHARED mappings for linear-alloc and therefore
     // MADV_DONTNEED will not free the pages from page cache. Therefore use
     // MADV_REMOVE instead, which is meant for this purpose.
-    if (!gUseUserfaultfd || (madvise(Begin(), Size(), MADV_REMOVE) == -1 && errno == EINVAL)) {
+    // Arenas allocated pre-zygote fork are private anonymous and hence must be
+    // released using MADV_DONTNEED.
+    if (!gUseUserfaultfd || pre_zygote_fork_ ||
+        (madvise(Begin(), Size(), MADV_REMOVE) == -1 && errno == EINVAL)) {
       // MADV_REMOVE fails if invoked on anonymous mapping, which could happen
       // if the arena is released before userfaultfd-GC starts using memfd. So
       // use MADV_DONTNEED.
@@ -69,7 +73,7 @@ void TrackedArena::SetFirstObject(uint8_t* obj_begin, uint8_t* obj_end) {
   }
 }
 
-void GcVisitedArenaPool::AddMap(size_t min_size) {
+uint8_t* GcVisitedArenaPool::AddMap(size_t min_size) {
   size_t size = std::max(min_size, kLinearAllocPoolSize);
 #if defined(__LP64__)
   // This is true only when we are running a 64-bit dex2oat to compile a 32-bit image.
@@ -110,15 +114,11 @@ void GcVisitedArenaPool::AddMap(size_t min_size) {
   Chunk* chunk = new Chunk(map.Begin(), map.Size());
   best_fit_allocs_.insert(chunk);
   free_chunks_.insert(chunk);
+  return map.Begin();
 }
 
-GcVisitedArenaPool::GcVisitedArenaPool(bool low_4gb, const char* name)
-    : bytes_allocated_(0), name_(name), low_4gb_(low_4gb) {
-  std::lock_guard<std::mutex> lock(lock_);
-  // It's extremely rare to have more than one map.
-  maps_.reserve(1);
-  AddMap(/*min_size=*/0);
-}
+GcVisitedArenaPool::GcVisitedArenaPool(bool low_4gb, bool is_zygote, const char* name)
+    : bytes_allocated_(0), name_(name), low_4gb_(low_4gb), pre_zygote_fork_(is_zygote) {}
 
 GcVisitedArenaPool::~GcVisitedArenaPool() {
   for (Chunk* chunk : free_chunks_) {
@@ -133,11 +133,37 @@ size_t GcVisitedArenaPool::GetBytesAllocated() const {
   return bytes_allocated_;
 }
 
+uint8_t* GcVisitedArenaPool::AddPreZygoteForkMap(size_t size) {
+  DCHECK(pre_zygote_fork_);
+  DCHECK(Runtime::Current()->IsZygote());
+  std::string pre_fork_name = "Pre-zygote-";
+  pre_fork_name += name_;
+  std::string err_msg;
+  maps_.emplace_back(MemMap::MapAnonymous(
+      pre_fork_name.c_str(), size, PROT_READ | PROT_WRITE, low_4gb_, &err_msg));
+  MemMap& map = maps_.back();
+  if (!map.IsValid()) {
+    LOG(FATAL) << "Failed to allocate " << pre_fork_name << ": " << err_msg;
+    UNREACHABLE();
+  }
+  return map.Begin();
+}
+
 Arena* GcVisitedArenaPool::AllocArena(size_t size) {
   // Return only page aligned sizes so that madvise can be leveraged.
   size = RoundUp(size, kPageSize);
-  Chunk temp_chunk(nullptr, size);
   std::lock_guard<std::mutex> lock(lock_);
+
+  if (pre_zygote_fork_) {
+    // The first fork out of zygote hasn't happened yet. Allocate arena in a
+    // private-anonymous mapping to retain clean pages across fork.
+    DCHECK(Runtime::Current()->IsZygote());
+    uint8_t* addr = AddPreZygoteForkMap(size);
+    auto emplace_result = allocated_arenas_.emplace(addr, size, /*pre_zygote_fork=*/true);
+    return const_cast<TrackedArena*>(&(*emplace_result.first));
+  }
+
+  Chunk temp_chunk(nullptr, size);
   auto best_fit_iter = best_fit_allocs_.lower_bound(&temp_chunk);
   if (UNLIKELY(best_fit_iter == best_fit_allocs_.end())) {
     AddMap(size);
@@ -151,14 +177,18 @@ Arena* GcVisitedArenaPool::AllocArena(size_t size) {
   // if the best-fit chunk < 2x the requested size, then give the whole chunk.
   if (chunk->size_ < 2 * size) {
     DCHECK_GE(chunk->size_, size);
-    auto emplace_result = allocated_arenas_.emplace(chunk->addr_, chunk->size_);
+    auto emplace_result = allocated_arenas_.emplace(chunk->addr_,
+                                                    chunk->size_,
+                                                    /*pre_zygote_fork=*/false);
     DCHECK(emplace_result.second);
     free_chunks_.erase(free_chunks_iter);
     best_fit_allocs_.erase(best_fit_iter);
     delete chunk;
     return const_cast<TrackedArena*>(&(*emplace_result.first));
   } else {
-    auto emplace_result = allocated_arenas_.emplace(chunk->addr_, size);
+    auto emplace_result = allocated_arenas_.emplace(chunk->addr_,
+                                                    size,
+                                                    /*pre_zygote_fork=*/false);
     DCHECK(emplace_result.second);
     // Compute next iterators for faster insert later.
     auto next_best_fit_iter = best_fit_iter;
@@ -263,6 +293,8 @@ void GcVisitedArenaPool::FreeArenaChain(Arena* first) {
     // calculate here.
     bytes_allocated_ += first->GetBytesAllocated();
     TrackedArena* temp = down_cast<TrackedArena*>(first);
+    // TODO: Add logic to unmap the maps corresponding to pre-zygote-fork
+    // arenas, which are expected to be released only during shutdown.
    first = first->Next();
     size_t erase_count = allocated_arenas_.erase(*temp);
     DCHECK_EQ(erase_count, 1u);
diff --git a/runtime/base/gc_visited_arena_pool.h b/runtime/base/gc_visited_arena_pool.h
index 7a5f334f84..57b742d4c8 100644
--- a/runtime/base/gc_visited_arena_pool.h
+++ b/runtime/base/gc_visited_arena_pool.h
@@ -33,8 +33,8 @@ namespace art {
 class TrackedArena final : public Arena {
  public:
   // Used for searching in maps. Only arena's starting address is relevant.
-  explicit TrackedArena(uint8_t* addr) { memory_ = addr; }
-  TrackedArena(uint8_t* start, size_t size);
+  explicit TrackedArena(uint8_t* addr) : pre_zygote_fork_(false) { memory_ = addr; }
+  TrackedArena(uint8_t* start, size_t size, bool pre_zygote_fork);
 
   template <typename PageVisitor>
   void VisitRoots(PageVisitor& visitor) const REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -74,11 +74,13 @@ class TrackedArena final : public Arena {
 
   void SetFirstObject(uint8_t* obj_begin, uint8_t* obj_end);
   void Release() override;
+  bool IsPreZygoteForkArena() const { return pre_zygote_fork_; }
 
  private:
   // first_obj_array_[i] is the object that overlaps with the ith page's
   // beginning, i.e. first_obj_array_[i] <= ith page_begin.
   std::unique_ptr<uint8_t*[]> first_obj_array_;
+  const bool pre_zygote_fork_;
 };
 
 // An arena-pool wherein allocations can be tracked so that the GC can visit all
@@ -95,7 +97,9 @@ class GcVisitedArenaPool final : public ArenaPool {
   static constexpr size_t kLinearAllocPoolSize = 32 * MB;
 #endif
 
-  explicit GcVisitedArenaPool(bool low_4gb = false, const char* name = "LinearAlloc");
+  explicit GcVisitedArenaPool(bool low_4gb = false,
+                              bool is_zygote = false,
+                              const char* name = "LinearAlloc");
   virtual ~GcVisitedArenaPool();
   Arena* AllocArena(size_t size) override;
   void FreeArenaChain(Arena* first) override;
@@ -120,10 +124,22 @@ class GcVisitedArenaPool final : public ArenaPool {
     }
   }
 
+  // Called in Heap::PreZygoteFork(). All allocations after this are done in
+  // arena-pool which is visited by userfaultfd.
+  void SetupPostZygoteMode() {
+    std::lock_guard<std::mutex> lock(lock_);
+    DCHECK(pre_zygote_fork_);
+    pre_zygote_fork_ = false;
+  }
+
  private:
   void FreeRangeLocked(uint8_t* range_begin, size_t range_size) REQUIRES(lock_);
-  // Add a map to the pool of at least min_size
-  void AddMap(size_t min_size) REQUIRES(lock_);
+  // Add a map (to be visited by userfaultfd) to the pool of at least min_size
+  // and return its address.
+  uint8_t* AddMap(size_t min_size) REQUIRES(lock_);
+  // Add a private anonymous map prior to zygote fork to the pool and return its
+  // address.
+  uint8_t* AddPreZygoteForkMap(size_t size) REQUIRES(lock_);
 
   class Chunk {
    public:
@@ -169,6 +185,11 @@ class GcVisitedArenaPool final : public ArenaPool {
   size_t bytes_allocated_ GUARDED_BY(lock_);
   const char* name_;
   const bool low_4gb_;
+  // Set to true in zygote process so that all linear-alloc allocations are in
+  // private-anonymous mappings and not on userfaultfd visited pages. At
+  // first zygote fork, it's set to false, after which all allocations are done
+  // in userfaultfd visited space.
+  bool pre_zygote_fork_ GUARDED_BY(lock_);
 
   DISALLOW_COPY_AND_ASSIGN(GcVisitedArenaPool);
 };
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index 865281b280..25be59f2c1 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -2059,16 +2059,21 @@ void MarkCompact::UpdateNonMovingSpaceBlackAllocations() {
 
 class MarkCompact::ImmuneSpaceUpdateObjVisitor {
  public:
-  explicit ImmuneSpaceUpdateObjVisitor(MarkCompact* collector) : collector_(collector) {}
+  ImmuneSpaceUpdateObjVisitor(MarkCompact* collector, bool visit_native_roots)
+      : collector_(collector), visit_native_roots_(visit_native_roots) {}
 
   ALWAYS_INLINE void operator()(mirror::Object* obj) const REQUIRES(Locks::mutator_lock_) {
     RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false> visitor(collector_,
                                                                        obj,
                                                                        /*begin_*/nullptr,
                                                                        /*end_*/nullptr);
-    obj->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor,
-                                                        MemberOffset(0),
-                                                        MemberOffset(-1));
+    if (visit_native_roots_) {
+      obj->VisitRefsForCompaction</*kFetchObjSize*/ false, /*kVisitNativeRoots*/ true>(
+          visitor, MemberOffset(0), MemberOffset(-1));
+    } else {
+      obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(
+          visitor, MemberOffset(0), MemberOffset(-1));
+    }
   }
 
   static void Callback(mirror::Object* obj, void* arg) REQUIRES(Locks::mutator_lock_) {
@@ -2077,6 +2082,7 @@ class MarkCompact::ImmuneSpaceUpdateObjVisitor {
 
  private:
   MarkCompact* const collector_;
+  const bool visit_native_roots_;
 };
 
 class MarkCompact::ClassLoaderRootsUpdater : public ClassLoaderVisitor {
@@ -2298,16 +2304,30 @@ void MarkCompact::PreCompactionPhase() {
     }
   }
 
+  bool has_zygote_space = heap_->HasZygoteSpace();
   GcVisitedArenaPool* arena_pool =
       static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool());
-  if (uffd_ == kFallbackMode) {
+  if (uffd_ == kFallbackMode || (!has_zygote_space && runtime->IsZygote())) {
+    // Besides fallback-mode, visit linear-alloc space in the pause for zygote
+    // processes prior to first fork (that's when zygote space gets created).
+    if (kIsDebugBuild && IsValidFd(uffd_)) {
+      // All arenas allocated so far are expected to be pre-zygote fork.
+      arena_pool->ForEachAllocatedArena(
+          [](const TrackedArena& arena)
+              REQUIRES_SHARED(Locks::mutator_lock_) { CHECK(arena.IsPreZygoteForkArena()); });
+    }
     LinearAllocPageUpdater updater(this);
     arena_pool->VisitRoots(updater);
   } else {
     arena_pool->ForEachAllocatedArena(
         [this](const TrackedArena& arena) REQUIRES_SHARED(Locks::mutator_lock_) {
-          uint8_t* last_byte = arena.GetLastUsedByte();
-          CHECK(linear_alloc_arenas_.insert({&arena, last_byte}).second);
+          // The pre-zygote fork arenas are not visited concurrently in the
+          // zygote children processes. The native roots of the dirty objects
+          // are visited during immune space visit below.
+          if (!arena.IsPreZygoteForkArena()) {
+            uint8_t* last_byte = arena.GetLastUsedByte();
+            CHECK(linear_alloc_arenas_.insert({&arena, last_byte}).second);
+          }
         });
   }
 
@@ -2334,7 +2354,11 @@ void MarkCompact::PreCompactionPhase() {
     DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
     accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
     accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
-    ImmuneSpaceUpdateObjVisitor visitor(this);
+    // Having zygote-space indicates that the first zygote fork has taken
+    // place and that the classes/dex-caches in immune-spaces may have allocations
+    // (ArtMethod/ArtField arrays, dex-cache array, etc.) in the
+    // non-userfaultfd visited private-anonymous mappings. Visit them here.
+    ImmuneSpaceUpdateObjVisitor visitor(this, /*visit_native_roots=*/has_zygote_space);
     if (table != nullptr) {
       table->ProcessCards();
       table->VisitObjects(ImmuneSpaceUpdateObjVisitor::Callback, &visitor);
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index f3bb166f21..b4336235cd 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -2430,8 +2430,10 @@ void Heap::PreZygoteFork() {
   if (HasZygoteSpace()) {
     return;
   }
-  Runtime::Current()->GetInternTable()->AddNewTable();
-  Runtime::Current()->GetClassLinker()->MoveClassTableToPreZygote();
+  Runtime* runtime = Runtime::Current();
+  runtime->GetInternTable()->AddNewTable();
+  runtime->GetClassLinker()->MoveClassTableToPreZygote();
+  runtime->SetupLinearAllocForPostZygoteFork(self);
   VLOG(heap) << "Starting PreZygoteFork";
   // The end of the non-moving space may be protected, unprotect it so that we can copy the zygote
   // there.
diff --git a/runtime/linear_alloc-inl.h b/runtime/linear_alloc-inl.h
index 928bffbc1c..13dbea11d0 100644
--- a/runtime/linear_alloc-inl.h
+++ b/runtime/linear_alloc-inl.h
@@ -40,6 +40,12 @@ inline void LinearAlloc::SetFirstObject(void* begin, size_t bytes) const {
   down_cast<TrackedArena*>(arena)->SetFirstObject(static_cast<uint8_t*>(begin), end);
 }
 
+inline void LinearAlloc::SetupForPostZygoteFork(Thread* self) {
+  MutexLock mu(self, lock_);
+  DCHECK(track_allocations_);
+  allocator_.ResetCurrentArena();
+}
+
 inline void* LinearAlloc::Realloc(Thread* self,
                                   void* ptr,
                                   size_t old_size,
diff --git a/runtime/linear_alloc.h b/runtime/linear_alloc.h
index ad1e349632..c40af8ad46 100644
--- a/runtime/linear_alloc.h
+++ b/runtime/linear_alloc.h
@@ -90,6 +90,9 @@ class LinearAlloc {
 
   size_t GetUsedMemory() const REQUIRES(!lock_);
   ArenaPool* GetArenaPool() REQUIRES(!lock_);
+  // Force arena allocator to ask for a new arena on next allocation. This
+  // is to preserve private/shared clean pages across zygote fork.
+  void SetupForPostZygoteFork(Thread* self) REQUIRES(!lock_);
 
   // Return true if the linear alloc contains an address.
   bool Contains(void* ptr) const REQUIRES(!lock_);
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 0560223d14..e99eaec150 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1720,7 +1720,7 @@ bool Runtime::Init(RuntimeArgumentMap&& runtime_options_in) {
   // when we have 64 bit ArtMethod pointers.
   const bool low_4gb = IsAotCompiler() && Is64BitInstructionSet(kRuntimeISA);
   if (gUseUserfaultfd) {
-    linear_alloc_arena_pool_.reset(new GcVisitedArenaPool(low_4gb));
+    linear_alloc_arena_pool_.reset(new GcVisitedArenaPool(low_4gb, IsZygote()));
   } else if (low_4gb) {
     linear_alloc_arena_pool_.reset(new MemMapArenaPool(low_4gb));
   }
@@ -3127,6 +3127,42 @@ LinearAlloc* Runtime::CreateLinearAlloc() {
                       : new LinearAlloc(arena_pool_.get(), /*track_allocs=*/ false);
 }
 
+class Runtime::SetupLinearAllocForZygoteFork : public AllocatorVisitor {
+ public:
+  explicit SetupLinearAllocForZygoteFork(Thread* self) : self_(self) {}
+
+  bool Visit(LinearAlloc* alloc) override {
+    alloc->SetupForPostZygoteFork(self_);
+    return true;
+  }
+
+ private:
+  Thread* self_;
+};
+
+void Runtime::SetupLinearAllocForPostZygoteFork(Thread* self) {
+  if (gUseUserfaultfd) {
+    // Setup all the linear-allocs out there for post-zygote fork. This will
+    // basically force the arena allocator to ask for a new arena for the next
+    // allocation. All arenas allocated from now on will be in the userfaultfd
+    // visited space.
+    if (GetLinearAlloc() != nullptr) {
+      GetLinearAlloc()->SetupForPostZygoteFork(self);
+    }
+    if (GetStartupLinearAlloc() != nullptr) {
+      GetStartupLinearAlloc()->SetupForPostZygoteFork(self);
+    }
+    {
+      Locks::mutator_lock_->AssertNotHeld(self);
+      ReaderMutexLock mu2(self, *Locks::mutator_lock_);
+      ReaderMutexLock mu3(self, *Locks::classlinker_classes_lock_);
+      SetupLinearAllocForZygoteFork visitor(self);
+      GetClassLinker()->VisitAllocators(&visitor);
+    }
+    static_cast<GcVisitedArenaPool*>(GetLinearAllocArenaPool())->SetupPostZygoteMode();
+  }
+}
+
 double Runtime::GetHashTableMinLoadFactor() const {
   return is_low_memory_mode_ ? kLowMemoryMinLoadFactor : kNormalMinLoadFactor;
 }
diff --git a/runtime/runtime.h b/runtime/runtime.h
index d6f0e81a71..9b6f545b05 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -911,6 +911,11 @@ class Runtime {
 
   // Create a normal LinearAlloc or low 4gb version if we are 64 bit AOT compiler.
   LinearAlloc* CreateLinearAlloc();
+  // Setup linear-alloc allocators to stop using the current arena so that the
+  // next allocations, which would be after zygote fork, happens in userfaultfd
+  // visited space.
+  void SetupLinearAllocForPostZygoteFork(Thread* self)
+      REQUIRES(!Locks::mutator_lock_, !Locks::classlinker_classes_lock_);
 
   OatFileManager& GetOatFileManager() const {
     DCHECK(oat_file_manager_ != nullptr);
@@ -1598,6 +1603,7 @@ class Runtime {
   friend class ScopedThreadPoolUsage;
   friend class OatFileAssistantTest;
   class NotifyStartupCompletedTask;
+  class SetupLinearAllocForZygoteFork;
 
   DISALLOW_COPY_AND_ASSIGN(Runtime);
 };
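
As a summary of the allocation-side switch (GcVisitedArenaPool::AllocArena()
plus SetupPostZygoteMode() above), here is a simplified, self-contained sketch.
It uses plain per-arena mmap calls instead of ART's MemMap/memfd machinery, and
the class below is illustrative rather than an actual ART type.

#include <sys/mman.h>
#include <cstddef>
#include <cstdint>
#include <mutex>

// Illustrative stand-in for the pool's mode switch. The real pool carves
// arenas out of large memfd-backed maps; this sketch maps one per arena.
class SketchArenaPool {
 public:
  uint8_t* AllocArena(size_t size) {
    std::lock_guard<std::mutex> lock(lock_);
    // Pre-fork: private anonymous, so pages allocated before the first fork
    // keep their shared-clean trait in child processes. Post-fork: shared
    // mapping, which the userfaultfd GC can visit.
    int flags = pre_zygote_fork_ ? (MAP_PRIVATE | MAP_ANONYMOUS)
                                 : (MAP_SHARED | MAP_ANONYMOUS);
    void* addr = mmap(nullptr, size, PROT_READ | PROT_WRITE, flags, -1, 0);
    return addr == MAP_FAILED ? nullptr : static_cast<uint8_t*>(addr);
  }

  // Called once at the first zygote fork (cf. Heap::PreZygoteFork()); every
  // later AllocArena() call lands in userfaultfd-visited space.
  void SetupPostZygoteMode() {
    std::lock_guard<std::mutex> lock(lock_);
    pre_zygote_fork_ = false;
  }

 private:
  std::mutex lock_;
  bool pre_zygote_fork_ = true;
};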