Maintain pre-zygote fork linear-alloc pages as shared-clean

Userfaultfd tends to dirty all the pages of the space it is used on.
However, we want to maintain the shared-clean trait of the pages
allocated in the zygote process prior to the first fork.
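
For context, a minimal standalone sketch (not ART code; the helper names
below are made up) of the two kinds of backing memory involved: untouched
pages of a private anonymous mapping remain shared-clean in the child via
copy-on-write after fork(), while the userfaultfd-visited linear-alloc
pool is a MAP_SHARED mapping over a memfd, whose pages tend to end up
dirty once the GC resolves faults through it:

  #include <sys/mman.h>
  #include <sys/syscall.h>
  #include <unistd.h>

  // Pre-zygote-fork style backing: private anonymous memory. Pages the
  // child never writes stay shared-clean after fork().
  void* AllocPrivateAnonymous(size_t size) {
    return mmap(nullptr, size, PROT_READ | PROT_WRITE,
                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  }

  // Post-fork style backing: a MAP_SHARED mapping over a memfd, which is
  // what the userfaultfd-visited pool uses.
  void* AllocSharedMemfd(size_t size) {
    int fd = syscall(SYS_memfd_create, "linear-alloc", 0);
    if (fd < 0 || ftruncate(fd, size) != 0) {
      return MAP_FAILED;
    }
    void* addr = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    close(fd);  // The mapping keeps the memfd's memory alive.
    return addr;
  }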

This CL separates the pre-zygote fork arenas from the userfaultfd-visited
ones, thereby retaining the former's shared-clean trait.
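
This split also dictates how arena pages are released. A simplified
sketch of the check that TrackedArena::Release() performs below (the
free-standing ReleasePages() helper here is only illustrative):
MADV_REMOVE drops pages of a MAP_SHARED memfd mapping, but fails with
EINVAL on private anonymous memory, so pre-zygote fork arenas and the
non-uffd case fall back to MADV_DONTNEED:

  #include <errno.h>
  #include <stddef.h>
  #include <stdint.h>
  #include <sys/mman.h>

  void ReleasePages(uint8_t* begin, size_t size, bool uffd_gc, bool pre_zygote_fork) {
    // Private anonymous (pre-zygote fork) arenas, and the non-uffd case,
    // use MADV_DONTNEED; shared memfd-backed arenas need MADV_REMOVE.
    if (!uffd_gc || pre_zygote_fork ||
        (madvise(begin, size, MADV_REMOVE) == -1 && errno == EINVAL)) {
      madvise(begin, size, MADV_DONTNEED);
    }
  }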

Bug: 160737021
Test: module install and enable uffd GC
Change-Id: Iddffb2c8d2d234ce7b20c069d86341dda5443a9b
diff --git a/libartbase/base/arena_allocator.cc b/libartbase/base/arena_allocator.cc
index e5f2542..69c8d0b 100644
--- a/libartbase/base/arena_allocator.cc
+++ b/libartbase/base/arena_allocator.cc
@@ -265,6 +265,13 @@
   pool_->FreeArenaChain(arena_head_);
 }
 
+void ArenaAllocator::ResetCurrentArena() {
+  UpdateBytesAllocated();
+  begin_ = nullptr;
+  ptr_ = nullptr;
+  end_ = nullptr;
+}
+
 uint8_t* ArenaAllocator::AllocFromNewArena(size_t bytes) {
   Arena* new_arena = pool_->AllocArena(std::max(arena_allocator::kArenaDefaultSize, bytes));
   DCHECK(new_arena != nullptr);
diff --git a/libartbase/base/arena_allocator.h b/libartbase/base/arena_allocator.h
index 49c1461..3dfeebe 100644
--- a/libartbase/base/arena_allocator.h
+++ b/libartbase/base/arena_allocator.h
@@ -366,6 +366,9 @@
     DCHECK_LE(ptr_, end_);
     return end_ - ptr_;
   }
+  // Resets the current arena in use, which will force us to get a new arena
+  // on next allocation.
+  void ResetCurrentArena();
 
   bool Contains(const void* ptr) const;
 
diff --git a/runtime/base/gc_visited_arena_pool.cc b/runtime/base/gc_visited_arena_pool.cc
index 938dcfa..0fb30e2 100644
--- a/runtime/base/gc_visited_arena_pool.cc
+++ b/runtime/base/gc_visited_arena_pool.cc
@@ -27,7 +27,8 @@
 
 namespace art {
 
-TrackedArena::TrackedArena(uint8_t* start, size_t size) : Arena(), first_obj_array_(nullptr) {
+TrackedArena::TrackedArena(uint8_t* start, size_t size, bool pre_zygote_fork)
+    : Arena(), first_obj_array_(nullptr), pre_zygote_fork_(pre_zygote_fork) {
   static_assert(ArenaAllocator::kArenaAlignment <= kPageSize,
                 "Arena should not need stronger alignment than kPageSize.");
   DCHECK_ALIGNED(size, kPageSize);
@@ -41,10 +42,13 @@
 
 void TrackedArena::Release() {
   if (bytes_allocated_ > 0) {
-    // Userfaultfd GC uses memfd mappings for linear-alloc and therefore
+    // Userfaultfd GC uses MAP_SHARED mappings for linear-alloc and therefore
     // MADV_DONTNEED will not free the pages from page cache. Therefore use
     // MADV_REMOVE instead, which is meant for this purpose.
-    if (!gUseUserfaultfd || (madvise(Begin(), Size(), MADV_REMOVE) == -1 && errno == EINVAL)) {
+    // Arenas allocated pre-zygote fork are private anonymous and hence must be
+    // released using MADV_DONTNEED.
+    if (!gUseUserfaultfd || pre_zygote_fork_ ||
+        (madvise(Begin(), Size(), MADV_REMOVE) == -1 && errno == EINVAL)) {
       // MADV_REMOVE fails if invoked on anonymous mapping, which could happen
       // if the arena is released before userfaultfd-GC starts using memfd. So
       // use MADV_DONTNEED.
@@ -69,7 +73,7 @@
   }
 }
 
-void GcVisitedArenaPool::AddMap(size_t min_size) {
+uint8_t* GcVisitedArenaPool::AddMap(size_t min_size) {
   size_t size = std::max(min_size, kLinearAllocPoolSize);
 #if defined(__LP64__)
   // This is true only when we are running a 64-bit dex2oat to compile a 32-bit image.
@@ -110,15 +114,11 @@
   Chunk* chunk = new Chunk(map.Begin(), map.Size());
   best_fit_allocs_.insert(chunk);
   free_chunks_.insert(chunk);
+  return map.Begin();
 }
 
-GcVisitedArenaPool::GcVisitedArenaPool(bool low_4gb, const char* name)
-  : bytes_allocated_(0), name_(name), low_4gb_(low_4gb) {
-  std::lock_guard<std::mutex> lock(lock_);
-  // It's extremely rare to have more than one map.
-  maps_.reserve(1);
-  AddMap(/*min_size=*/0);
-}
+GcVisitedArenaPool::GcVisitedArenaPool(bool low_4gb, bool is_zygote, const char* name)
+    : bytes_allocated_(0), name_(name), low_4gb_(low_4gb), pre_zygote_fork_(is_zygote) {}
 
 GcVisitedArenaPool::~GcVisitedArenaPool() {
   for (Chunk* chunk : free_chunks_) {
@@ -133,11 +133,37 @@
   return bytes_allocated_;
 }
 
+uint8_t* GcVisitedArenaPool::AddPreZygoteForkMap(size_t size) {
+  DCHECK(pre_zygote_fork_);
+  DCHECK(Runtime::Current()->IsZygote());
+  std::string pre_fork_name = "Pre-zygote-";
+  pre_fork_name += name_;
+  std::string err_msg;
+  maps_.emplace_back(MemMap::MapAnonymous(
+      pre_fork_name.c_str(), size, PROT_READ | PROT_WRITE, low_4gb_, &err_msg));
+  MemMap& map = maps_.back();
+  if (!map.IsValid()) {
+    LOG(FATAL) << "Failed to allocate " << pre_fork_name << ": " << err_msg;
+    UNREACHABLE();
+  }
+  return map.Begin();
+}
+
 Arena* GcVisitedArenaPool::AllocArena(size_t size) {
   // Return only page aligned sizes so that madvise can be leveraged.
   size = RoundUp(size, kPageSize);
-  Chunk temp_chunk(nullptr, size);
   std::lock_guard<std::mutex> lock(lock_);
+
+  if (pre_zygote_fork_) {
+    // The first fork out of zygote hasn't happened yet. Allocate arena in a
+    // private-anonymous mapping to retain clean pages across fork.
+    DCHECK(Runtime::Current()->IsZygote());
+    uint8_t* addr = AddPreZygoteForkMap(size);
+    auto emplace_result = allocated_arenas_.emplace(addr, size, /*pre_zygote_fork=*/true);
+    return const_cast<TrackedArena*>(&(*emplace_result.first));
+  }
+
+  Chunk temp_chunk(nullptr, size);
   auto best_fit_iter = best_fit_allocs_.lower_bound(&temp_chunk);
   if (UNLIKELY(best_fit_iter == best_fit_allocs_.end())) {
     AddMap(size);
@@ -151,14 +177,18 @@
   // if the best-fit chunk < 2x the requested size, then give the whole chunk.
   if (chunk->size_ < 2 * size) {
     DCHECK_GE(chunk->size_, size);
-    auto emplace_result = allocated_arenas_.emplace(chunk->addr_, chunk->size_);
+    auto emplace_result = allocated_arenas_.emplace(chunk->addr_,
+                                                    chunk->size_,
+                                                    /*pre_zygote_fork=*/false);
     DCHECK(emplace_result.second);
     free_chunks_.erase(free_chunks_iter);
     best_fit_allocs_.erase(best_fit_iter);
     delete chunk;
     return const_cast<TrackedArena*>(&(*emplace_result.first));
   } else {
-    auto emplace_result = allocated_arenas_.emplace(chunk->addr_, size);
+    auto emplace_result = allocated_arenas_.emplace(chunk->addr_,
+                                                    size,
+                                                    /*pre_zygote_fork=*/false);
     DCHECK(emplace_result.second);
     // Compute next iterators for faster insert later.
     auto next_best_fit_iter = best_fit_iter;
@@ -263,6 +293,8 @@
     // calculate here.
     bytes_allocated_ += first->GetBytesAllocated();
     TrackedArena* temp = down_cast<TrackedArena*>(first);
+    // TODO: Add logic to unmap the maps corresponding to pre-zygote-fork
+    // arenas, which are expected to be released only during shutdown.
     first = first->Next();
     size_t erase_count = allocated_arenas_.erase(*temp);
     DCHECK_EQ(erase_count, 1u);
diff --git a/runtime/base/gc_visited_arena_pool.h b/runtime/base/gc_visited_arena_pool.h
index 7a5f334..57b742d 100644
--- a/runtime/base/gc_visited_arena_pool.h
+++ b/runtime/base/gc_visited_arena_pool.h
@@ -33,8 +33,8 @@
 class TrackedArena final : public Arena {
  public:
   // Used for searching in maps. Only arena's starting address is relevant.
-  explicit TrackedArena(uint8_t* addr) { memory_ = addr; }
-  TrackedArena(uint8_t* start, size_t size);
+  explicit TrackedArena(uint8_t* addr) : pre_zygote_fork_(false) { memory_ = addr; }
+  TrackedArena(uint8_t* start, size_t size, bool pre_zygote_fork);
 
   template <typename PageVisitor>
   void VisitRoots(PageVisitor& visitor) const REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -74,11 +74,13 @@
   void SetFirstObject(uint8_t* obj_begin, uint8_t* obj_end);
 
   void Release() override;
+  bool IsPreZygoteForkArena() const { return pre_zygote_fork_; }
 
  private:
   // first_obj_array_[i] is the object that overlaps with the ith page's
   // beginning, i.e. first_obj_array_[i] <= ith page_begin.
   std::unique_ptr<uint8_t*[]> first_obj_array_;
+  const bool pre_zygote_fork_;
 };
 
 // An arena-pool wherein allocations can be tracked so that the GC can visit all
@@ -95,7 +97,9 @@
   static constexpr size_t kLinearAllocPoolSize = 32 * MB;
 #endif
 
-  explicit GcVisitedArenaPool(bool low_4gb = false, const char* name = "LinearAlloc");
+  explicit GcVisitedArenaPool(bool low_4gb = false,
+                              bool is_zygote = false,
+                              const char* name = "LinearAlloc");
   virtual ~GcVisitedArenaPool();
   Arena* AllocArena(size_t size) override;
   void FreeArenaChain(Arena* first) override;
@@ -120,10 +124,22 @@
     }
   }
 
+  // Called in Heap::PreZygoteFork(). All allocations after this are done in
+  // the arena-pool visited by userfaultfd.
+  void SetupPostZygoteMode() {
+    std::lock_guard<std::mutex> lock(lock_);
+    DCHECK(pre_zygote_fork_);
+    pre_zygote_fork_ = false;
+  }
+
  private:
   void FreeRangeLocked(uint8_t* range_begin, size_t range_size) REQUIRES(lock_);
-  // Add a map to the pool of at least min_size
-  void AddMap(size_t min_size) REQUIRES(lock_);
+  // Add a map of at least min_size (to be visited by userfaultfd) to the
+  // pool and return its address.
+  uint8_t* AddMap(size_t min_size) REQUIRES(lock_);
+  // Add a private anonymous map prior to zygote fork to the pool and return its
+  // address.
+  uint8_t* AddPreZygoteForkMap(size_t size) REQUIRES(lock_);
 
   class Chunk {
    public:
@@ -169,6 +185,11 @@
   size_t bytes_allocated_ GUARDED_BY(lock_);
   const char* name_;
   const bool low_4gb_;
+  // Set to true in the zygote process so that all linear-alloc allocations go
+  // into private-anonymous mappings and not userfaultfd-visited pages. At the
+  // first zygote fork it is set to false, after which all allocations are done
+  // in the userfaultfd-visited space.
+  bool pre_zygote_fork_ GUARDED_BY(lock_);
 
   DISALLOW_COPY_AND_ASSIGN(GcVisitedArenaPool);
 };
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index 865281b..25be59f 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -2059,16 +2059,21 @@
 
 class MarkCompact::ImmuneSpaceUpdateObjVisitor {
  public:
-  explicit ImmuneSpaceUpdateObjVisitor(MarkCompact* collector) : collector_(collector) {}
+  ImmuneSpaceUpdateObjVisitor(MarkCompact* collector, bool visit_native_roots)
+      : collector_(collector), visit_native_roots_(visit_native_roots) {}
 
   ALWAYS_INLINE void operator()(mirror::Object* obj) const REQUIRES(Locks::mutator_lock_) {
     RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false> visitor(collector_,
                                                                         obj,
                                                                         /*begin_*/nullptr,
                                                                         /*end_*/nullptr);
-    obj->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor,
-                                                        MemberOffset(0),
-                                                        MemberOffset(-1));
+    if (visit_native_roots_) {
+      obj->VisitRefsForCompaction</*kFetchObjSize*/ false, /*kVisitNativeRoots*/ true>(
+          visitor, MemberOffset(0), MemberOffset(-1));
+    } else {
+      obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(
+          visitor, MemberOffset(0), MemberOffset(-1));
+    }
   }
 
   static void Callback(mirror::Object* obj, void* arg) REQUIRES(Locks::mutator_lock_) {
@@ -2077,6 +2082,7 @@
 
  private:
   MarkCompact* const collector_;
+  const bool visit_native_roots_;
 };
 
 class MarkCompact::ClassLoaderRootsUpdater : public ClassLoaderVisitor {
@@ -2298,16 +2304,30 @@
     }
   }
 
+  bool has_zygote_space = heap_->HasZygoteSpace();
   GcVisitedArenaPool* arena_pool =
       static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool());
-  if (uffd_ == kFallbackMode) {
+  if (uffd_ == kFallbackMode || (!has_zygote_space && runtime->IsZygote())) {
+    // Besides fallback-mode, visit linear-alloc space in the pause for zygote
+    // processes prior to first fork (that's when zygote space gets created).
+    if (kIsDebugBuild && IsValidFd(uffd_)) {
+      // All arenas allocated so far are expected to be pre-zygote fork.
+      arena_pool->ForEachAllocatedArena(
+          [](const TrackedArena& arena)
+              REQUIRES_SHARED(Locks::mutator_lock_) { CHECK(arena.IsPreZygoteForkArena()); });
+    }
     LinearAllocPageUpdater updater(this);
     arena_pool->VisitRoots(updater);
   } else {
     arena_pool->ForEachAllocatedArena(
         [this](const TrackedArena& arena) REQUIRES_SHARED(Locks::mutator_lock_) {
-          uint8_t* last_byte = arena.GetLastUsedByte();
-          CHECK(linear_alloc_arenas_.insert({&arena, last_byte}).second);
+          // The pre-zygote fork arenas are not visited concurrently in the
+          // zygote's child processes. The native roots of the dirty objects
+          // are visited during the immune-space visit below.
+          if (!arena.IsPreZygoteForkArena()) {
+            uint8_t* last_byte = arena.GetLastUsedByte();
+            CHECK(linear_alloc_arenas_.insert({&arena, last_byte}).second);
+          }
         });
   }
 
@@ -2334,7 +2354,11 @@
       DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
       accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
       accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
-      ImmuneSpaceUpdateObjVisitor visitor(this);
+      // Having zygote-space indicates that the first zygote fork has taken
+      // place and that the classes/dex-caches in immune-spaces may have allocations
+      // (ArtMethod/ArtField arrays, dex-cache array, etc.) in the
+      // non-userfaultfd visited private-anonymous mappings. Visit them here.
+      ImmuneSpaceUpdateObjVisitor visitor(this, /*visit_native_roots=*/has_zygote_space);
       if (table != nullptr) {
         table->ProcessCards();
         table->VisitObjects(ImmuneSpaceUpdateObjVisitor::Callback, &visitor);
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index f3bb166..b433623 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -2430,8 +2430,10 @@
   if (HasZygoteSpace()) {
     return;
   }
-  Runtime::Current()->GetInternTable()->AddNewTable();
-  Runtime::Current()->GetClassLinker()->MoveClassTableToPreZygote();
+  Runtime* runtime = Runtime::Current();
+  runtime->GetInternTable()->AddNewTable();
+  runtime->GetClassLinker()->MoveClassTableToPreZygote();
+  runtime->SetupLinearAllocForPostZygoteFork(self);
   VLOG(heap) << "Starting PreZygoteFork";
   // The end of the non-moving space may be protected, unprotect it so that we can copy the zygote
   // there.
diff --git a/runtime/linear_alloc-inl.h b/runtime/linear_alloc-inl.h
index 928bffb..13dbea1 100644
--- a/runtime/linear_alloc-inl.h
+++ b/runtime/linear_alloc-inl.h
@@ -40,6 +40,12 @@
   down_cast<TrackedArena*>(arena)->SetFirstObject(static_cast<uint8_t*>(begin), end);
 }
 
+inline void LinearAlloc::SetupForPostZygoteFork(Thread* self) {
+  MutexLock mu(self, lock_);
+  DCHECK(track_allocations_);
+  allocator_.ResetCurrentArena();
+}
+
 inline void* LinearAlloc::Realloc(Thread* self,
                                   void* ptr,
                                   size_t old_size,
diff --git a/runtime/linear_alloc.h b/runtime/linear_alloc.h
index ad1e349..c40af8a 100644
--- a/runtime/linear_alloc.h
+++ b/runtime/linear_alloc.h
@@ -90,6 +90,9 @@
   size_t GetUsedMemory() const REQUIRES(!lock_);
 
   ArenaPool* GetArenaPool() REQUIRES(!lock_);
+  // Force the arena allocator to ask for a new arena on the next allocation.
+  // This preserves private/shared clean pages across the zygote fork.
+  void SetupForPostZygoteFork(Thread* self) REQUIRES(!lock_);
 
   // Return true if the linear alloc contains an address.
   bool Contains(void* ptr) const REQUIRES(!lock_);
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 0560223..e99eaec 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1720,7 +1720,7 @@
   // when we have 64 bit ArtMethod pointers.
   const bool low_4gb = IsAotCompiler() && Is64BitInstructionSet(kRuntimeISA);
   if (gUseUserfaultfd) {
-    linear_alloc_arena_pool_.reset(new GcVisitedArenaPool(low_4gb));
+    linear_alloc_arena_pool_.reset(new GcVisitedArenaPool(low_4gb, IsZygote()));
   } else if (low_4gb) {
     linear_alloc_arena_pool_.reset(new MemMapArenaPool(low_4gb));
   }
@@ -3127,6 +3127,42 @@
       : new LinearAlloc(arena_pool_.get(), /*track_allocs=*/ false);
 }
 
+class Runtime::SetupLinearAllocForZygoteFork : public AllocatorVisitor {
+ public:
+  explicit SetupLinearAllocForZygoteFork(Thread* self) : self_(self) {}
+
+  bool Visit(LinearAlloc* alloc) override {
+    alloc->SetupForPostZygoteFork(self_);
+    return true;
+  }
+
+ private:
+  Thread* self_;
+};
+
+void Runtime::SetupLinearAllocForPostZygoteFork(Thread* self) {
+  if (gUseUserfaultfd) {
+    // Set up all the linear-allocs for the post-zygote fork. This forces the
+    // arena allocator to ask for a new arena on the next allocation. All
+    // arenas allocated from now on will be in the userfaultfd-visited
+    // space.
+    if (GetLinearAlloc() != nullptr) {
+      GetLinearAlloc()->SetupForPostZygoteFork(self);
+    }
+    if (GetStartupLinearAlloc() != nullptr) {
+      GetStartupLinearAlloc()->SetupForPostZygoteFork(self);
+    }
+    {
+      Locks::mutator_lock_->AssertNotHeld(self);
+      ReaderMutexLock mu2(self, *Locks::mutator_lock_);
+      ReaderMutexLock mu3(self, *Locks::classlinker_classes_lock_);
+      SetupLinearAllocForZygoteFork visitor(self);
+      GetClassLinker()->VisitAllocators(&visitor);
+    }
+    static_cast<GcVisitedArenaPool*>(GetLinearAllocArenaPool())->SetupPostZygoteMode();
+  }
+}
+
 double Runtime::GetHashTableMinLoadFactor() const {
   return is_low_memory_mode_ ? kLowMemoryMinLoadFactor : kNormalMinLoadFactor;
 }
diff --git a/runtime/runtime.h b/runtime/runtime.h
index d6f0e81..9b6f545 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -911,6 +911,11 @@
 
   // Create a normal LinearAlloc or low 4gb version if we are 64 bit AOT compiler.
   LinearAlloc* CreateLinearAlloc();
+  // Set up linear-alloc allocators to stop using the current arena so that
+  // subsequent allocations, which happen after the zygote fork, land in the
+  // userfaultfd-visited space.
+  void SetupLinearAllocForPostZygoteFork(Thread* self)
+      REQUIRES(!Locks::mutator_lock_, !Locks::classlinker_classes_lock_);
 
   OatFileManager& GetOatFileManager() const {
     DCHECK(oat_file_manager_ != nullptr);
@@ -1598,6 +1603,7 @@
   friend class ScopedThreadPoolUsage;
   friend class OatFileAssistantTest;
   class NotifyStartupCompletedTask;
+  class SetupLinearAllocForZygoteFork;
 
   DISALLOW_COPY_AND_ASSIGN(Runtime);
 };