author     2023-10-17 19:15:23 +0000
committer  2023-10-17 21:32:14 +0000
commit     9faffd5c4e062ca45bd6f29a3b6d1b276e6c9839
tree       2c49f685bbe21597b707587859f2bae1f1f92495
parent     66b2595485bdc2a9fbadd7a1058f9c9fd0a0cc37
Revert "Update class-table and intern-table concurrently with uffd GC"
This reverts commit 97a6f7cd191cde0abaaf6323ae2f67d8e42a1236.
Reason for revert: LUCI failure in libcore test
Change-Id: I381b261ae0f67103bf1d096d8f64c84ba3f3e19c
-rw-r--r--  runtime/base/gc_visited_arena_pool.cc | 244
-rw-r--r--  runtime/base/gc_visited_arena_pool.h  | 207
-rw-r--r--  runtime/class_linker.cc               |  20
-rw-r--r--  runtime/class_linker.h                |   5
-rw-r--r--  runtime/class_table-inl.h             |  36
-rw-r--r--  runtime/class_table.h                 |  26
-rw-r--r--  runtime/gc/collector/mark_compact.cc  | 278
-rw-r--r--  runtime/gc/collector/mark_compact.h   |   5
-rw-r--r--  runtime/gc/heap.cc                    |   4
-rw-r--r--  runtime/intern_table.h                |  13
-rw-r--r--  runtime/runtime.cc                    |  16
11 files changed, 241 insertions(+), 613 deletions(-)
diff --git a/runtime/base/gc_visited_arena_pool.cc b/runtime/base/gc_visited_arena_pool.cc index 77cc7ae32d..039157a30c 100644 --- a/runtime/base/gc_visited_arena_pool.cc +++ b/runtime/base/gc_visited_arena_pool.cc @@ -27,67 +27,43 @@ namespace art { -TrackedArena::TrackedArena(uint8_t* start, size_t size, bool pre_zygote_fork, bool single_obj_arena) - : Arena(), - first_obj_array_(nullptr), - pre_zygote_fork_(pre_zygote_fork), - waiting_for_deletion_(false) { +TrackedArena::TrackedArena(uint8_t* start, size_t size, bool pre_zygote_fork) + : Arena(), first_obj_array_(nullptr), pre_zygote_fork_(pre_zygote_fork) { static_assert(ArenaAllocator::kArenaAlignment <= kPageSize, "Arena should not need stronger alignment than kPageSize."); + DCHECK_ALIGNED(size, kPageSize); + DCHECK_ALIGNED(start, kPageSize); memory_ = start; size_ = size; - if (single_obj_arena) { - // We have only one object in this arena and it is expected to consume the - // entire arena. - bytes_allocated_ = size; - } else { - DCHECK_ALIGNED(size, kPageSize); - DCHECK_ALIGNED(start, kPageSize); - size_t arr_size = size / kPageSize; - first_obj_array_.reset(new uint8_t*[arr_size]); - std::fill_n(first_obj_array_.get(), arr_size, nullptr); - } -} - -void TrackedArena::ReleasePages(uint8_t* begin, size_t size, bool pre_zygote_fork) { - DCHECK_ALIGNED(begin, kPageSize); - // Userfaultfd GC uses MAP_SHARED mappings for linear-alloc and therefore - // MADV_DONTNEED will not free the pages from page cache. Therefore use - // MADV_REMOVE instead, which is meant for this purpose. - // Arenas allocated pre-zygote fork are private anonymous and hence must be - // released using MADV_DONTNEED. - if (!gUseUserfaultfd || pre_zygote_fork || - (madvise(begin, size, MADV_REMOVE) == -1 && errno == EINVAL)) { - // MADV_REMOVE fails if invoked on anonymous mapping, which could happen - // if the arena is released before userfaultfd-GC starts using memfd. So - // use MADV_DONTNEED. - ZeroAndReleaseMemory(begin, size); - } + size_t arr_size = size / kPageSize; + first_obj_array_.reset(new uint8_t*[arr_size]); + std::fill_n(first_obj_array_.get(), arr_size, nullptr); } void TrackedArena::Release() { if (bytes_allocated_ > 0) { - ReleasePages(Begin(), Size(), pre_zygote_fork_); - if (first_obj_array_.get() != nullptr) { - std::fill_n(first_obj_array_.get(), Size() / kPageSize, nullptr); + // Userfaultfd GC uses MAP_SHARED mappings for linear-alloc and therefore + // MADV_DONTNEED will not free the pages from page cache. Therefore use + // MADV_REMOVE instead, which is meant for this purpose. + // Arenas allocated pre-zygote fork are private anonymous and hence must be + // released using MADV_DONTNEED. + if (!gUseUserfaultfd || pre_zygote_fork_ || + (madvise(Begin(), Size(), MADV_REMOVE) == -1 && errno == EINVAL)) { + // MADV_REMOVE fails if invoked on anonymous mapping, which could happen + // if the arena is released before userfaultfd-GC starts using memfd. So + // use MADV_DONTNEED. 
+ ZeroAndReleaseMemory(Begin(), Size()); } + std::fill_n(first_obj_array_.get(), Size() / kPageSize, nullptr); bytes_allocated_ = 0; } } void TrackedArena::SetFirstObject(uint8_t* obj_begin, uint8_t* obj_end) { - DCHECK(first_obj_array_.get() != nullptr); DCHECK_LE(static_cast<void*>(Begin()), static_cast<void*>(obj_end)); DCHECK_LT(static_cast<void*>(obj_begin), static_cast<void*>(obj_end)); - GcVisitedArenaPool* arena_pool = - static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool()); size_t idx = static_cast<size_t>(obj_begin - Begin()) / kPageSize; size_t last_byte_idx = static_cast<size_t>(obj_end - 1 - Begin()) / kPageSize; - // Do the update below with arena-pool's lock in shared-mode to serialize with - // the compaction-pause wherein we acquire it exclusively. This is to ensure - // that last-byte read there doesn't change after reading it and before - // userfaultfd registration. - ReaderMutexLock rmu(Thread::Current(), arena_pool->GetLock()); // If the addr is at the beginning of a page, then we set it for that page too. if (IsAligned<kPageSize>(obj_begin)) { first_obj_array_[idx] = obj_begin; @@ -130,13 +106,7 @@ uint8_t* GcVisitedArenaPool::AddMap(size_t min_size) { } GcVisitedArenaPool::GcVisitedArenaPool(bool low_4gb, bool is_zygote, const char* name) - : lock_("gc-visited arena-pool", kGenericBottomLock), - bytes_allocated_(0), - unused_arenas_(nullptr), - name_(name), - defer_arena_freeing_(false), - low_4gb_(low_4gb), - pre_zygote_fork_(is_zygote) {} + : bytes_allocated_(0), name_(name), low_4gb_(low_4gb), pre_zygote_fork_(is_zygote) {} GcVisitedArenaPool::~GcVisitedArenaPool() { for (Chunk* chunk : free_chunks_) { @@ -147,12 +117,13 @@ GcVisitedArenaPool::~GcVisitedArenaPool() { } size_t GcVisitedArenaPool::GetBytesAllocated() const { - ReaderMutexLock rmu(Thread::Current(), lock_); + std::lock_guard<std::mutex> lock(lock_); return bytes_allocated_; } uint8_t* GcVisitedArenaPool::AddPreZygoteForkMap(size_t size) { DCHECK(pre_zygote_fork_); + DCHECK(Runtime::Current()->IsZygote()); std::string pre_fork_name = "Pre-zygote-"; pre_fork_name += name_; std::string err_msg; @@ -166,67 +137,18 @@ uint8_t* GcVisitedArenaPool::AddPreZygoteForkMap(size_t size) { return map.Begin(); } -uint8_t* GcVisitedArenaPool::AllocSingleObjArena(size_t size) { - WriterMutexLock wmu(Thread::Current(), lock_); - Arena* arena; - DCHECK(gUseUserfaultfd); - // To minimize private dirty, all class and intern table allocations are - // done outside LinearAlloc range so they are untouched during GC. 
- if (pre_zygote_fork_) { - uint8_t* begin = static_cast<uint8_t*>(malloc(size)); - auto insert_result = allocated_arenas_.insert( - new TrackedArena(begin, size, /*pre_zygote_fork=*/true, /*single_obj_arena=*/true)); - arena = *insert_result.first; - } else { - arena = AllocArena(size, /*single_obj_arena=*/true); - } - return arena->Begin(); -} - -void GcVisitedArenaPool::FreeSingleObjArena(uint8_t* addr) { - Thread* self = Thread::Current(); - size_t size; - bool zygote_arena; - { - TrackedArena temp_arena(addr); - WriterMutexLock wmu(self, lock_); - auto iter = allocated_arenas_.find(&temp_arena); - DCHECK(iter != allocated_arenas_.end()); - TrackedArena* arena = *iter; - size = arena->Size(); - zygote_arena = arena->IsPreZygoteForkArena(); - DCHECK_EQ(arena->Begin(), addr); - DCHECK(arena->IsSingleObjectArena()); - allocated_arenas_.erase(iter); - if (defer_arena_freeing_) { - arena->SetupForDeferredDeletion(unused_arenas_); - unused_arenas_ = arena; - } else { - delete arena; - } - } - // Refer to the comment in FreeArenaChain() for why the pages are released - // after deleting the arena. - if (zygote_arena) { - free(addr); - } else { - TrackedArena::ReleasePages(addr, size, /*pre_zygote_fork=*/false); - WriterMutexLock wmu(self, lock_); - FreeRangeLocked(addr, size); - } -} - -Arena* GcVisitedArenaPool::AllocArena(size_t size, bool single_obj_arena) { +Arena* GcVisitedArenaPool::AllocArena(size_t size) { // Return only page aligned sizes so that madvise can be leveraged. size = RoundUp(size, kPageSize); + std::lock_guard<std::mutex> lock(lock_); + if (pre_zygote_fork_) { // The first fork out of zygote hasn't happened yet. Allocate arena in a // private-anonymous mapping to retain clean pages across fork. + DCHECK(Runtime::Current()->IsZygote()); uint8_t* addr = AddPreZygoteForkMap(size); - auto insert_result = allocated_arenas_.insert( - new TrackedArena(addr, size, /*pre_zygote_fork=*/true, single_obj_arena)); - DCHECK(insert_result.second); - return *insert_result.first; + auto emplace_result = allocated_arenas_.emplace(addr, size, /*pre_zygote_fork=*/true); + return const_cast<TrackedArena*>(&(*emplace_result.first)); } Chunk temp_chunk(nullptr, size); @@ -243,21 +165,19 @@ Arena* GcVisitedArenaPool::AllocArena(size_t size, bool single_obj_arena) { // if the best-fit chunk < 2x the requested size, then give the whole chunk. if (chunk->size_ < 2 * size) { DCHECK_GE(chunk->size_, size); - auto insert_result = allocated_arenas_.insert(new TrackedArena(chunk->addr_, - chunk->size_, - /*pre_zygote_fork=*/false, - single_obj_arena)); - DCHECK(insert_result.second); + auto emplace_result = allocated_arenas_.emplace(chunk->addr_, + chunk->size_, + /*pre_zygote_fork=*/false); + DCHECK(emplace_result.second); free_chunks_.erase(free_chunks_iter); best_fit_allocs_.erase(best_fit_iter); delete chunk; - return *insert_result.first; + return const_cast<TrackedArena*>(&(*emplace_result.first)); } else { - auto insert_result = allocated_arenas_.insert(new TrackedArena(chunk->addr_, - size, - /*pre_zygote_fork=*/false, - single_obj_arena)); - DCHECK(insert_result.second); + auto emplace_result = allocated_arenas_.emplace(chunk->addr_, + size, + /*pre_zygote_fork=*/false); + DCHECK(emplace_result.second); // Compute next iterators for faster insert later. 
auto next_best_fit_iter = best_fit_iter; next_best_fit_iter++; @@ -270,7 +190,7 @@ Arena* GcVisitedArenaPool::AllocArena(size_t size, bool single_obj_arena) { DCHECK_EQ(free_chunks_nh.value()->addr_, chunk->addr_); best_fit_allocs_.insert(next_best_fit_iter, std::move(best_fit_nh)); free_chunks_.insert(next_free_chunks_iter, std::move(free_chunks_nh)); - return *insert_result.first; + return const_cast<TrackedArena*>(&(*emplace_result.first)); } } @@ -346,79 +266,27 @@ void GcVisitedArenaPool::FreeArenaChain(Arena* first) { // TODO: Handle the case when arena_allocator::kArenaAllocatorPreciseTracking // is true. See MemMapArenaPool::FreeArenaChain() for example. CHECK(!arena_allocator::kArenaAllocatorPreciseTracking); - Thread* self = Thread::Current(); - // vector of arena ranges to be freed and whether they are pre-zygote-fork. - std::vector<std::tuple<uint8_t*, size_t, bool>> free_ranges; - - { - WriterMutexLock wmu(self, lock_); - while (first != nullptr) { - TrackedArena* temp = down_cast<TrackedArena*>(first); - DCHECK(!temp->IsSingleObjectArena()); - first = first->Next(); - free_ranges.emplace_back(temp->Begin(), temp->Size(), temp->IsPreZygoteForkArena()); - // In other implementations of ArenaPool this is calculated when asked for, - // thanks to the list of free arenas that is kept around. But in this case, - // we release the freed arena back to the pool and therefore need to - // calculate here. - bytes_allocated_ += temp->GetBytesAllocated(); - auto iter = allocated_arenas_.find(temp); - DCHECK(iter != allocated_arenas_.end()); - allocated_arenas_.erase(iter); - if (defer_arena_freeing_) { - temp->SetupForDeferredDeletion(unused_arenas_); - unused_arenas_ = temp; - } else { - delete temp; - } - } - } - // madvise of arenas must be done after the above loop which serializes with - // MarkCompact::ProcessLinearAlloc() so that if it finds an arena to be not - // 'waiting-for-deletion' then it finishes the arena's processing before - // clearing here. Otherwise, we could have a situation wherein arena-pool - // assumes the memory range of the arena(s) to be zero'ed (by madvise), - // whereas GC maps stale arena pages. - for (auto& iter : free_ranges) { - // No need to madvise pre-zygote-fork arenas as they will munmapped below. - if (!std::get<2>(iter)) { - TrackedArena::ReleasePages(std::get<0>(iter), std::get<1>(iter), /*pre_zygote_fork=*/false); - } + // madvise the arenas before acquiring lock for scalability + for (Arena* temp = first; temp != nullptr; temp = temp->Next()) { + temp->Release(); } - WriterMutexLock wmu(self, lock_); - for (auto& iter : free_ranges) { - if (UNLIKELY(std::get<2>(iter))) { - bool found = false; - for (auto map_iter = maps_.begin(); map_iter != maps_.end(); map_iter++) { - if (map_iter->Begin() == std::get<0>(iter)) { - // erase will destruct the MemMap and thereby munmap. But this happens - // very rarely so it's ok to do it with lock acquired. 
- maps_.erase(map_iter); - found = true; - break; - } - } - CHECK(found); - } else { - FreeRangeLocked(std::get<0>(iter), std::get<1>(iter)); - } - } -} - -void GcVisitedArenaPool::DeleteUnusedArenas() { - TrackedArena* arena; - { - WriterMutexLock wmu(Thread::Current(), lock_); - defer_arena_freeing_ = false; - arena = unused_arenas_; - unused_arenas_ = nullptr; - } - while (arena != nullptr) { - TrackedArena* temp = down_cast<TrackedArena*>(arena->Next()); - delete arena; - arena = temp; + std::lock_guard<std::mutex> lock(lock_); + arenas_freed_ = true; + while (first != nullptr) { + FreeRangeLocked(first->Begin(), first->Size()); + // In other implementations of ArenaPool this is calculated when asked for, + // thanks to the list of free arenas that is kept around. But in this case, + // we release the freed arena back to the pool and therefore need to + // calculate here. + bytes_allocated_ += first->GetBytesAllocated(); + TrackedArena* temp = down_cast<TrackedArena*>(first); + // TODO: Add logic to unmap the maps corresponding to pre-zygote-fork + // arenas, which are expected to be released only during shutdown. + first = first->Next(); + size_t erase_count = allocated_arenas_.erase(*temp); + DCHECK_EQ(erase_count, 1u); } } diff --git a/runtime/base/gc_visited_arena_pool.h b/runtime/base/gc_visited_arena_pool.h index f3c1abcb5a..e307147c9e 100644 --- a/runtime/base/gc_visited_arena_pool.h +++ b/runtime/base/gc_visited_arena_pool.h @@ -17,16 +17,12 @@ #ifndef ART_RUNTIME_BASE_GC_VISITED_ARENA_POOL_H_ #define ART_RUNTIME_BASE_GC_VISITED_ARENA_POOL_H_ -#include <set> - -#include "base/allocator.h" -#include "base/arena_allocator.h" #include "base/casts.h" -#include "base/hash_set.h" +#include "base/arena_allocator.h" #include "base/locks.h" #include "base/mem_map.h" -#include "read_barrier_config.h" -#include "runtime.h" + +#include <set> namespace art { @@ -38,45 +34,27 @@ class TrackedArena final : public Arena { public: // Used for searching in maps. Only arena's starting address is relevant. explicit TrackedArena(uint8_t* addr) : pre_zygote_fork_(false) { memory_ = addr; } - TrackedArena(uint8_t* start, size_t size, bool pre_zygote_fork, bool single_obj_arena); + TrackedArena(uint8_t* start, size_t size, bool pre_zygote_fork); template <typename PageVisitor> void VisitRoots(PageVisitor& visitor) const REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK_ALIGNED(Size(), kPageSize); + DCHECK_ALIGNED(Begin(), kPageSize); + int nr_pages = Size() / kPageSize; uint8_t* page_begin = Begin(); - if (first_obj_array_.get() != nullptr) { - DCHECK_ALIGNED(Size(), kPageSize); - DCHECK_ALIGNED(Begin(), kPageSize); - for (int i = 0, nr_pages = Size() / kPageSize; i < nr_pages; i++, page_begin += kPageSize) { - uint8_t* first = first_obj_array_[i]; - if (first != nullptr) { - visitor(page_begin, first, kPageSize); - } else { - break; - } - } - } else { - size_t page_size = Size(); - while (page_size > kPageSize) { - visitor(page_begin, nullptr, kPageSize); - page_begin += kPageSize; - page_size -= kPageSize; - } - visitor(page_begin, nullptr, page_size); + for (int i = 0; i < nr_pages && first_obj_array_[i] != nullptr; i++, page_begin += kPageSize) { + visitor(page_begin, first_obj_array_[i]); } } // Return the page addr of the first page with first_obj set to nullptr. 
uint8_t* GetLastUsedByte() const REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK_ALIGNED(Begin(), kPageSize); + DCHECK_ALIGNED(End(), kPageSize); // Jump past bytes-allocated for arenas which are not currently being used // by arena-allocator. This helps in reducing loop iterations below. uint8_t* last_byte = AlignUp(Begin() + GetBytesAllocated(), kPageSize); - if (first_obj_array_.get() != nullptr) { - DCHECK_ALIGNED(Begin(), kPageSize); - DCHECK_ALIGNED(End(), kPageSize); - DCHECK_LE(last_byte, End()); - } else { - DCHECK_EQ(last_byte, End()); - } + DCHECK_LE(last_byte, End()); for (size_t i = (last_byte - Begin()) / kPageSize; last_byte < End() && first_obj_array_[i] != nullptr; last_byte += kPageSize, i++) { @@ -88,43 +66,21 @@ class TrackedArena final : public Arena { uint8_t* GetFirstObject(uint8_t* addr) const REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK_LE(Begin(), addr); DCHECK_GT(End(), addr); - if (first_obj_array_.get() != nullptr) { - return first_obj_array_[(addr - Begin()) / kPageSize]; - } else { - // The pages of this arena contain array of GC-roots. So we don't need - // first-object of any given page of the arena. - // Returning null helps distinguish which visitor is to be called. - return nullptr; - } + return first_obj_array_[(addr - Begin()) / kPageSize]; } // Set 'obj_begin' in first_obj_array_ in every element for which it's the // first object. void SetFirstObject(uint8_t* obj_begin, uint8_t* obj_end); - // Setup the arena for deferred deletion. - void SetupForDeferredDeletion(TrackedArena* next_arena) { - DCHECK(next_arena->waiting_for_deletion_); - DCHECK(!waiting_for_deletion_); - waiting_for_deletion_ = true; - next_ = next_arena; - } - bool IsWaitingForDeletion() const { return waiting_for_deletion_; } - // Madvise the pages in the given range. 'begin' is expected to be page - // aligned. - // TODO: Remove this once we remove the shmem (minor-fault) code in - // userfaultfd GC and directly use ZeroAndReleaseMemory(). - static void ReleasePages(uint8_t* begin, size_t size, bool pre_zygote_fork); void Release() override; bool IsPreZygoteForkArena() const { return pre_zygote_fork_; } - bool IsSingleObjectArena() const { return first_obj_array_.get() == nullptr; } private: // first_obj_array_[i] is the object that overlaps with the ith page's // beginning, i.e. first_obj_array_[i] <= ith page_begin. std::unique_ptr<uint8_t*[]> first_obj_array_; const bool pre_zygote_fork_; - bool waiting_for_deletion_; }; // An arena-pool wherein allocations can be tracked so that the GC can visit all @@ -145,24 +101,15 @@ class GcVisitedArenaPool final : public ArenaPool { bool is_zygote = false, const char* name = "LinearAlloc"); virtual ~GcVisitedArenaPool(); - - Arena* AllocArena(size_t size, bool need_first_obj_arr) REQUIRES(lock_); - // Use by arena allocator. 
- Arena* AllocArena(size_t size) override REQUIRES(!lock_) { - WriterMutexLock wmu(Thread::Current(), lock_); - return AllocArena(size, /*single_obj_arena=*/false); - } - void FreeArenaChain(Arena* first) override REQUIRES(!lock_); - size_t GetBytesAllocated() const override REQUIRES(!lock_); + Arena* AllocArena(size_t size) override; + void FreeArenaChain(Arena* first) override; + size_t GetBytesAllocated() const override; void ReclaimMemory() override {} void LockReclaimMemory() override {} void TrimMaps() override {} - uint8_t* AllocSingleObjArena(size_t size) REQUIRES(!lock_); - void FreeSingleObjArena(uint8_t* addr) REQUIRES(!lock_); - - bool Contains(void* ptr) REQUIRES(!lock_) { - ReaderMutexLock rmu(Thread::Current(), lock_); + bool Contains(void* ptr) { + std::lock_guard<std::mutex> lock(lock_); for (auto& map : maps_) { if (map.HasAddress(ptr)) { return true; @@ -172,43 +119,51 @@ class GcVisitedArenaPool final : public ArenaPool { } template <typename PageVisitor> - void VisitRoots(PageVisitor& visitor) REQUIRES_SHARED(Locks::mutator_lock_, lock_) { + void VisitRoots(PageVisitor& visitor) REQUIRES_SHARED(Locks::mutator_lock_) { + std::lock_guard<std::mutex> lock(lock_); for (auto& arena : allocated_arenas_) { - arena->VisitRoots(visitor); + arena.VisitRoots(visitor); } } template <typename Callback> - void ForEachAllocatedArena(Callback cb) REQUIRES_SHARED(Locks::mutator_lock_, lock_) { - // We should not have any unused arenas when calling this function. - CHECK(unused_arenas_ == nullptr); + void ForEachAllocatedArena(Callback cb) REQUIRES_SHARED(Locks::mutator_lock_) { + std::lock_guard<std::mutex> lock(lock_); for (auto& arena : allocated_arenas_) { - cb(*arena); + cb(arena); } } // Called in Heap::PreZygoteFork(). All allocations after this are done in // arena-pool which is visited by userfaultfd. - void SetupPostZygoteMode() REQUIRES(!lock_) { - WriterMutexLock wmu(Thread::Current(), lock_); + void SetupPostZygoteMode() { + std::lock_guard<std::mutex> lock(lock_); DCHECK(pre_zygote_fork_); pre_zygote_fork_ = false; } // For userfaultfd GC to be able to acquire the lock to avoid concurrent // release of arenas when it is visiting them. - ReaderWriterMutex& GetLock() const RETURN_CAPABILITY(lock_) { return lock_; } + std::mutex& GetLock() { return lock_; } + + // Find the given arena in allocated_arenas_. The function is called with + // lock_ acquired. + bool FindAllocatedArena(const TrackedArena* arena) const NO_THREAD_SAFETY_ANALYSIS { + for (auto& allocated_arena : allocated_arenas_) { + if (arena == &allocated_arena) { + return true; + } + } + return false; + } - // Called in the compaction pause to indicate that all arenas that will be - // freed until compaction is done shouldn't delete the TrackedArena object to - // avoid ABA problem. Called with lock_ acquired. - void DeferArenaFreeing() REQUIRES(lock_) { - CHECK(unused_arenas_ == nullptr); - defer_arena_freeing_ = true; + void ClearArenasFreed() { + std::lock_guard<std::mutex> lock(lock_); + arenas_freed_ = false; } - // Clear defer_arena_freeing_ and delete all unused arenas. - void DeleteUnusedArenas() REQUIRES(!lock_); + // The function is called with lock_ acquired. 
+ bool AreArenasFreed() const NO_THREAD_SAFETY_ANALYSIS { return arenas_freed_; } private: void FreeRangeLocked(uint8_t* range_begin, size_t range_size) REQUIRES(lock_); @@ -242,40 +197,31 @@ class GcVisitedArenaPool final : public ArenaPool { } }; - class TrackedArenaEquals { + class LessByArenaAddr { public: - bool operator()(const TrackedArena* a, const TrackedArena* b) const { - return std::equal_to<uint8_t*>{}(a->Begin(), b->Begin()); + bool operator()(const TrackedArena& a, const TrackedArena& b) const { + return std::less<uint8_t*>{}(a.Begin(), b.Begin()); } }; - class TrackedArenaHash { - public: - size_t operator()(const TrackedArena* arena) const { - return std::hash<size_t>{}(reinterpret_cast<uintptr_t>(arena->Begin()) / kPageSize); - } - }; - using AllocatedArenaSet = - HashSet<TrackedArena*, DefaultEmptyFn<TrackedArena*>, TrackedArenaHash, TrackedArenaEquals>; - - mutable ReaderWriterMutex lock_; + // Use a std::mutex here as Arenas are second-from-the-bottom when using MemMaps, and MemMap + // itself uses std::mutex scoped to within an allocate/free only. + mutable std::mutex lock_; std::vector<MemMap> maps_ GUARDED_BY(lock_); std::set<Chunk*, LessByChunkSize> best_fit_allocs_ GUARDED_BY(lock_); std::set<Chunk*, LessByChunkAddr> free_chunks_ GUARDED_BY(lock_); // Set of allocated arenas. It's required to be able to find the arena // corresponding to a given address. - AllocatedArenaSet allocated_arenas_ GUARDED_BY(lock_); + // TODO: consider using HashSet, which is more memory efficient. + std::set<TrackedArena, LessByArenaAddr> allocated_arenas_ GUARDED_BY(lock_); // Number of bytes allocated so far. size_t bytes_allocated_ GUARDED_BY(lock_); - // To hold arenas that are freed while GC is happening. These are kept until - // the end of GC to avoid ABA problem. - TrackedArena* unused_arenas_ GUARDED_BY(lock_); const char* name_; // Flag to indicate that some arenas have been freed. This flag is used as an // optimization by GC to know if it needs to find if the arena being visited // has been freed or not. The flag is cleared in the compaction pause and read // when linear-alloc space is concurrently visited updated to update GC roots. - bool defer_arena_freeing_ GUARDED_BY(lock_); + bool arenas_freed_ GUARDED_BY(lock_); const bool low_4gb_; // Set to true in zygote process so that all linear-alloc allocations are in // private-anonymous mappings and not on userfaultfd visited pages. At @@ -286,55 +232,6 @@ class GcVisitedArenaPool final : public ArenaPool { DISALLOW_COPY_AND_ASSIGN(GcVisitedArenaPool); }; -// Allocator for class-table and intern-table hash-sets. It enables updating the -// roots concurrently page-by-page. -template <class T, AllocatorTag kTag> -class GcRootArenaAllocator : public TrackingAllocator<T, kTag> { - public: - using value_type = typename TrackingAllocator<T, kTag>::value_type; - using size_type = typename TrackingAllocator<T, kTag>::size_type; - using difference_type = typename TrackingAllocator<T, kTag>::difference_type; - using pointer = typename TrackingAllocator<T, kTag>::pointer; - using const_pointer = typename TrackingAllocator<T, kTag>::const_pointer; - using reference = typename TrackingAllocator<T, kTag>::reference; - using const_reference = typename TrackingAllocator<T, kTag>::const_reference; - - // Used internally by STL data structures. - template <class U> - explicit GcRootArenaAllocator( - [[maybe_unused]] const GcRootArenaAllocator<U, kTag>& alloc) noexcept {} - // Used internally by STL data structures. 
- GcRootArenaAllocator() noexcept : TrackingAllocator<T, kTag>() {} - - // Enables an allocator for objects of one type to allocate storage for objects of another type. - // Used internally by STL data structures. - template <class U> - struct rebind { - using other = GcRootArenaAllocator<U, kTag>; - }; - - pointer allocate(size_type n, [[maybe_unused]] const_pointer hint = 0) { - if (!gUseUserfaultfd) { - return TrackingAllocator<T, kTag>::allocate(n); - } - size_t size = n * sizeof(T); - GcVisitedArenaPool* pool = - down_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool()); - return reinterpret_cast<pointer>(pool->AllocSingleObjArena(size)); - } - - template <typename PT> - void deallocate(PT p, size_type n) { - if (!gUseUserfaultfd) { - TrackingAllocator<T, kTag>::deallocate(p, n); - return; - } - GcVisitedArenaPool* pool = - down_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool()); - pool->FreeSingleObjArena(reinterpret_cast<uint8_t*>(p)); - } -}; - } // namespace art #endif // ART_RUNTIME_BASE_GC_VISITED_ARENA_POOL_H_ diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index d4666e5f9c..9d175cdba5 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -2426,9 +2426,17 @@ void ClassLinker::VisitClassRoots(RootVisitor* visitor, VisitRootFlags flags) { boot_class_table_->VisitRoots(root_visitor); // If tracing is enabled, then mark all the class loaders to prevent unloading. if ((flags & kVisitRootFlagClassLoader) != 0 || tracing_enabled) { - for (const ClassLoaderData& data : class_loaders_) { - GcRoot<mirror::Object> root(GcRoot<mirror::Object>(self->DecodeJObject(data.weak_root))); - root.VisitRoot(visitor, RootInfo(kRootVMInternal)); + gc::Heap* const heap = Runtime::Current()->GetHeap(); + // Don't visit class-loaders if compacting with userfaultfd GC as these + // weaks are updated using Runtime::SweepSystemWeaks() and the GC doesn't + // tolerate double updates. + if (!heap->IsPerformingUffdCompaction()) { + for (const ClassLoaderData& data : class_loaders_) { + GcRoot<mirror::Object> root(GcRoot<mirror::Object>(self->DecodeJObject(data.weak_root))); + root.VisitRoot(visitor, RootInfo(kRootVMInternal)); + } + } else { + DCHECK_EQ(heap->CurrentCollectorType(), gc::CollectorType::kCollectorTypeCMC); } } } else if (!gUseReadBarrier && (flags & kVisitRootFlagNewRoots) != 0) { @@ -2468,11 +2476,9 @@ void ClassLinker::VisitClassRoots(RootVisitor* visitor, VisitRootFlags flags) { // Keep in sync with InitCallback. Anything we visit, we need to // reinit references to when reinitializing a ClassLinker from a // mapped image. -void ClassLinker::VisitRoots(RootVisitor* visitor, VisitRootFlags flags, bool visit_class_roots) { +void ClassLinker::VisitRoots(RootVisitor* visitor, VisitRootFlags flags) { class_roots_.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal)); - if (visit_class_roots) { - VisitClassRoots(visitor, flags); - } + VisitClassRoots(visitor, flags); // Instead of visiting the find_array_class_cache_ drop it so that it doesn't prevent class // unloading if we are marking roots. 
DropFindArrayClassCache(); diff --git a/runtime/class_linker.h b/runtime/class_linker.h index c81713b116..3aae0422c1 100644 --- a/runtime/class_linker.h +++ b/runtime/class_linker.h @@ -514,7 +514,7 @@ class ClassLinker { void VisitClassRoots(RootVisitor* visitor, VisitRootFlags flags) REQUIRES(!Locks::classlinker_classes_lock_, !Locks::trace_lock_) REQUIRES_SHARED(Locks::mutator_lock_); - void VisitRoots(RootVisitor* visitor, VisitRootFlags flags, bool visit_class_roots = true) + void VisitRoots(RootVisitor* visitor, VisitRootFlags flags) REQUIRES(!Locks::dex_lock_, !Locks::classlinker_classes_lock_, !Locks::trace_lock_) REQUIRES_SHARED(Locks::mutator_lock_); // Visits all dex-files accessible by any class-loader or the BCP. @@ -866,9 +866,6 @@ class ClassLinker { // Enable or disable public sdk checks. virtual void SetEnablePublicSdkChecks(bool enabled); void RemoveDexFromCaches(const DexFile& dex_file); - ClassTable* GetBootClassTable() REQUIRES_SHARED(Locks::classlinker_classes_lock_) { - return boot_class_table_.get(); - } protected: virtual bool InitializeClass(Thread* self, diff --git a/runtime/class_table-inl.h b/runtime/class_table-inl.h index 4ee59a79f7..ecc8a0a620 100644 --- a/runtime/class_table-inl.h +++ b/runtime/class_table-inl.h @@ -68,14 +68,12 @@ inline bool ClassTable::ClassDescriptorEquals::operator()(const TableSlot& a, return a.Read<kWithoutReadBarrier>()->DescriptorEquals(b.first); } -template <class Visitor> -void ClassTable::VisitRoots(Visitor& visitor, bool skip_classes) { +template<class Visitor> +void ClassTable::VisitRoots(Visitor& visitor) { ReaderMutexLock mu(Thread::Current(), lock_); - if (!skip_classes) { - for (ClassSet& class_set : classes_) { - for (TableSlot& table_slot : class_set) { - table_slot.VisitRoot(visitor); - } + for (ClassSet& class_set : classes_) { + for (TableSlot& table_slot : class_set) { + table_slot.VisitRoot(visitor); } } for (GcRoot<mirror::Object>& root : strong_roots_) { @@ -88,14 +86,12 @@ void ClassTable::VisitRoots(Visitor& visitor, bool skip_classes) { } } -template <class Visitor> -void ClassTable::VisitRoots(const Visitor& visitor, bool skip_classes) { +template<class Visitor> +void ClassTable::VisitRoots(const Visitor& visitor) { ReaderMutexLock mu(Thread::Current(), lock_); - if (!skip_classes) { - for (ClassSet& class_set : classes_) { - for (TableSlot& table_slot : class_set) { - table_slot.VisitRoot(visitor); - } + for (ClassSet& class_set : classes_) { + for (TableSlot& table_slot : class_set) { + table_slot.VisitRoot(visitor); } } for (GcRoot<mirror::Object>& root : strong_roots_) { @@ -108,18 +104,6 @@ void ClassTable::VisitRoots(const Visitor& visitor, bool skip_classes) { } } -template <class Condition, class Visitor> -void ClassTable::VisitClassesIfConditionMet(Condition& cond, Visitor& visitor) { - ReaderMutexLock mu(Thread::Current(), lock_); - for (ClassSet& class_set : classes_) { - if (cond(class_set)) { - for (TableSlot& table_slot : class_set) { - table_slot.VisitRoot(visitor); - } - } - } -} - template <typename Visitor> class ClassTable::TableSlot::ClassAndRootVisitor { public: diff --git a/runtime/class_table.h b/runtime/class_table.h index 54e066a18c..7e263737c3 100644 --- a/runtime/class_table.h +++ b/runtime/class_table.h @@ -21,7 +21,7 @@ #include <utility> #include <vector> -#include "base/gc_visited_arena_pool.h" +#include "base/allocator.h" #include "base/hash_set.h" #include "base/macros.h" #include "base/mutex.h" @@ -151,7 +151,7 @@ class ClassTable { TableSlotEmptyFn, ClassDescriptorHash, 
ClassDescriptorEquals, - GcRootArenaAllocator<TableSlot, kAllocatorTagClassTable>>; + TrackingAllocator<TableSlot, kAllocatorTagClassTable>>; ClassTable(); @@ -181,7 +181,7 @@ class ClassTable { REQUIRES(!lock_) REQUIRES_SHARED(Locks::mutator_lock_); - // Returns the number of class-sets in the class table. + // Returns the number of classes in the class table. size_t Size() const REQUIRES(!lock_) REQUIRES_SHARED(Locks::mutator_lock_); @@ -194,13 +194,17 @@ class ClassTable { REQUIRES_SHARED(Locks::mutator_lock_); // NO_THREAD_SAFETY_ANALYSIS for object marking requiring heap bitmap lock. - template <class Visitor> - void VisitRoots(Visitor& visitor, bool skip_classes = false) NO_THREAD_SAFETY_ANALYSIS - REQUIRES(!lock_) REQUIRES_SHARED(Locks::mutator_lock_); + template<class Visitor> + void VisitRoots(Visitor& visitor) + NO_THREAD_SAFETY_ANALYSIS + REQUIRES(!lock_) + REQUIRES_SHARED(Locks::mutator_lock_); - template <class Visitor> - void VisitRoots(const Visitor& visitor, bool skip_classes = false) NO_THREAD_SAFETY_ANALYSIS - REQUIRES(!lock_) REQUIRES_SHARED(Locks::mutator_lock_); + template<class Visitor> + void VisitRoots(const Visitor& visitor) + NO_THREAD_SAFETY_ANALYSIS + REQUIRES(!lock_) + REQUIRES_SHARED(Locks::mutator_lock_); template<class Visitor> void VisitClassesAndRoots(Visitor& visitor) @@ -208,10 +212,6 @@ class ClassTable { REQUIRES(!lock_) REQUIRES_SHARED(Locks::mutator_lock_); - // Visit classes in those class-sets which satisfy 'cond'. - template <class Condition, class Visitor> - void VisitClassesIfConditionMet(Condition& cond, Visitor& visitor) REQUIRES(!lock_) - REQUIRES_SHARED(Locks::mutator_lock_); // Stops visit if the visitor returns false. template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier, typename Visitor> bool Visit(Visitor& visitor) diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc index f49c8cba7a..a8ec6537db 100644 --- a/runtime/gc/collector/mark_compact.cc +++ b/runtime/gc/collector/mark_compact.cc @@ -303,8 +303,9 @@ static constexpr size_t kMaxNumUffdWorkers = 2; // Number of compaction buffers reserved for mutator threads in SIGBUS feature // case. It's extremely unlikely that we will ever have more than these number // of mutator threads trying to access the moving-space during one compaction -// phase. -static constexpr size_t kMutatorCompactionBufferCount = 2048; +// phase. Using a lower number in debug builds to hopefully catch the issue +// before it becomes a problem on user builds. +static constexpr size_t kMutatorCompactionBufferCount = kIsDebugBuild ? 256 : 2048; // Minimum from-space chunk to be madvised (during concurrent compaction) in one go. static constexpr ssize_t kMinFromSpaceMadviseSize = 1 * MB; // Concurrent compaction termination logic is different (and slightly more efficient) if the @@ -2387,7 +2388,7 @@ void MarkCompact::UpdateNonMovingPage(mirror::Object* first, uint8_t* page) { } void MarkCompact::UpdateNonMovingSpace() { - TimingLogger::ScopedTiming t("(Paused)UpdateNonMovingSpace", GetTimings()); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); // Iterating in reverse ensures that the class pointer in objects which span // across more than one page gets updated in the end. This is necessary for // VisitRefsForCompaction() to work correctly. 
@@ -2604,8 +2605,7 @@ class MarkCompact::ClassLoaderRootsUpdater : public ClassLoaderVisitor { REQUIRES_SHARED(Locks::classlinker_classes_lock_, Locks::mutator_lock_) { ClassTable* const class_table = class_loader->GetClassTable(); if (class_table != nullptr) { - // Classes are updated concurrently. - class_table->VisitRoots(*this, /*skip_classes=*/true); + class_table->VisitRoots(*this); } } @@ -2636,10 +2636,8 @@ class MarkCompact::LinearAllocPageUpdater { moving_space_end_(collector->moving_space_end_), last_page_touched_(false) {} - // Update a page in multi-object arena. - void MultiObjectArena(uint8_t* page_begin, uint8_t* first_obj) + void operator()(uint8_t* page_begin, uint8_t* first_obj) ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) { - DCHECK(first_obj != nullptr); DCHECK_ALIGNED(page_begin, kPageSize); uint8_t* page_end = page_begin + kPageSize; uint32_t obj_size; @@ -2669,28 +2667,6 @@ class MarkCompact::LinearAllocPageUpdater { last_page_touched_ = true; } - // This version is only used for cases where the entire page is filled with - // GC-roots. For example, class-table and intern-table. - void SingleObjectArena(uint8_t* page_begin, size_t page_size) - REQUIRES_SHARED(Locks::mutator_lock_) { - static_assert(sizeof(uint32_t) == sizeof(GcRoot<mirror::Object>)); - DCHECK_ALIGNED(page_begin, kAlignment); - // Least significant bits are used by class-table. - static constexpr uint32_t kMask = kObjectAlignment - 1; - size_t num_roots = page_size / sizeof(GcRoot<mirror::Object>); - uint32_t* root_ptr = reinterpret_cast<uint32_t*>(page_begin); - for (size_t i = 0; i < num_roots; root_ptr++, i++) { - uint32_t word = *root_ptr; - if (word != 0) { - uint32_t lsbs = word & kMask; - word &= ~kMask; - VisitRootIfNonNull(reinterpret_cast<mirror::CompressedReference<mirror::Object>*>(&word)); - *root_ptr = word | lsbs; - last_page_touched_ = true; - } - } - } - bool WasLastPageTouched() const { return last_page_touched_; } void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const @@ -2709,8 +2685,7 @@ class MarkCompact::LinearAllocPageUpdater { if (reinterpret_cast<uint8_t*>(old_ref) >= collector_->black_allocations_begin_) { new_ref = collector_->PostCompactBlackObjAddr(old_ref); } else if (collector_->live_words_bitmap_->Test(old_ref)) { - DCHECK(collector_->moving_space_bitmap_->Test(old_ref)) - << "ref:" << old_ref << " root:" << root; + DCHECK(collector_->moving_space_bitmap_->Test(old_ref)) << old_ref; new_ref = collector_->PostCompactOldObjAddr(old_ref); } if (old_ref != new_ref) { @@ -2779,34 +2754,9 @@ class MarkCompact::LinearAllocPageUpdater { uint8_t* const moving_space_begin_; uint8_t* const moving_space_end_; // Whether the last page was touched or not. - bool last_page_touched_ = false; + bool last_page_touched_; }; -void MarkCompact::UpdateClassTableClasses(Runtime* runtime, bool immune_class_table_only) { - // If the process is debuggable then redefinition is allowed, which may mean - // pre-zygote-fork class-tables may have pointer to class in moving-space. - // So visit classes from class-sets that are not in linear-alloc arena-pool. - if (UNLIKELY(runtime->IsJavaDebuggableAtInit())) { - ClassLinker* linker = runtime->GetClassLinker(); - ClassLoaderRootsUpdater updater(this); - GcVisitedArenaPool* pool = static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool()); - auto cond = [this, pool, immune_class_table_only](ClassTable::ClassSet& set) -> bool { - if (!set.empty()) { - return immune_class_table_only ? 
- immune_spaces_.ContainsObject(reinterpret_cast<mirror::Object*>(&*set.begin())) : - !pool->Contains(reinterpret_cast<void*>(&*set.begin())); - } - return false; - }; - linker->VisitClassTables([cond, &updater](ClassTable* table) - REQUIRES_SHARED(Locks::mutator_lock_) { - table->VisitClassesIfConditionMet(cond, updater); - }); - ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_); - linker->GetBootClassTable()->VisitClassesIfConditionMet(cond, updater); - } -} - void MarkCompact::CompactionPause() { TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); Runtime* runtime = Runtime::Current(); @@ -2860,6 +2810,65 @@ void MarkCompact::CompactionPause() { heap_->GetReferenceProcessor()->UpdateRoots(this); } { + TimingLogger::ScopedTiming t2("(Paused)UpdateClassLoaderRoots", GetTimings()); + ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_); + { + ClassLoaderRootsUpdater updater(this); + runtime->GetClassLinker()->VisitClassLoaders(&updater); + } + } + + bool has_zygote_space = heap_->HasZygoteSpace(); + // TODO: Find out why it's not sufficient to visit native roots of immune + // spaces, and why all the pre-zygote fork arenas have to be linearly updated. + // Is it possible that some native root starts getting pointed to by some object + // in moving space after fork? Or are we missing a write-barrier somewhere + // when a native root is updated? + GcVisitedArenaPool* arena_pool = + static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool()); + if (uffd_ == kFallbackMode || (!has_zygote_space && runtime->IsZygote())) { + // Besides fallback-mode, visit linear-alloc space in the pause for zygote + // processes prior to first fork (that's when zygote space gets created). + if (kIsDebugBuild && IsValidFd(uffd_)) { + // All arenas allocated so far are expected to be pre-zygote fork. + arena_pool->ForEachAllocatedArena( + [](const TrackedArena& arena) + REQUIRES_SHARED(Locks::mutator_lock_) { CHECK(arena.IsPreZygoteForkArena()); }); + } + LinearAllocPageUpdater updater(this); + arena_pool->VisitRoots(updater); + } else { + // Clear the flag as we care about this only if arenas are freed during + // concurrent compaction. + arena_pool->ClearArenasFreed(); + arena_pool->ForEachAllocatedArena( + [this](const TrackedArena& arena) REQUIRES_SHARED(Locks::mutator_lock_) { + // The pre-zygote fork arenas are not visited concurrently in the + // zygote children processes. The native roots of the dirty objects + // are visited during immune space visit below. + if (!arena.IsPreZygoteForkArena()) { + uint8_t* last_byte = arena.GetLastUsedByte(); + CHECK(linear_alloc_arenas_.insert({&arena, last_byte}).second); + } else { + LinearAllocPageUpdater updater(this); + arena.VisitRoots(updater); + } + }); + } + + SweepSystemWeaks(thread_running_gc_, runtime, /*paused*/ true); + + { + TimingLogger::ScopedTiming t2("(Paused)UpdateConcurrentRoots", GetTimings()); + runtime->VisitConcurrentRoots(this, kVisitRootFlagAllRoots); + } + { + // TODO: don't visit the transaction roots if it's not active. + TimingLogger::ScopedTiming t2("(Paused)UpdateNonThreadRoots", GetTimings()); + runtime->VisitNonThreadRoots(this); + } + + { // TODO: Immune space updation has to happen either before or after // remapping pre-compact pages to from-space. 
And depending on when it's // done, we have to invoke VisitRefsForCompaction() with or without @@ -2890,98 +2899,11 @@ void MarkCompact::CompactionPause() { } } - { - TimingLogger::ScopedTiming t2("(Paused)UpdateRoots", GetTimings()); - runtime->VisitConcurrentRoots(this, kVisitRootFlagAllRoots); - runtime->VisitNonThreadRoots(this); - { - ClassLinker* linker = runtime->GetClassLinker(); - ClassLoaderRootsUpdater updater(this); - ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_); - linker->VisitClassLoaders(&updater); - linker->GetBootClassTable()->VisitRoots(updater, /*skip_classes=*/true); - } - SweepSystemWeaks(thread_running_gc_, runtime, /*paused=*/true); - - bool has_zygote_space = heap_->HasZygoteSpace(); - GcVisitedArenaPool* arena_pool = - static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool()); - // Update immune/pre-zygote class-tables in case class redefinition took - // place. pre-zygote class-tables that are not in immune spaces are updated - // below if we are in fallback-mode or if there is no zygote space. So in - // that case only visit class-tables that are there in immune-spaces. - UpdateClassTableClasses(runtime, uffd_ == kFallbackMode || !has_zygote_space); - - // Acquire arena-pool's lock, which should be released after the pool is - // userfaultfd registered. This is to ensure that no new arenas are - // allocated and used in between. Since they will not be captured in - // linear_alloc_arenas_ below, we will miss updating their pages. The same - // reason also applies to new allocations within the existing arena which - // may change last_byte. - // Since we are in a STW pause, this shouldn't happen anyways, but holding - // the lock confirms it. - // TODO (b/305779657): Replace with ExclusiveTryLock() and assert that it - // doesn't fail once it is available for ReaderWriterMutex. - WriterMutexLock pool_wmu(thread_running_gc_, arena_pool->GetLock()); - - // TODO: Find out why it's not sufficient to visit native roots of immune - // spaces, and why all the pre-zygote fork arenas have to be linearly updated. - // Is it possible that some native root starts getting pointed to by some object - // in moving space after fork? Or are we missing a write-barrier somewhere - // when a native root is updated? - auto arena_visitor = [this](uint8_t* page_begin, uint8_t* first_obj, size_t page_size) - REQUIRES_SHARED(Locks::mutator_lock_) { - LinearAllocPageUpdater updater(this); - if (first_obj != nullptr) { - updater.MultiObjectArena(page_begin, first_obj); - } else { - updater.SingleObjectArena(page_begin, page_size); - } - }; - if (uffd_ == kFallbackMode || (!has_zygote_space && runtime->IsZygote())) { - // Besides fallback-mode, visit linear-alloc space in the pause for zygote - // processes prior to first fork (that's when zygote space gets created). - if (kIsDebugBuild && IsValidFd(uffd_)) { - // All arenas allocated so far are expected to be pre-zygote fork. - arena_pool->ForEachAllocatedArena( - [](const TrackedArena& arena) - REQUIRES_SHARED(Locks::mutator_lock_) { CHECK(arena.IsPreZygoteForkArena()); }); - } - arena_pool->VisitRoots(arena_visitor); - } else { - // Inform the arena-pool that compaction is going on. So the TrackedArena - // objects corresponding to the arenas that are freed shouldn't be deleted - // immediately. We will do that in FinishPhase(). This is to avoid ABA - // problem. 
- arena_pool->DeferArenaFreeing(); - arena_pool->ForEachAllocatedArena( - [this, arena_visitor, has_zygote_space](const TrackedArena& arena) - REQUIRES_SHARED(Locks::mutator_lock_) { - // The pre-zygote fork arenas are not visited concurrently in the - // zygote children processes. The native roots of the dirty objects - // are visited during immune space visit below. - if (!arena.IsPreZygoteForkArena()) { - uint8_t* last_byte = arena.GetLastUsedByte(); - auto ret = linear_alloc_arenas_.insert({&arena, last_byte}); - CHECK(ret.second); - } else if (!arena.IsSingleObjectArena() || !has_zygote_space) { - // Pre-zygote class-table and intern-table don't need to be updated. - // TODO: Explore the possibility of using /proc/self/pagemap to - // fetch which pages in these arenas are private-dirty and then only - // visit those pages. To optimize it further, we can keep all - // pre-zygote arenas in a single memory range so that just one read - // from pagemap is sufficient. - arena.VisitRoots(arena_visitor); - } - }); - } - if (use_uffd_sigbus_) { - // Release order wrt to mutator threads' SIGBUS handler load. - sigbus_in_progress_count_.store(0, std::memory_order_release); - } - KernelPreparation(); + if (use_uffd_sigbus_) { + // Release order wrt to mutator threads' SIGBUS handler load. + sigbus_in_progress_count_.store(0, std::memory_order_release); } - + KernelPreparation(); UpdateNonMovingSpace(); // fallback mode if (uffd_ == kFallbackMode) { @@ -3047,7 +2969,7 @@ void MarkCompact::KernelPrepareRangeForUffd(uint8_t* to_addr, } void MarkCompact::KernelPreparation() { - TimingLogger::ScopedTiming t("(Paused)KernelPreparation", GetTimings()); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); uint8_t* moving_space_begin = bump_pointer_space_->Begin(); size_t moving_space_size = bump_pointer_space_->Capacity(); int mode = kCopyMode; @@ -3474,22 +3396,14 @@ void MarkCompact::ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool i arena_iter = arena_iter != linear_alloc_arenas_.begin() ? std::prev(arena_iter) : linear_alloc_arenas_.end(); } - // Unlike ProcessLinearAlloc(), we don't need to hold arena-pool's lock here - // because a thread trying to access the page and as a result causing this - // userfault confirms that nobody can delete the corresponding arena and - // release its pages. - // NOTE: We may have some memory range be recycled several times during a - // compaction cycle, thereby potentially causing userfault on the same page - // several times. That's not a problem as all of them (except for possibly the - // first one) would require us mapping a zero-page, which we do without updating - // the 'state_arr'. - if (arena_iter == linear_alloc_arenas_.end() || - arena_iter->first->IsWaitingForDeletion() || - arena_iter->second <= fault_page) { + if (arena_iter == linear_alloc_arenas_.end() || arena_iter->second <= fault_page) { // Fault page isn't in any of the arenas that existed before we started // compaction. So map zeropage and return. ZeropageIoctl(fault_page, /*tolerate_eexist=*/true, /*tolerate_enoent=*/false); } else { + // fault_page should always belong to some arena. 
+ DCHECK(arena_iter != linear_alloc_arenas_.end()) + << "fault_page:" << static_cast<void*>(fault_page) << "is_minor_fault:" << is_minor_fault; // Find the linear-alloc space containing fault-page LinearAllocSpaceData* space_data = nullptr; for (auto& data : linear_alloc_spaces_data_) { @@ -3514,15 +3428,10 @@ void MarkCompact::ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool i if (state_arr[page_idx].compare_exchange_strong( state, PageState::kProcessingAndMapping, std::memory_order_acquire)) { if (kMode == kCopyMode || is_minor_fault) { - LinearAllocPageUpdater updater(this); uint8_t* first_obj = arena_iter->first->GetFirstObject(fault_page); - // null first_obj indicates that it's a page from arena for - // intern-table/class-table. So first object isn't required. - if (first_obj != nullptr) { - updater.MultiObjectArena(fault_page + diff, first_obj + diff); - } else { - updater.SingleObjectArena(fault_page + diff, kPageSize); - } + DCHECK_NE(first_obj, nullptr); + LinearAllocPageUpdater updater(this); + updater(fault_page + diff, first_obj + diff); if (kMode == kCopyMode) { MapUpdatedLinearAllocPage(fault_page, fault_page + diff, @@ -3591,7 +3500,6 @@ void MarkCompact::ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool i void MarkCompact::ProcessLinearAlloc() { GcVisitedArenaPool* arena_pool = static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool()); - DCHECK_EQ(thread_running_gc_, Thread::Current()); for (auto& pair : linear_alloc_arenas_) { const TrackedArena* arena = pair.first; size_t arena_size; @@ -3599,15 +3507,12 @@ void MarkCompact::ProcessLinearAlloc() { ptrdiff_t diff; bool others_processing; { - // Acquire arena-pool's lock (in shared-mode) so that the arena being updated - // does not get deleted at the same time. If this critical section is too - // long and impacts mutator response time, then we get rid of this lock by - // holding onto memory ranges of all deleted (since compaction pause) - // arenas until completion finishes. - ReaderMutexLock rmu(thread_running_gc_, arena_pool->GetLock()); + // Acquire arena-pool's lock so that the arena being worked cannot be + // deallocated at the same time. + std::lock_guard<std::mutex> lock(arena_pool->GetLock()); // If any arenas were freed since compaction pause then skip them from // visiting. - if (arena->IsWaitingForDeletion()) { + if (arena_pool->AreArenasFreed() && !arena_pool->FindAllocatedArena(arena)) { continue; } uint8_t* last_byte = pair.second; @@ -3623,12 +3528,11 @@ void MarkCompact::ProcessLinearAlloc() { break; } } - CHECK_NE(space_data, nullptr); + DCHECK_NE(space_data, nullptr); diff = space_data->shadow_.Begin() - space_data->begin_; auto visitor = [space_data, last_byte, diff, this, &others_processing]( uint8_t* page_begin, - uint8_t* first_obj, - size_t page_size) REQUIRES_SHARED(Locks::mutator_lock_) { + uint8_t* first_obj) REQUIRES_SHARED(Locks::mutator_lock_) { // No need to process pages past last_byte as they already have updated // gc-roots, if any. if (page_begin >= last_byte) { @@ -3647,14 +3551,7 @@ void MarkCompact::ProcessLinearAlloc() { // reason, we used 'release' order for changing the state to 'processed'. if (state_arr[page_idx].compare_exchange_strong( expected_state, desired_state, std::memory_order_acquire)) { - // null first_obj indicates that it's a page from arena for - // intern-table/class-table. So first object isn't required. 
- if (first_obj != nullptr) { - updater.MultiObjectArena(page_begin + diff, first_obj + diff); - } else { - DCHECK_EQ(page_size, kPageSize); - updater.SingleObjectArena(page_begin + diff, page_size); - } + updater(page_begin + diff, first_obj + diff); expected_state = PageState::kProcessing; if (!minor_fault_initialized_) { MapUpdatedLinearAllocPage( @@ -4457,9 +4354,6 @@ void MarkCompact::FinishPhase() { DCHECK_EQ(fstat(moving_to_space_fd_, &buf), 0) << "fstat failed: " << strerror(errno); DCHECK_EQ(buf.st_blocks, 0u); } - GcVisitedArenaPool* arena_pool = - static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool()); - arena_pool->DeleteUnusedArenas(); } } // namespace collector diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h index 1ecb49ae52..3dea40842c 100644 --- a/runtime/gc/collector/mark_compact.h +++ b/runtime/gc/collector/mark_compact.h @@ -560,11 +560,6 @@ class MarkCompact final : public GarbageCollector { // Initialize all the info-map related fields of this GC. Returns total size // of all the structures in info-map. size_t InitializeInfoMap(uint8_t* p, size_t moving_space_sz); - // Update class-table classes in compaction pause if we are running in debuggable - // mode. Only visit class-table in image spaces if 'immune_class_table_only' - // is true. - void UpdateClassTableClasses(Runtime* runtime, bool immune_class_table_only) - REQUIRES_SHARED(Locks::mutator_lock_); // For checkpoints Barrier gc_barrier_; diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index 329777c7d7..796f8fbf09 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -2451,11 +2451,9 @@ void Heap::PreZygoteFork() { return; } Runtime* runtime = Runtime::Current(); - // Setup linear-alloc pool for post-zygote fork allocations before freezing - // snapshots of intern-table and class-table. - runtime->SetupLinearAllocForPostZygoteFork(self); runtime->GetInternTable()->AddNewTable(); runtime->GetClassLinker()->MoveClassTableToPreZygote(); + runtime->SetupLinearAllocForPostZygoteFork(self); VLOG(heap) << "Starting PreZygoteFork"; // The end of the non-moving space may be protected, unprotect it so that we can copy the zygote // there. diff --git a/runtime/intern_table.h b/runtime/intern_table.h index 0fcada1af4..d115f51f7e 100644 --- a/runtime/intern_table.h +++ b/runtime/intern_table.h @@ -17,8 +17,8 @@ #ifndef ART_RUNTIME_INTERN_TABLE_H_ #define ART_RUNTIME_INTERN_TABLE_H_ +#include "base/allocator.h" #include "base/dchecked_vector.h" -#include "base/gc_visited_arena_pool.h" #include "base/hash_set.h" #include "base/mutex.h" #include "gc/weak_root_state.h" @@ -109,12 +109,11 @@ class InternTable { } }; - using UnorderedSet = - HashSet<GcRoot<mirror::String>, - GcRootEmptyFn, - StringHash, - StringEquals, - GcRootArenaAllocator<GcRoot<mirror::String>, kAllocatorTagInternTable>>; + using UnorderedSet = HashSet<GcRoot<mirror::String>, + GcRootEmptyFn, + StringHash, + StringEquals, + TrackingAllocator<GcRoot<mirror::String>, kAllocatorTagInternTable>>; InternTable(); diff --git a/runtime/runtime.cc b/runtime/runtime.cc index 97db5fb0fd..9c3b9de70c 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -859,11 +859,7 @@ void Runtime::CallExitHook(jint status) { } void Runtime::SweepSystemWeaks(IsMarkedVisitor* visitor) { - // Userfaultfd compaction updates weak intern-table page-by-page via - // LinearAlloc. 
- if (!GetHeap()->IsPerformingUffdCompaction()) { - GetInternTable()->SweepInternTableWeaks(visitor); - } + GetInternTable()->SweepInternTableWeaks(visitor); GetMonitorList()->SweepMonitorList(visitor); GetJavaVM()->SweepJniWeakGlobals(visitor); GetHeap()->SweepAllocationRecords(visitor); @@ -2594,14 +2590,8 @@ void Runtime::VisitConstantRoots(RootVisitor* visitor) { } void Runtime::VisitConcurrentRoots(RootVisitor* visitor, VisitRootFlags flags) { - // Userfaultfd compaction updates intern-tables and class-tables page-by-page - // via LinearAlloc. So don't visit them here. - if (GetHeap()->IsPerformingUffdCompaction()) { - class_linker_->VisitRoots(visitor, flags, /*visit_class_roots=*/false); - } else { - intern_table_->VisitRoots(visitor, flags); - class_linker_->VisitRoots(visitor, flags, /*visit_class_roots=*/true); - } + intern_table_->VisitRoots(visitor, flags); + class_linker_->VisitRoots(visitor, flags); jni_id_manager_->VisitRoots(visitor); heap_->VisitAllocationRecords(visitor); if (jit_ != nullptr) { |