Update native GC roots page-by-page

This CL enables updating native GC roots during compaction page by
page. It does so by using a single-space linear allocator for
allocating ArtMethods/ArtFields/DexCache arrays, etc., and a
per-allocation header that describes the kind of object/array and its
size. Under the hood it still uses the arena allocator, but the arenas
are page-aligned regions carved out of a single space.

This will allow a future CL to use userfaultfd to protect this space
during the compaction pause and then update its pages concurrently and
independently.
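
For reference, the per-page walk this enables looks roughly as follows.
This is a condensed sketch of the NativeRootsUpdateVisitor changes in
mark_compact.cc: the 16-byte-alignment and memory-tool handling are
omitted, WalkPage is only an illustrative name, and VisitObject stands
for the per-kind root visiting.

  // Walk one page of a tracked arena using the per-allocation headers.
  // 'first_obj' is the first allocation overlapping the page, as
  // recorded by TrackedArena::SetFirstObject().
  void WalkPage(uint8_t* page_begin, uint8_t* first_obj) {
    uint8_t* page_end = page_begin + kPageSize;
    for (uint8_t* byte = first_obj; byte < page_end;) {
      TrackingHeader* header = reinterpret_cast<TrackingHeader*>(byte);
      if (header->GetSize() == 0) {
        break;  // Unused tail of the arena; nothing more on this page.
      }
      uint8_t* obj = byte + sizeof(TrackingHeader);
      uint8_t* obj_end = byte + header->GetSize();
      // Visit only the part of the allocation that lies within this
      // page, so that pages can be processed independently.
      VisitObject(header->GetKind(), obj,
                  std::max(obj, page_begin), std::min(obj_end, page_end));
      byte += RoundUp(header->GetSize(), LinearAlloc::kAlignment);
    }
  }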

Bug: 160737021
Test: ART_USE_READ_BARRIER art/test/testrunner/testrunner.py --host
Change-Id: Ie52243741360f6008feccec76117d34c77ab1dcf
diff --git a/compiler/debug/elf_debug_info_writer.h b/compiler/debug/elf_debug_info_writer.h
index 986c7e8..04981aa 100644
--- a/compiler/debug/elf_debug_info_writer.h
+++ b/compiler/debug/elf_debug_info_writer.h
@@ -32,7 +32,7 @@
 #include "dwarf/debug_info_entry_writer.h"
 #include "elf/elf_builder.h"
 #include "heap_poisoning.h"
-#include "linear_alloc.h"
+#include "linear_alloc-inl.h"
 #include "mirror/array.h"
 #include "mirror/class-inl.h"
 #include "mirror/class.h"
@@ -478,7 +478,9 @@
     if (methods_ptr == nullptr) {
       // Some types might have no methods.  Allocate empty array instead.
       LinearAlloc* allocator = Runtime::Current()->GetLinearAlloc();
-      void* storage = allocator->Alloc(Thread::Current(), sizeof(LengthPrefixedArray<ArtMethod>));
+      void* storage = allocator->Alloc(Thread::Current(),
+                                       sizeof(LengthPrefixedArray<ArtMethod>),
+                                       LinearAllocKind::kNoGCRoots);
       methods_ptr = new (storage) LengthPrefixedArray<ArtMethod>(0);
       type->SetMethodsPtr(methods_ptr, 0, 0);
       DCHECK(type->GetMethodsPtr() != nullptr);
diff --git a/libartbase/base/arena_allocator.cc b/libartbase/base/arena_allocator.cc
index 76f2883..250a3d9 100644
--- a/libartbase/base/arena_allocator.cc
+++ b/libartbase/base/arena_allocator.cc
@@ -28,8 +28,6 @@
 
 namespace art {
 
-constexpr size_t kMemoryToolRedZoneBytes = 8;
-
 template <bool kCount>
 const char* const ArenaAllocatorStatsImpl<kCount>::kAllocNames[] = {
   // Every name should have the same width and end with a space. Abbreviate if necessary:
@@ -247,7 +245,7 @@
   size_t rounded_bytes = bytes + kMemoryToolRedZoneBytes;
   DCHECK_ALIGNED(rounded_bytes, 8);  // `bytes` is 16-byte aligned, red zone is 8-byte aligned.
   uintptr_t padding =
-      ((reinterpret_cast<uintptr_t>(ptr_) + 15u) & 15u) - reinterpret_cast<uintptr_t>(ptr_);
+      RoundUp(reinterpret_cast<uintptr_t>(ptr_), 16) - reinterpret_cast<uintptr_t>(ptr_);
   ArenaAllocatorStats::RecordAlloc(rounded_bytes, kind);
   uint8_t* ret;
   if (UNLIKELY(padding + rounded_bytes > static_cast<size_t>(end_ - ptr_))) {
diff --git a/libartbase/base/arena_allocator.h b/libartbase/base/arena_allocator.h
index bf7d932..e340994 100644
--- a/libartbase/base/arena_allocator.h
+++ b/libartbase/base/arena_allocator.h
@@ -152,7 +152,7 @@
 
 class ArenaAllocatorMemoryTool {
  public:
-  bool IsRunningOnMemoryTool() { return kMemoryToolIsAvailable; }
+  static constexpr bool IsRunningOnMemoryTool() { return kMemoryToolIsAvailable; }
 
   void MakeDefined(void* ptr, size_t size) {
     if (UNLIKELY(IsRunningOnMemoryTool())) {
@@ -184,7 +184,7 @@
   void Reset();
   // Release is used inbetween uses and uses madvise for memory usage.
   virtual void Release() { }
-  uint8_t* Begin() {
+  uint8_t* Begin() const {
     return memory_;
   }
 
@@ -209,6 +209,8 @@
     return memory_ <= ptr && ptr < memory_ + bytes_allocated_;
   }
 
+  Arena* Next() const { return next_; }
+
  protected:
   size_t bytes_allocated_;
   uint8_t* memory_;
@@ -355,6 +357,19 @@
     return pool_;
   }
 
+  Arena* GetHeadArena() const {
+    return arena_head_;
+  }
+
+  uint8_t* CurrentPtr() const {
+    return ptr_;
+  }
+
+  size_t CurrentArenaUnusedBytes() const {
+    DCHECK_LE(ptr_, end_);
+    return end_ - ptr_;
+  }
+
   bool Contains(const void* ptr) const;
 
   // The alignment guaranteed for individual allocations.
@@ -363,6 +378,9 @@
   // The alignment required for the whole Arena rather than individual allocations.
   static constexpr size_t kArenaAlignment = 16u;
 
+  // Extra bytes required by the memory tool.
+  static constexpr size_t kMemoryToolRedZoneBytes = 8u;
+
  private:
   void* AllocWithMemoryTool(size_t bytes, ArenaAllocKind kind);
   void* AllocWithMemoryToolAlign16(size_t bytes, ArenaAllocKind kind);
diff --git a/openjdkjvmti/ti_redefine.cc b/openjdkjvmti/ti_redefine.cc
index 2521ff3..a646031 100644
--- a/openjdkjvmti/ti_redefine.cc
+++ b/openjdkjvmti/ti_redefine.cc
@@ -89,7 +89,7 @@
 #include "jni/jni_id_manager.h"
 #include "jvmti.h"
 #include "jvmti_allocator.h"
-#include "linear_alloc.h"
+#include "linear_alloc-inl.h"
 #include "mirror/array-alloc-inl.h"
 #include "mirror/array.h"
 #include "mirror/class-alloc-inl.h"
@@ -310,7 +310,9 @@
         art::ClassLinker* cl = runtime->GetClassLinker();
         auto ptr_size = cl->GetImagePointerSize();
         const size_t method_size = art::ArtMethod::Size(ptr_size);
-        auto* method_storage = allocator_->Alloc(art::Thread::Current(), method_size);
+        auto* method_storage = allocator_->Alloc(art::Thread::Current(),
+                                                 method_size,
+                                                 art::LinearAllocKind::kArtMethod);
         CHECK(method_storage != nullptr) << "Unable to allocate storage for obsolete version of '"
                                          << old_method->PrettyMethod() << "'";
         new_obsolete_method = new (method_storage) art::ArtMethod();
diff --git a/runtime/Android.bp b/runtime/Android.bp
index 6a9c6b7..c473250 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -110,6 +110,7 @@
         "art_method.cc",
         "backtrace_helper.cc",
         "barrier.cc",
+        "base/gc_visited_arena_pool.cc",
         "base/locks.cc",
         "base/mem_map_arena_pool.cc",
         "base/mutex.cc",
@@ -194,7 +195,6 @@
         "jni/jni_env_ext.cc",
         "jni/jni_id_manager.cc",
         "jni/jni_internal.cc",
-        "linear_alloc.cc",
         "method_handles.cc",
         "metrics/reporter.cc",
         "mirror/array.cc",
@@ -573,6 +573,7 @@
         "indirect_reference_table.h",
         "jdwp_provider.h",
         "jni_id_type.h",
+        "linear_alloc.h",
         "lock_word.h",
         "oat_file.h",
         "process_state.h",
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 5cefedb..172b78d 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -26,7 +26,7 @@
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "imt_conflict_table.h"
 #include "jni/jni_internal.h"
-#include "linear_alloc.h"
+#include "linear_alloc-inl.h"
 #include "mirror/class-alloc-inl.h"
 #include "mirror/string-inl.h"
 #include "mirror/object_array-alloc-inl.h"
@@ -1776,7 +1776,8 @@
       Runtime::Current()->GetClassLinker()->CreateImtConflictTable(/*count=*/0u, linear_alloc);
   void* data = linear_alloc->Alloc(
       self,
-      ImtConflictTable::ComputeSizeWithOneMoreEntry(empty_conflict_table, kRuntimePointerSize));
+      ImtConflictTable::ComputeSizeWithOneMoreEntry(empty_conflict_table, kRuntimePointerSize),
+      LinearAllocKind::kNoGCRoots);
   ImtConflictTable* new_table = new (data) ImtConflictTable(
       empty_conflict_table, inf_contains, contains_amethod, kRuntimePointerSize);
   conflict_method->SetImtConflictTable(new_table, kRuntimePointerSize);
diff --git a/runtime/art_field-inl.h b/runtime/art_field-inl.h
index 5f23f1e..80af230 100644
--- a/runtime/art_field-inl.h
+++ b/runtime/art_field-inl.h
@@ -64,6 +64,30 @@
   declaring_class_ = GcRoot<mirror::Class>(new_declaring_class);
 }
 
+template<typename RootVisitorType>
+void ArtField::VisitArrayRoots(RootVisitorType& visitor,
+                               uint8_t* start_boundary,
+                               uint8_t* end_boundary,
+                               LengthPrefixedArray<ArtField>* array) {
+  DCHECK_LE(start_boundary, end_boundary);
+  DCHECK_NE(array->size(), 0u);
+  ArtField* first_field = &array->At(0);
+  DCHECK_LE(static_cast<void*>(end_boundary), static_cast<void*>(first_field + array->size()));
+  static constexpr size_t kFieldSize = sizeof(ArtField);
+  static_assert(IsPowerOfTwo(kFieldSize));
+  uint8_t* declaring_class =
+      reinterpret_cast<uint8_t*>(first_field) + DeclaringClassOffset().Int32Value();
+  // Jump to the first class to visit.
+  if (declaring_class < start_boundary) {
+    declaring_class += RoundUp(start_boundary - declaring_class, kFieldSize);
+  }
+  while (declaring_class < end_boundary) {
+    visitor.VisitRoot(
+        reinterpret_cast<mirror::CompressedReference<mirror::Object>*>(declaring_class));
+    declaring_class += kFieldSize;
+  }
+}
+
 inline MemberOffset ArtField::GetOffsetDuringLinking() {
   DCHECK(GetDeclaringClass()->IsLoaded() || GetDeclaringClass()->IsErroneous());
   return MemberOffset(offset_);
diff --git a/runtime/art_field.h b/runtime/art_field.h
index 4e77e7f..c205920 100644
--- a/runtime/art_field.h
+++ b/runtime/art_field.h
@@ -27,6 +27,7 @@
 namespace art {
 
 class DexFile;
+template<typename T> class LengthPrefixedArray;
 class ScopedObjectAccessAlreadyRunnable;
 
 namespace mirror {
@@ -39,6 +40,15 @@
 
 class ArtField final {
  public:
+  // Visit declaring classes of all the art-fields in 'array' that reside
+  // in [start_boundary, end_boundary).
+  template<typename RootVisitorType>
+  static void VisitArrayRoots(RootVisitorType& visitor,
+                              uint8_t* start_boundary,
+                              uint8_t* end_boundary,
+                              LengthPrefixedArray<ArtField>* array)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ObjPtr<mirror::Class> GetDeclaringClass() REQUIRES_SHARED(Locks::mutator_lock_);
 
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index b071714..6499bac 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -405,6 +405,49 @@
   }
 }
 
+template<typename RootVisitorType>
+void ArtMethod::VisitRoots(RootVisitorType& visitor,
+                           uint8_t* start_boundary,
+                           uint8_t* end_boundary,
+                           ArtMethod* method) {
+  mirror::CompressedReference<mirror::Object>* cls_ptr =
+      reinterpret_cast<mirror::CompressedReference<mirror::Object>*>(
+          reinterpret_cast<uint8_t*>(method) + DeclaringClassOffset().Int32Value());
+  if (reinterpret_cast<uint8_t*>(cls_ptr) >= start_boundary &&
+      reinterpret_cast<uint8_t*>(cls_ptr) < end_boundary) {
+    visitor.VisitRootIfNonNull(cls_ptr);
+  }
+}
+
+template<PointerSize kPointerSize, typename RootVisitorType>
+void ArtMethod::VisitArrayRoots(RootVisitorType& visitor,
+                                uint8_t* start_boundary,
+                                uint8_t* end_boundary,
+                                LengthPrefixedArray<ArtMethod>* array) {
+  DCHECK_LE(start_boundary, end_boundary);
+  DCHECK_NE(array->size(), 0u);
+  static constexpr size_t kMethodSize = ArtMethod::Size(kPointerSize);
+  ArtMethod* first_method = &array->At(0, kMethodSize, ArtMethod::Alignment(kPointerSize));
+  DCHECK_LE(static_cast<void*>(end_boundary),
+            static_cast<void*>(reinterpret_cast<uint8_t*>(first_method)
+                               + array->size() * kMethodSize));
+  uint8_t* declaring_class =
+      reinterpret_cast<uint8_t*>(first_method) + DeclaringClassOffset().Int32Value();
+  // Jump to the first class to visit.
+  if (declaring_class < start_boundary) {
+    size_t remainder = (start_boundary - declaring_class) % kMethodSize;
+    declaring_class = start_boundary;
+    if (remainder > 0) {
+      declaring_class += kMethodSize - remainder;
+    }
+  }
+  while (declaring_class < end_boundary) {
+    visitor.VisitRootIfNonNull(
+        reinterpret_cast<mirror::CompressedReference<mirror::Object>*>(declaring_class));
+    declaring_class += kMethodSize;
+  }
+}
+
 template <typename Visitor>
 inline void ArtMethod::UpdateEntrypoints(const Visitor& visitor, PointerSize pointer_size) {
   if (IsNative()) {
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 689f621..8347577 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -49,6 +49,7 @@
 class ImtConflictTable;
 enum InvokeType : uint32_t;
 union JValue;
+template<typename T> class LengthPrefixedArray;
 class OatQuickMethodHeader;
 class ProfilingInfo;
 class ScopedObjectAccessAlreadyRunnable;
@@ -87,6 +88,23 @@
                                         jobject jlr_method)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // Visit the declaring class in 'method' if it is within [start_boundary, end_boundary).
+  template<typename RootVisitorType>
+  static void VisitRoots(RootVisitorType& visitor,
+                         uint8_t* start_boundary,
+                         uint8_t* end_boundary,
+                         ArtMethod* method)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Visit declaring classes of all the art-methods in 'array' that reside
+  // in [start_boundary, end_boundary).
+  template<PointerSize kPointerSize, typename RootVisitorType>
+  static void VisitArrayRoots(RootVisitorType& visitor,
+                              uint8_t* start_boundary,
+                              uint8_t* end_boundary,
+                              LengthPrefixedArray<ArtMethod>* array)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE ObjPtr<mirror::Class> GetDeclaringClass() REQUIRES_SHARED(Locks::mutator_lock_);
 
diff --git a/runtime/base/gc_visited_arena_pool.cc b/runtime/base/gc_visited_arena_pool.cc
new file mode 100644
index 0000000..dd29c7f
--- /dev/null
+++ b/runtime/base/gc_visited_arena_pool.cc
@@ -0,0 +1,254 @@
+/*
+ * Copyright 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/gc_visited_arena_pool.h"
+
+#include "base/arena_allocator-inl.h"
+#include "base/utils.h"
+
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+namespace art {
+
+#if defined(__LP64__)
+// Use a size in multiples of 1GB as that can utilize the optimized mremap
+// page-table move.
+static constexpr size_t kLinearAllocPoolSize = 1 * GB;
+static constexpr size_t kLow4GBLinearAllocPoolSize = 32 * MB;
+#else
+static constexpr size_t kLinearAllocPoolSize = 32 * MB;
+#endif
+
+TrackedArena::TrackedArena(uint8_t* start, size_t size) : Arena(), first_obj_array_(nullptr) {
+  static_assert(ArenaAllocator::kArenaAlignment <= kPageSize,
+                "Arena should not need stronger alignment than kPageSize.");
+  DCHECK_ALIGNED(size, kPageSize);
+  DCHECK_ALIGNED(start, kPageSize);
+  memory_ = start;
+  size_ = size;
+  size_t arr_size = size / kPageSize;
+  first_obj_array_.reset(new uint8_t*[arr_size]);
+  std::fill_n(first_obj_array_.get(), arr_size, nullptr);
+}
+
+void TrackedArena::Release() {
+  if (bytes_allocated_ > 0) {
+    ZeroAndReleasePages(Begin(), Size());
+    std::fill_n(first_obj_array_.get(), Size() / kPageSize, nullptr);
+    bytes_allocated_ = 0;
+  }
+}
+
+void TrackedArena::SetFirstObject(uint8_t* obj_begin, uint8_t* obj_end) {
+  DCHECK_LE(static_cast<void*>(Begin()), static_cast<void*>(obj_end));
+  DCHECK_LT(static_cast<void*>(obj_begin), static_cast<void*>(obj_end));
+  size_t idx = static_cast<size_t>(obj_begin - Begin()) / kPageSize;
+  size_t last_byte_idx = static_cast<size_t>(obj_end - 1 - Begin()) / kPageSize;
+  // If the addr is at the beginning of a page, then we set it for that page too.
+  if (IsAligned<kPageSize>(obj_begin)) {
+    first_obj_array_[idx] = obj_begin;
+  }
+  while (idx < last_byte_idx) {
+    first_obj_array_[++idx] = obj_begin;
+  }
+}
+
+void GcVisitedArenaPool::AddMap(size_t min_size) {
+  size_t size = std::max(min_size, kLinearAllocPoolSize);
+#if defined(__LP64__)
+  // This is true only when we are running a 64-bit dex2oat to compile a 32-bit image.
+  if (low_4gb_) {
+    size = std::max(min_size, kLow4GBLinearAllocPoolSize);
+  }
+#endif
+  std::string err_msg;
+  maps_.emplace_back(MemMap::MapAnonymous(name_,
+                                          size,
+                                          PROT_READ | PROT_WRITE,
+                                          low_4gb_,
+                                          &err_msg));
+  MemMap& map = maps_.back();
+  if (!map.IsValid()) {
+    LOG(FATAL) << "Failed to allocate " << name_
+               << ": " << err_msg;
+    UNREACHABLE();
+  }
+  Chunk* chunk = new Chunk(map.Begin(), map.Size());
+  best_fit_allocs_.insert(chunk);
+  free_chunks_.insert(chunk);
+}
+
+GcVisitedArenaPool::GcVisitedArenaPool(bool low_4gb, const char* name)
+  : bytes_allocated_(0), name_(name), low_4gb_(low_4gb) {
+  std::lock_guard<std::mutex> lock(lock_);
+  // It's extremely rare to have more than one map.
+  maps_.reserve(1);
+  AddMap(/*min_size=*/0);
+}
+
+GcVisitedArenaPool::~GcVisitedArenaPool() {
+  for (Chunk* chunk : free_chunks_) {
+    delete chunk;
+  }
+  // Must not delete chunks from best_fit_allocs_ as they are shared with
+  // free_chunks_.
+}
+
+size_t GcVisitedArenaPool::GetBytesAllocated() const {
+  std::lock_guard<std::mutex> lock(lock_);
+  return bytes_allocated_;
+}
+
+Arena* GcVisitedArenaPool::AllocArena(size_t size) {
+  // Return only page aligned sizes so that madvise can be leveraged.
+  size = RoundUp(size, kPageSize);
+  Chunk temp_chunk(nullptr, size);
+  std::lock_guard<std::mutex> lock(lock_);
+  auto best_fit_iter = best_fit_allocs_.lower_bound(&temp_chunk);
+  if (UNLIKELY(best_fit_iter == best_fit_allocs_.end())) {
+    AddMap(size);
+    best_fit_iter = best_fit_allocs_.lower_bound(&temp_chunk);
+    CHECK(best_fit_iter != best_fit_allocs_.end());
+  }
+  auto free_chunks_iter = free_chunks_.find(*best_fit_iter);
+  DCHECK(free_chunks_iter != free_chunks_.end());
+  Chunk* chunk = *best_fit_iter;
+  DCHECK_EQ(chunk, *free_chunks_iter);
+  // If the best-fit chunk is < 2x the requested size, then give out the whole chunk.
+  if (chunk->size_ < 2 * size) {
+    DCHECK_GE(chunk->size_, size);
+    auto emplace_result = allocated_arenas_.emplace(chunk->addr_, chunk->size_);
+    DCHECK(emplace_result.second);
+    free_chunks_.erase(free_chunks_iter);
+    best_fit_allocs_.erase(best_fit_iter);
+    delete chunk;
+    return const_cast<TrackedArena*>(&(*emplace_result.first));
+  } else {
+    auto emplace_result = allocated_arenas_.emplace(chunk->addr_, size);
+    DCHECK(emplace_result.second);
+    // Compute next iterators for faster insert later.
+    auto next_best_fit_iter = best_fit_iter;
+    next_best_fit_iter++;
+    auto next_free_chunks_iter = free_chunks_iter;
+    next_free_chunks_iter++;
+    auto best_fit_nh = best_fit_allocs_.extract(best_fit_iter);
+    auto free_chunks_nh = free_chunks_.extract(free_chunks_iter);
+    best_fit_nh.value()->addr_ += size;
+    best_fit_nh.value()->size_ -= size;
+    DCHECK_EQ(free_chunks_nh.value()->addr_, chunk->addr_);
+    best_fit_allocs_.insert(next_best_fit_iter, std::move(best_fit_nh));
+    free_chunks_.insert(next_free_chunks_iter, std::move(free_chunks_nh));
+    return const_cast<TrackedArena*>(&(*emplace_result.first));
+  }
+}
+
+void GcVisitedArenaPool::FreeRangeLocked(uint8_t* range_begin, size_t range_size) {
+  Chunk temp_chunk(range_begin, range_size);
+  bool merge_with_next = false;
+  bool merge_with_prev = false;
+  auto next_iter = free_chunks_.lower_bound(&temp_chunk);
+  auto iter_for_extract = free_chunks_.end();
+  // Can we merge with the previous chunk?
+  if (next_iter != free_chunks_.begin()) {
+    auto prev_iter = next_iter;
+    prev_iter--;
+    merge_with_prev = (*prev_iter)->addr_ + (*prev_iter)->size_ == range_begin;
+    if (merge_with_prev) {
+      range_begin = (*prev_iter)->addr_;
+      range_size += (*prev_iter)->size_;
+      // Hold on to the iterator for faster extract later
+      iter_for_extract = prev_iter;
+    }
+  }
+  // Can we merge with the next chunk?
+  if (next_iter != free_chunks_.end()) {
+    merge_with_next = range_begin + range_size == (*next_iter)->addr_;
+    if (merge_with_next) {
+      range_size += (*next_iter)->size_;
+      if (merge_with_prev) {
+        auto iter = next_iter;
+        next_iter++;
+        // Keep only one of the two chunks to be expanded.
+        Chunk* chunk = *iter;
+        size_t erase_res = best_fit_allocs_.erase(chunk);
+        DCHECK_EQ(erase_res, 1u);
+        free_chunks_.erase(iter);
+        delete chunk;
+      } else {
+        iter_for_extract = next_iter;
+        next_iter++;
+      }
+    }
+  }
+
+  // Extract-insert avoids 2/4 destroys and 2/2 creations
+  // as compared to erase-insert, so use that when merging.
+  if (merge_with_prev || merge_with_next) {
+    auto free_chunks_nh = free_chunks_.extract(iter_for_extract);
+    auto best_fit_allocs_nh = best_fit_allocs_.extract(*iter_for_extract);
+
+    free_chunks_nh.value()->addr_ = range_begin;
+    DCHECK_EQ(best_fit_allocs_nh.value()->addr_, range_begin);
+    free_chunks_nh.value()->size_ = range_size;
+    DCHECK_EQ(best_fit_allocs_nh.value()->size_, range_size);
+
+    free_chunks_.insert(next_iter, std::move(free_chunks_nh));
+    // Since the chunk's size has expanded, the hint won't be useful
+    // for the best-fit set.
+    best_fit_allocs_.insert(std::move(best_fit_allocs_nh));
+  } else {
+    DCHECK(iter_for_extract == free_chunks_.end());
+    Chunk* chunk = new Chunk(range_begin, range_size);
+    free_chunks_.insert(next_iter, chunk);
+    best_fit_allocs_.insert(chunk);
+  }
+}
+
+void GcVisitedArenaPool::FreeArenaChain(Arena* first) {
+  if (kRunningOnMemoryTool) {
+    for (Arena* arena = first; arena != nullptr; arena = arena->Next()) {
+      MEMORY_TOOL_MAKE_UNDEFINED(arena->Begin(), arena->GetBytesAllocated());
+    }
+  }
+
+  // TODO: Handle the case when arena_allocator::kArenaAllocatorPreciseTracking
+  // is true. See MemMapArenaPool::FreeArenaChain() for example.
+  CHECK(!arena_allocator::kArenaAllocatorPreciseTracking);
+
+  // madvise the arenas before acquiring the lock, for scalability.
+  for (Arena* temp = first; temp != nullptr; temp = temp->Next()) {
+    temp->Release();
+  }
+
+  std::lock_guard<std::mutex> lock(lock_);
+  while (first != nullptr) {
+    FreeRangeLocked(first->Begin(), first->Size());
+    // In other implementations of ArenaPool this is calculated on demand,
+    // thanks to the list of free arenas that is kept around. But in this case,
+    // we release the freed arena back to the pool and therefore need to
+    // update the count here.
+    bytes_allocated_ += first->GetBytesAllocated();
+    TrackedArena* temp = down_cast<TrackedArena*>(first);
+    first = first->Next();
+    size_t erase_count = allocated_arenas_.erase(*temp);
+    DCHECK_EQ(erase_count, 1u);
+  }
+}
+
+}  // namespace art
+
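
The pool keeps every free chunk in two sets that share the same Chunk
objects: best_fit_allocs_ (ordered by size, for best-fit lookup) and
free_chunks_ (ordered by address, for merging in FreeRangeLocked()).
The standalone sketch below shows only the allocation-side decision from
AllocArena(); Chunk, BySize, and PickChunk are simplified stand-ins
rather than the classes above, and the address-ordered set,
allocated_arenas_, and locking are omitted.

  #include <cstddef>
  #include <cstdint>
  #include <functional>
  #include <set>

  struct Chunk {
    uint8_t* addr;
    size_t size;
  };

  struct BySize {
    bool operator()(const Chunk* a, const Chunk* b) const {
      if (a->size != b->size) return a->size < b->size;
      return std::less<uint8_t*>{}(a->addr, b->addr);
    }
  };

  // Best-fit pick for 'size' bytes: take the smallest chunk that fits. If
  // it is less than twice the request, hand out the whole chunk; otherwise
  // split it and keep the remainder in the free set.
  Chunk* PickChunk(std::set<Chunk*, BySize>& by_size, size_t size) {
    Chunk probe{nullptr, size};
    auto it = by_size.lower_bound(&probe);
    if (it == by_size.end()) {
      return nullptr;  // The real pool maps more memory (AddMap) and retries.
    }
    Chunk* chunk = *it;
    by_size.erase(it);
    if (chunk->size < 2 * size) {
      return chunk;  // Whole chunk handed out (caller takes ownership).
    }
    Chunk* allocated = new Chunk{chunk->addr, size};
    chunk->addr += size;    // Remainder stays free...
    chunk->size -= size;
    by_size.insert(chunk);  // ...and is re-inserted with its new size.
    return allocated;
  }
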
diff --git a/runtime/base/gc_visited_arena_pool.h b/runtime/base/gc_visited_arena_pool.h
new file mode 100644
index 0000000..7dc79af
--- /dev/null
+++ b/runtime/base/gc_visited_arena_pool.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_GC_VISITED_ARENA_POOL_H_
+#define ART_RUNTIME_BASE_GC_VISITED_ARENA_POOL_H_
+
+#include "base/casts.h"
+#include "base/arena_allocator.h"
+#include "base/locks.h"
+#include "base/mem_map.h"
+
+#include <set>
+
+namespace art {
+
+// GcVisitedArenaPool can be used for tracking allocations so that they can
+// be visited during GC to update the GC-roots inside them.
+
+// An Arena which tracks its allocations.
+class TrackedArena final : public Arena {
+ public:
+  TrackedArena(uint8_t* start, size_t size);
+
+  template <typename PageVisitor>
+  void VisitRoots(PageVisitor& visitor) const REQUIRES_SHARED(Locks::mutator_lock_) {
+    DCHECK_ALIGNED(Size(), kPageSize);
+    DCHECK_ALIGNED(Begin(), kPageSize);
+    int nr_pages = Size() / kPageSize;
+    uint8_t* page_begin = Begin();
+    for (int i = 0; i < nr_pages && first_obj_array_[i] != nullptr; i++, page_begin += kPageSize) {
+      visitor(page_begin, first_obj_array_[i]);
+    }
+  }
+
+  // Set 'obj_begin' in every element of first_obj_array_ for which
+  // 'obj_begin' is the first object overlapping the corresponding page.
+  void SetFirstObject(uint8_t* obj_begin, uint8_t* obj_end);
+
+  void Release() override;
+
+ private:
+  // first_obj_array_[i] is the object that overlaps with the ith page's
+  // beginning, i.e. first_obj_array_[i] <= ith page_begin.
+  std::unique_ptr<uint8_t*[]> first_obj_array_;
+};
+
+// An arena-pool wherein allocations can be tracked so that the GC can visit all
+// the GC roots. All the arenas are allocated in one sufficiently large memory
+// range to avoid multiple calls to mremap/mprotect syscalls.
+class GcVisitedArenaPool final : public ArenaPool {
+ public:
+  explicit GcVisitedArenaPool(bool low_4gb = false, const char* name = "LinearAlloc");
+  virtual ~GcVisitedArenaPool();
+  Arena* AllocArena(size_t size) override;
+  void FreeArenaChain(Arena* first) override;
+  size_t GetBytesAllocated() const override;
+  void ReclaimMemory() override {}
+  void LockReclaimMemory() override {}
+  void TrimMaps() override {}
+
+  template <typename PageVisitor>
+  void VisitRoots(PageVisitor& visitor) REQUIRES_SHARED(Locks::mutator_lock_) {
+    std::lock_guard<std::mutex> lock(lock_);
+    for (auto& arena : allocated_arenas_) {
+      arena.VisitRoots(visitor);
+    }
+  }
+
+ private:
+  void FreeRangeLocked(uint8_t* range_begin, size_t range_size) REQUIRES(lock_);
+  // Add a map of at least 'min_size' bytes to the pool.
+  void AddMap(size_t min_size) REQUIRES(lock_);
+
+  class Chunk {
+   public:
+    Chunk(uint8_t* addr, size_t size) : addr_(addr), size_(size) {}
+    uint8_t* addr_;
+    size_t size_;
+  };
+
+  class LessByChunkAddr {
+   public:
+    bool operator()(const Chunk* a, const Chunk* b) const {
+      return std::less<uint8_t*>{}(a->addr_, b->addr_);
+    }
+  };
+
+  class LessByChunkSize {
+   public:
+    // Since two chunks could have the same size, use addr when that happens.
+    bool operator()(const Chunk* a, const Chunk* b) const {
+      return std::less<size_t>{}(a->size_, b->size_)
+             || (std::equal_to<size_t>{}(a->size_, b->size_)
+                 && std::less<uint8_t*>{}(a->addr_, b->addr_));
+    }
+  };
+
+  class LessByArenaAddr {
+   public:
+    bool operator()(const TrackedArena& a, const TrackedArena& b) const {
+      return std::less<uint8_t*>{}(a.Begin(), b.Begin());
+    }
+  };
+
+  // Use a std::mutex here as Arenas are second-from-the-bottom when using MemMaps, and MemMap
+  // itself uses std::mutex scoped to within an allocate/free only.
+  mutable std::mutex lock_;
+  std::vector<MemMap> maps_ GUARDED_BY(lock_);
+  std::set<Chunk*, LessByChunkSize> best_fit_allocs_ GUARDED_BY(lock_);
+  std::set<Chunk*, LessByChunkAddr> free_chunks_ GUARDED_BY(lock_);
+  // Set of allocated arenas. It's required to be able to find the arena
+  // corresponding to a given address.
+  // TODO: We can manage without this set if we decide to have a large
+  // 'first-object' array for the entire space, instead of per arena. Analyse
+  // which approach is better.
+  std::set<TrackedArena, LessByArenaAddr> allocated_arenas_ GUARDED_BY(lock_);
+  // Number of bytes allocated so far.
+  size_t bytes_allocated_ GUARDED_BY(lock_);
+  const char* name_;
+  const bool low_4gb_;
+
+  DISALLOW_COPY_AND_ASSIGN(GcVisitedArenaPool);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_GC_VISITED_ARENA_POOL_H_
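
TrackedArena::VisitRoots() above invokes the page visitor once per page
that has a recorded first object, passing the page's begin address and
the first allocation overlapping it; GcVisitedArenaPool::VisitRoots()
does this for every allocated arena under the pool lock. A hypothetical
minimal visitor is sketched below (PageCounter is illustrative only; the
real visitor is MarkCompact::NativeRootsUpdateVisitor in mark_compact.cc).

  // Illustrative page visitor: counts pages containing tracked allocations.
  struct PageCounter {
    size_t pages_with_roots = 0;
    void operator()(uint8_t* page_begin, uint8_t* first_obj) {
      // Per the first_obj_array_ comment, the first object begins at or
      // before the page it overlaps.
      DCHECK_LE(first_obj, page_begin);
      pages_with_roots++;
    }
  };

  // Usage (requires the mutator lock, as VisitRoots() is annotated):
  //   GcVisitedArenaPool* pool = static_cast<GcVisitedArenaPool*>(
  //       Runtime::Current()->GetLinearAllocArenaPool());
  //   PageCounter counter;
  //   pool->VisitRoots(counter);
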
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 67a20a8..c0d99b9 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -44,6 +44,7 @@
 #include "base/hash_set.h"
 #include "base/leb128.h"
 #include "base/logging.h"
+#include "base/mem_map_arena_pool.h"
 #include "base/metrics/metrics.h"
 #include "base/mutex-inl.h"
 #include "base/os.h"
@@ -97,7 +98,7 @@
 #include "jit/jit_code_cache.h"
 #include "jni/java_vm_ext.h"
 #include "jni/jni_internal.h"
-#include "linear_alloc.h"
+#include "linear_alloc-inl.h"
 #include "mirror/array-alloc-inl.h"
 #include "mirror/array-inl.h"
 #include "mirror/call_site.h"
@@ -3487,7 +3488,7 @@
   // If the ArtField alignment changes, review all uses of LengthPrefixedArray<ArtField>.
   static_assert(alignof(ArtField) == 4, "ArtField alignment is expected to be 4.");
   size_t storage_size = LengthPrefixedArray<ArtField>::ComputeSize(length);
-  void* array_storage = allocator->Alloc(self, storage_size);
+  void* array_storage = allocator->Alloc(self, storage_size, LinearAllocKind::kArtFieldArray);
   auto* ret = new(array_storage) LengthPrefixedArray<ArtField>(length);
   CHECK(ret != nullptr);
   std::uninitialized_fill_n(&ret->At(0), length, ArtField());
@@ -3504,7 +3505,7 @@
   const size_t method_size = ArtMethod::Size(image_pointer_size_);
   const size_t storage_size =
       LengthPrefixedArray<ArtMethod>::ComputeSize(length, method_size, method_alignment);
-  void* array_storage = allocator->Alloc(self, storage_size);
+  void* array_storage = allocator->Alloc(self, storage_size, LinearAllocKind::kArtMethodArray);
   auto* ret = new (array_storage) LengthPrefixedArray<ArtMethod>(length);
   CHECK(ret != nullptr);
   for (size_t i = 0; i < length; ++i) {
@@ -5918,7 +5919,9 @@
     if (imt == nullptr) {
       LinearAlloc* allocator = GetAllocatorForClassLoader(klass->GetClassLoader());
       imt = reinterpret_cast<ImTable*>(
-          allocator->Alloc(self, ImTable::SizeInBytes(image_pointer_size_)));
+          allocator->Alloc(self,
+                           ImTable::SizeInBytes(image_pointer_size_),
+                           LinearAllocKind::kNoGCRoots));
       if (imt == nullptr) {
         return false;
       }
@@ -6201,8 +6204,9 @@
   // Allocate a new table. Note that we will leak this table at the next conflict,
   // but that's a tradeoff compared to making the table fixed size.
   void* data = linear_alloc->Alloc(
-      Thread::Current(), ImtConflictTable::ComputeSizeWithOneMoreEntry(current_table,
-                                                                       image_pointer_size_));
+      Thread::Current(),
+      ImtConflictTable::ComputeSizeWithOneMoreEntry(current_table, image_pointer_size_),
+      LinearAllocKind::kNoGCRoots);
   if (data == nullptr) {
     LOG(ERROR) << "Failed to allocate conflict table";
     return conflict_method;
@@ -6316,8 +6320,8 @@
                                                       LinearAlloc* linear_alloc,
                                                       PointerSize image_pointer_size) {
   void* data = linear_alloc->Alloc(Thread::Current(),
-                                   ImtConflictTable::ComputeSize(count,
-                                                                 image_pointer_size));
+                                   ImtConflictTable::ComputeSize(count, image_pointer_size),
+                                   LinearAllocKind::kNoGCRoots);
   return (data != nullptr) ? new (data) ImtConflictTable(count, image_pointer_size) : nullptr;
 }
 
@@ -6933,7 +6937,7 @@
         klass_(klass),
         self_(self),
         runtime_(runtime),
-        stack_(runtime->GetLinearAlloc()->GetArenaPool()),
+        stack_(runtime->GetArenaPool()),
         allocator_(&stack_),
         copied_method_records_(copied_method_records_initial_buffer_,
                                kCopiedMethodRecordInitialBufferSize,
@@ -7013,6 +7017,10 @@
                                                                             kMethodSize,
                                                                             kMethodAlignment);
         memset(old_methods, 0xFEu, old_size);
+        // Set size to 0 to avoid visiting declaring classes.
+        if (gUseUserfaultfd) {
+          old_methods->SetSize(0);
+        }
       }
     }
   }
@@ -7615,16 +7623,25 @@
   const size_t old_methods_ptr_size = (old_methods != nullptr) ? old_size : 0;
   auto* methods = reinterpret_cast<LengthPrefixedArray<ArtMethod>*>(
       class_linker_->GetAllocatorForClassLoader(klass->GetClassLoader())->Realloc(
-          self_, old_methods, old_methods_ptr_size, new_size));
+          self_, old_methods, old_methods_ptr_size, new_size, LinearAllocKind::kArtMethodArray));
   CHECK(methods != nullptr);  // Native allocation failure aborts.
 
   if (methods != old_methods) {
-    StrideIterator<ArtMethod> out = methods->begin(kMethodSize, kMethodAlignment);
-    // Copy over the old methods. The `ArtMethod::CopyFrom()` is only necessary to not miss
-    // read barriers since `LinearAlloc::Realloc()` won't do read barriers when it copies.
-    for (auto& m : klass->GetMethods(kPointerSize)) {
-      out->CopyFrom(&m, kPointerSize);
-      ++out;
+    if (gUseReadBarrier) {
+      StrideIterator<ArtMethod> out = methods->begin(kMethodSize, kMethodAlignment);
+      // Copy over the old methods. The `ArtMethod::CopyFrom()` is only necessary to not miss
+      // read barriers since `LinearAlloc::Realloc()` won't do read barriers when it copies.
+      for (auto& m : klass->GetMethods(kPointerSize)) {
+        out->CopyFrom(&m, kPointerSize);
+        ++out;
+      }
+    } else if (gUseUserfaultfd) {
+      // Clear the declaring classes of the methods in the old dangling array so
+      // that the GC doesn't try to update them, which could cause crashes in the
+      // userfaultfd GC due to checks in post-compact address computation.
+      for (auto& m : klass->GetMethods(kPointerSize)) {
+        m.SetDeclaringClass(nullptr);
+      }
     }
   }
 
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index be8d325..403a382 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -1392,7 +1392,6 @@
       // BumpPointerSpace::Walk() also works similarly.
       while (black_allocs < block_end
              && obj->GetClass<kDefaultVerifyFlags, kWithoutReadBarrier>() != nullptr) {
-        RememberDexCaches(obj);
         if (first_obj == nullptr) {
           first_obj = obj;
         }
@@ -1524,95 +1523,135 @@
   MarkCompact* const collector_;
 };
 
-// TODO: JVMTI redefinition leads to situations wherein new class object(s) and the
-// corresponding native roots are setup but are not linked to class tables and
-// therefore are not accessible, leading to memory corruption.
-class MarkCompact::NativeRootsUpdateVisitor : public ClassLoaderVisitor, public DexCacheVisitor {
+class MarkCompact::NativeRootsUpdateVisitor : public ClassLoaderVisitor {
  public:
-  explicit NativeRootsUpdateVisitor(MarkCompact* collector, PointerSize pointer_size)
-    : collector_(collector), pointer_size_(pointer_size) {}
-
-  ~NativeRootsUpdateVisitor() {
-    LOG(INFO) << "num_classes: " << classes_visited_.size()
-              << " num_dex_caches: " << dex_caches_visited_.size();
-  }
+  explicit NativeRootsUpdateVisitor(MarkCompact* collector)
+      : collector_(collector),
+        pointer_size_(Runtime::Current()->GetClassLinker()->GetImagePointerSize()) {}
 
   void Visit(ObjPtr<mirror::ClassLoader> class_loader) override
       REQUIRES_SHARED(Locks::classlinker_classes_lock_, Locks::mutator_lock_) {
     ClassTable* const class_table = class_loader->GetClassTable();
     if (class_table != nullptr) {
-      class_table->VisitClassesAndRoots(*this);
+      class_table->VisitRoots(*this);
     }
   }
 
-  void Visit(ObjPtr<mirror::DexCache> dex_cache) override
-      REQUIRES_SHARED(Locks::dex_lock_, Locks::mutator_lock_)
-      REQUIRES(Locks::heap_bitmap_lock_) {
-    if (!dex_cache.IsNull()) {
-      uint32_t cache = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(dex_cache.Ptr()));
-      if (dex_caches_visited_.insert(cache).second) {
-        dex_cache->VisitNativeRoots<kDefaultVerifyFlags, kWithoutReadBarrier>(*this);
-        collector_->dex_caches_.erase(cache);
+  void operator()(uint8_t* page_begin, uint8_t* first_obj)
+      ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
+    DCHECK_ALIGNED(page_begin, kPageSize);
+    uint8_t* page_end = page_begin + kPageSize;
+    uint32_t obj_size;
+    for (uint8_t* byte = first_obj; byte < page_end;) {
+      TrackingHeader* header = reinterpret_cast<TrackingHeader*>(byte);
+      obj_size = header->GetSize();
+      LinearAllocKind kind = header->GetKind();
+      if (obj_size == 0) {
+        // No more objects in this page to visit.
+        DCHECK_EQ(static_cast<uint32_t>(kind), 0u);
+        break;
       }
+      uint8_t* obj = byte + sizeof(TrackingHeader);
+      uint8_t* obj_end = byte + obj_size;
+      if (header->Is16Aligned()) {
+        obj = AlignUp(obj, 16);
+      }
+      if (UNLIKELY(obj >= page_end)) {
+        break;
+      }
+      VisitObject(kind, obj, std::max(obj, page_begin), std::min(obj_end, page_end));
+      if (ArenaAllocator::IsRunningOnMemoryTool()) {
+        obj_size += ArenaAllocator::kMemoryToolRedZoneBytes;
+      }
+      byte += RoundUp(obj_size, LinearAlloc::kAlignment);
     }
   }
 
-  void VisitDexCache(mirror::DexCache* dex_cache)
-      REQUIRES_SHARED(Locks::dex_lock_, Locks::mutator_lock_)
-      REQUIRES(Locks::heap_bitmap_lock_) {
-    dex_cache->VisitNativeRoots<kDefaultVerifyFlags, kWithoutReadBarrier>(*this);
-  }
-
-  void operator()(mirror::Object* obj)
-      ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
-    DCHECK(obj->IsClass<kDefaultVerifyFlags>());
-    ObjPtr<mirror::Class> klass = obj->AsClass<kDefaultVerifyFlags>();
-    VisitClassRoots(klass);
-  }
-
-  // For ClassTable::Visit()
-  bool operator()(ObjPtr<mirror::Class> klass)
-      ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
-    if (!klass.IsNull()) {
-      VisitClassRoots(klass);
-    }
-    return true;
-  }
-
   void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
-      ALWAYS_INLINE
-      REQUIRES_SHARED(Locks::mutator_lock_) {
+      ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
     if (!root->IsNull()) {
       VisitRoot(root);
     }
   }
 
   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
-      ALWAYS_INLINE
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    collector_->UpdateRoot(root);
-  }
-
- private:
-  void VisitClassRoots(ObjPtr<mirror::Class> klass)
       ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
-    mirror::Class* klass_ptr = klass.Ptr();
-    uint32_t k = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(klass_ptr));
-    // No reason to visit native roots of class in immune spaces.
-    if ((collector_->bump_pointer_space_->HasAddress(klass_ptr)
-         || collector_->non_moving_space_->HasAddress(klass_ptr))
-        && classes_visited_.insert(k).second) {
-      klass->VisitNativeRoots<kWithoutReadBarrier, /*kVisitProxyMethod*/false>(*this,
-                                                                               pointer_size_);
-      klass->VisitObsoleteDexCaches<kWithoutReadBarrier>(*this);
-      klass->VisitObsoleteClass<kWithoutReadBarrier>(*this);
+    mirror::Object* old_ref = root->AsMirrorPtr();
+    DCHECK_NE(old_ref, nullptr);
+    if (collector_->live_words_bitmap_->HasAddress(old_ref)) {
+      if (reinterpret_cast<uint8_t*>(old_ref) >= collector_->black_allocations_begin_) {
+        mirror::Object* new_ref = collector_->PostCompactBlackObjAddr(old_ref);
+        root->Assign(new_ref);
+      } else if (collector_->live_words_bitmap_->Test(old_ref)) {
+        DCHECK(collector_->moving_space_bitmap_->Test(old_ref)) << old_ref;
+        mirror::Object* new_ref = collector_->PostCompactOldObjAddr(old_ref);
+        root->Assign(new_ref);
+      }
     }
   }
 
-  std::unordered_set<uint32_t> dex_caches_visited_;
-  std::unordered_set<uint32_t> classes_visited_;
+ private:
+  void VisitObject(LinearAllocKind kind,
+                   void* obj,
+                   uint8_t* start_boundary,
+                   uint8_t* end_boundary)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    switch (kind) {
+      case LinearAllocKind::kGCRootArray:
+        {
+          GcRoot<mirror::Object>* root = reinterpret_cast<GcRoot<mirror::Object>*>(start_boundary);
+          GcRoot<mirror::Object>* last = reinterpret_cast<GcRoot<mirror::Object>*>(end_boundary);
+          for (; root < last; root++) {
+            VisitRootIfNonNull(root->AddressWithoutBarrier());
+          }
+        }
+        break;
+      case LinearAllocKind::kArtMethodArray:
+        {
+          LengthPrefixedArray<ArtMethod>* array = static_cast<LengthPrefixedArray<ArtMethod>*>(obj);
+          // Old methods are clobbered in debug builds. Check size to confirm if the array
+          // has any GC roots to visit. See ClassLinker::LinkMethodsHelper::ClobberOldMethods()
+          if (array->size() > 0) {
+            if (pointer_size_ == PointerSize::k64) {
+              ArtMethod::VisitArrayRoots<PointerSize::k64>(*this,
+                                                           start_boundary,
+                                                           end_boundary,
+                                                           array);
+            } else {
+              DCHECK_EQ(pointer_size_, PointerSize::k32);
+              ArtMethod::VisitArrayRoots<PointerSize::k32>(*this,
+                                                           start_boundary,
+                                                           end_boundary,
+                                                           array);
+            }
+          }
+        }
+        break;
+      case LinearAllocKind::kArtMethod:
+        ArtMethod::VisitRoots(*this, start_boundary, end_boundary, static_cast<ArtMethod*>(obj));
+        break;
+      case LinearAllocKind::kArtFieldArray:
+        ArtField::VisitArrayRoots(*this,
+                                  start_boundary,
+                                  end_boundary,
+                                  static_cast<LengthPrefixedArray<ArtField>*>(obj));
+        break;
+      case LinearAllocKind::kDexCacheArray:
+        {
+          mirror::DexCachePair<mirror::Object>* first =
+              reinterpret_cast<mirror::DexCachePair<mirror::Object>*>(start_boundary);
+          mirror::DexCachePair<mirror::Object>* last =
+              reinterpret_cast<mirror::DexCachePair<mirror::Object>*>(end_boundary);
+          mirror::DexCache::VisitDexCachePairRoots(*this, first, last);
+        }
+        break;
+      case LinearAllocKind::kNoGCRoots:
+        break;
+    }
+  }
+
   MarkCompact* const collector_;
-  PointerSize pointer_size_;
+  const PointerSize pointer_size_;
 };
 
 void MarkCompact::PreCompactionPhase() {
@@ -1685,29 +1724,16 @@
       thread->AdjustTlab(black_objs_slide_diff_);
     }
   }
-
   {
-    // Native roots must be updated before updating system weaks as class linker
-    // holds roots to class loaders and dex-caches as weak roots. Also, space
-    // mremap must be done after this step as we require reading
-    // class/dex-cache/class-loader content for updating native roots.
     TimingLogger::ScopedTiming t2("(Paused)UpdateNativeRoots", GetTimings());
-    ClassLinker* class_linker = runtime->GetClassLinker();
-    NativeRootsUpdateVisitor visitor(this, class_linker->GetImagePointerSize());
+    NativeRootsUpdateVisitor visitor(this);
     {
       ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_);
-      class_linker->VisitBootClasses(&visitor);
-      class_linker->VisitClassLoaders(&visitor);
+      runtime->GetClassLinker()->VisitClassLoaders(&visitor);
     }
-    {
-      WriterMutexLock wmu(thread_running_gc_, *Locks::heap_bitmap_lock_);
-      ReaderMutexLock rmu(thread_running_gc_, *Locks::dex_lock_);
-      class_linker->VisitDexCaches(&visitor);
-      for (uint32_t cache : dex_caches_) {
-        visitor.VisitDexCache(reinterpret_cast<mirror::DexCache*>(cache));
-      }
-    }
-    dex_caches_.clear();
+    GcVisitedArenaPool* arena_pool =
+        static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool());
+    arena_pool->VisitRoots(visitor);
   }
 
   SweepSystemWeaks(thread_running_gc_, runtime, /*paused*/true);
@@ -2380,14 +2406,6 @@
     UpdateLivenessInfo(obj);
   }
   obj->VisitReferences(visitor, visitor);
-  RememberDexCaches(obj);
-}
-
-void MarkCompact::RememberDexCaches(mirror::Object* obj) {
-  if (obj->IsDexCache()) {
-    dex_caches_.insert(
-            mirror::CompressedReference<mirror::Object>::FromMirrorPtr(obj).AsVRegValue());
-  }
 }
 
 // Scan anything that's on the mark stack.
diff --git a/runtime/linear_alloc-inl.h b/runtime/linear_alloc-inl.h
new file mode 100644
index 0000000..a6b3df3
--- /dev/null
+++ b/runtime/linear_alloc-inl.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_LINEAR_ALLOC_INL_H_
+#define ART_RUNTIME_LINEAR_ALLOC_INL_H_
+
+#include "linear_alloc.h"
+
+#include "base/gc_visited_arena_pool.h"
+#include "thread-current-inl.h"
+
+namespace art {
+
+inline void LinearAlloc::SetFirstObject(void* begin, size_t bytes) const {
+  DCHECK(track_allocations_);
+  uint8_t* end = static_cast<uint8_t*>(begin) + bytes;
+  Arena* arena = allocator_.GetHeadArena();
+  DCHECK_NE(arena, nullptr);
+  // The object would either be in the head arena or the next one.
+  if (UNLIKELY(begin < arena->Begin() || begin >= arena->End())) {
+    arena = arena->Next();
+  }
+  DCHECK(begin >= arena->Begin() && end <= arena->End());
+  down_cast<TrackedArena*>(arena)->SetFirstObject(static_cast<uint8_t*>(begin), end);
+}
+
+inline void* LinearAlloc::Realloc(Thread* self,
+                                  void* ptr,
+                                  size_t old_size,
+                                  size_t new_size,
+                                  LinearAllocKind kind) {
+  MutexLock mu(self, lock_);
+  if (track_allocations_) {
+    if (ptr != nullptr) {
+      // Realloc cannot be called on 16-byte-aligned allocations, as Realloc
+      // doesn't guarantee that alignment. So the header must be immediately
+      // prior to ptr.
+      TrackingHeader* header = reinterpret_cast<TrackingHeader*>(ptr) - 1;
+      DCHECK_EQ(header->GetKind(), kind);
+      old_size += sizeof(TrackingHeader);
+      DCHECK_EQ(header->GetSize(), old_size);
+      ptr = header;
+    } else {
+      DCHECK_EQ(old_size, 0u);
+    }
+    new_size += sizeof(TrackingHeader);
+    void* ret = allocator_.Realloc(ptr, old_size, new_size);
+    new (ret) TrackingHeader(new_size, kind);
+    SetFirstObject(ret, new_size);
+    return static_cast<TrackingHeader*>(ret) + 1;
+  } else {
+    return allocator_.Realloc(ptr, old_size, new_size);
+  }
+}
+
+inline void* LinearAlloc::Alloc(Thread* self, size_t size, LinearAllocKind kind) {
+  MutexLock mu(self, lock_);
+  if (track_allocations_) {
+    size += sizeof(TrackingHeader);
+    TrackingHeader* storage = new (allocator_.Alloc(size)) TrackingHeader(size, kind);
+    SetFirstObject(storage, size);
+    return storage + 1;
+  } else {
+    return allocator_.Alloc(size);
+  }
+}
+
+inline void* LinearAlloc::AllocAlign16(Thread* self, size_t size, LinearAllocKind kind) {
+  MutexLock mu(self, lock_);
+  DCHECK_ALIGNED(size, 16);
+  if (track_allocations_) {
+    size_t mem_tool_bytes = ArenaAllocator::IsRunningOnMemoryTool()
+                            ? ArenaAllocator::kMemoryToolRedZoneBytes : 0;
+    uint8_t* ptr = allocator_.CurrentPtr() + sizeof(TrackingHeader);
+    uintptr_t padding =
+        RoundUp(reinterpret_cast<uintptr_t>(ptr), 16) - reinterpret_cast<uintptr_t>(ptr);
+    DCHECK_LT(padding, 16u);
+    size_t required_size = size + sizeof(TrackingHeader) + padding;
+
+    if (allocator_.CurrentArenaUnusedBytes() < required_size + mem_tool_bytes) {
+      // The allocator will require a new arena, which is expected to be
+      // 16-byte aligned.
+      static_assert(ArenaAllocator::kArenaAlignment >= 16,
+                    "Expecting sufficient alignment for new Arena.");
+      required_size = size + RoundUp(sizeof(TrackingHeader), 16);
+    }
+    // Using ArenaAllocator's AllocAlign16 now would disturb the alignment by
+    // trying to make the header 16-byte aligned. The alignment requirements are
+    // already addressed here, so we only need the allocator to bump the pointer.
+    ptr = static_cast<uint8_t*>(allocator_.Alloc(required_size));
+    new (ptr) TrackingHeader(required_size, kind, /*is_16_aligned=*/true);
+    SetFirstObject(ptr, required_size);
+    return AlignUp(ptr + sizeof(TrackingHeader), 16);
+  } else {
+    return allocator_.AllocAlign16(size);
+  }
+}
+
+inline size_t LinearAlloc::GetUsedMemory() const {
+  MutexLock mu(Thread::Current(), lock_);
+  return allocator_.BytesUsed();
+}
+
+inline ArenaPool* LinearAlloc::GetArenaPool() {
+  MutexLock mu(Thread::Current(), lock_);
+  return allocator_.GetArenaPool();
+}
+
+inline bool LinearAlloc::Contains(void* ptr) const {
+  MutexLock mu(Thread::Current(), lock_);
+  return allocator_.Contains(ptr);
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_LINEAR_ALLOC_INL_H_
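
With tracking enabled, a plain Alloc() lays out [TrackingHeader][payload]
and returns the address just past the header; only AllocAlign16() may
insert padding in between and marks that with the 16-alignment bit. The
minimal sketch below restates the invariant Realloc() above relies on;
HeaderOf is a hypothetical helper, not part of the change.

  // Recover the tracking header of a pointer returned by LinearAlloc::Alloc()
  // (not valid for AllocAlign16(), which may pad between header and payload).
  inline TrackingHeader* HeaderOf(void* payload) {
    return reinterpret_cast<TrackingHeader*>(payload) - 1;
  }
  // Note: GetSize() includes sizeof(TrackingHeader), so the payload itself
  // occupies HeaderOf(p)->GetSize() - sizeof(TrackingHeader) bytes.
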
diff --git a/runtime/linear_alloc.cc b/runtime/linear_alloc.cc
deleted file mode 100644
index 3f01fc3..0000000
--- a/runtime/linear_alloc.cc
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "linear_alloc.h"
-
-#include "thread-current-inl.h"
-
-namespace art {
-
-LinearAlloc::LinearAlloc(ArenaPool* pool) : lock_("linear alloc"), allocator_(pool) {
-}
-
-void* LinearAlloc::Realloc(Thread* self, void* ptr, size_t old_size, size_t new_size) {
-  MutexLock mu(self, lock_);
-  return allocator_.Realloc(ptr, old_size, new_size);
-}
-
-void* LinearAlloc::Alloc(Thread* self, size_t size) {
-  MutexLock mu(self, lock_);
-  return allocator_.Alloc(size);
-}
-
-void* LinearAlloc::AllocAlign16(Thread* self, size_t size) {
-  MutexLock mu(self, lock_);
-  return allocator_.AllocAlign16(size);
-}
-
-size_t LinearAlloc::GetUsedMemory() const {
-  MutexLock mu(Thread::Current(), lock_);
-  return allocator_.BytesUsed();
-}
-
-ArenaPool* LinearAlloc::GetArenaPool() {
-  MutexLock mu(Thread::Current(), lock_);
-  return allocator_.GetArenaPool();
-}
-
-bool LinearAlloc::Contains(void* ptr) const {
-  MutexLock mu(Thread::Current(), lock_);
-  return allocator_.Contains(ptr);
-}
-
-bool LinearAlloc::ContainsUnsafe(void* ptr) const {
-  return allocator_.Contains(ptr);
-}
-
-}  // namespace art
diff --git a/runtime/linear_alloc.h b/runtime/linear_alloc.h
index 1d01f84..fe92d19 100644
--- a/runtime/linear_alloc.h
+++ b/runtime/linear_alloc.h
@@ -18,27 +18,70 @@
 #define ART_RUNTIME_LINEAR_ALLOC_H_
 
 #include "base/arena_allocator.h"
+#include "base/casts.h"
 #include "base/mutex.h"
 
 namespace art {
 
 class ArenaPool;
 
+enum class LinearAllocKind : uint32_t {
+  kNoGCRoots,
+  kGCRootArray,
+  kArtMethodArray,
+  kArtFieldArray,
+  kDexCacheArray,
+  kArtMethod
+};
+
+// Header for every allocation in LinearAlloc. The header provides the type
+// and size information to the GC for invoking the right visitor.
+class TrackingHeader final {
+ public:
+  static constexpr uint32_t kIs16Aligned = 1;
+  TrackingHeader(size_t size, LinearAllocKind kind, bool is_16_aligned = false)
+      : kind_(kind), size_(dchecked_integral_cast<uint32_t>(size)) {
+    // We need the last bit to store 16-byte alignment flag.
+    CHECK_EQ(size_ & kIs16Aligned, 0u);
+    if (is_16_aligned) {
+      size_ |= kIs16Aligned;
+    }
+  }
+
+  LinearAllocKind GetKind() const { return kind_; }
+  size_t GetSize() const { return size_ & ~kIs16Aligned; }
+  bool Is16Aligned() const { return size_ & kIs16Aligned; }
+
+ private:
+  LinearAllocKind kind_;
+  uint32_t size_;
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(TrackingHeader);
+};
+
+std::ostream& operator<<(std::ostream& os, LinearAllocKind value);
+
 // TODO: Support freeing if we add class unloading.
 class LinearAlloc {
  public:
-  explicit LinearAlloc(ArenaPool* pool);
+  static constexpr size_t kAlignment = 8u;
+  static_assert(kAlignment >= ArenaAllocator::kAlignment);
+  static_assert(sizeof(TrackingHeader) == ArenaAllocator::kAlignment);
 
-  void* Alloc(Thread* self, size_t size) REQUIRES(!lock_);
-  void* AllocAlign16(Thread* self, size_t size) REQUIRES(!lock_);
+  explicit LinearAlloc(ArenaPool* pool, bool track_allocs)
+      : lock_("linear alloc"), allocator_(pool), track_allocations_(track_allocs) {}
+
+  void* Alloc(Thread* self, size_t size, LinearAllocKind kind) REQUIRES(!lock_);
+  void* AllocAlign16(Thread* self, size_t size, LinearAllocKind kind) REQUIRES(!lock_);
 
   // Realloc never frees the input pointer, it is the caller's job to do this if necessary.
-  void* Realloc(Thread* self, void* ptr, size_t old_size, size_t new_size) REQUIRES(!lock_);
+  void* Realloc(Thread* self, void* ptr, size_t old_size, size_t new_size, LinearAllocKind kind)
+      REQUIRES(!lock_);
 
   // Allocate an array of structs of type T.
   template<class T>
-  T* AllocArray(Thread* self, size_t elements) REQUIRES(!lock_) {
-    return reinterpret_cast<T*>(Alloc(self, elements * sizeof(T)));
+  T* AllocArray(Thread* self, size_t elements, LinearAllocKind kind) REQUIRES(!lock_) {
+    return reinterpret_cast<T*>(Alloc(self, elements * sizeof(T), kind));
   }
 
   // Return the number of bytes used in the allocator.
@@ -46,16 +89,23 @@
 
   ArenaPool* GetArenaPool() REQUIRES(!lock_);
 
-  // Return true if the linear alloc contrains an address.
+  // Return true if the linear alloc contains an address.
   bool Contains(void* ptr) const REQUIRES(!lock_);
 
   // Unsafe version of 'Contains' only to be used when the allocator is going
   // to be deleted.
-  bool ContainsUnsafe(void* ptr) const NO_THREAD_SAFETY_ANALYSIS;
+  bool ContainsUnsafe(void* ptr) const NO_THREAD_SAFETY_ANALYSIS {
+    return allocator_.Contains(ptr);
+  }
+
+  // Set the given object as the first object for all the pages where the
+  // page-beginning overlaps with the object.
+  void SetFirstObject(void* begin, size_t bytes) const REQUIRES(lock_);
 
  private:
   mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   ArenaAllocator allocator_ GUARDED_BY(lock_);
+  const bool track_allocations_;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(LinearAlloc);
 };
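
On the GC side (see MarkCompact::NativeRootsUpdateVisitor above), the
payload of a tracked allocation is re-derived from its TrackingHeader
using the kIs16Aligned bit. A minimal sketch of that step follows,
assuming ART's AlignUp() from base/bit_utils.h; GetPayload is an
illustrative name, not part of the change.

  // The payload of a 16-byte-aligned allocation starts at the next 16-byte
  // boundary after the header; all other payloads start right after it.
  inline uint8_t* GetPayload(TrackingHeader* header) {
    uint8_t* payload = reinterpret_cast<uint8_t*>(header + 1);
    return header->Is16Aligned() ? AlignUp(payload, 16) : payload;
  }
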
diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h
index b937c2c..402bb72 100644
--- a/runtime/mirror/dex_cache-inl.h
+++ b/runtime/mirror/dex_cache-inl.h
@@ -29,7 +29,7 @@
 #include "class_linker.h"
 #include "dex/dex_file.h"
 #include "gc_root-inl.h"
-#include "linear_alloc.h"
+#include "linear_alloc-inl.h"
 #include "mirror/call_site.h"
 #include "mirror/class.h"
 #include "mirror/method_type.h"
@@ -54,7 +54,10 @@
 }
 
 template<typename T, size_t kMaxCacheSize>
-T* DexCache::AllocArray(MemberOffset obj_offset, MemberOffset num_offset, size_t num) {
+T* DexCache::AllocArray(MemberOffset obj_offset,
+                        MemberOffset num_offset,
+                        size_t num,
+                        LinearAllocKind kind) {
   num = std::min<size_t>(num, kMaxCacheSize);
   if (num == 0) {
     return nullptr;
@@ -74,7 +77,7 @@
     DCHECK(alloc->Contains(array));
     return array;  // Other thread just allocated the array.
   }
-  array = reinterpret_cast<T*>(alloc->AllocAlign16(self, RoundUp(num * sizeof(T), 16)));
+  array = reinterpret_cast<T*>(alloc->AllocAlign16(self, RoundUp(num * sizeof(T), 16), kind));
   InitializeArray(array);  // Ensure other threads see the array initialized.
   dex_cache->SetField32Volatile<false, false>(num_offset, num);
   dex_cache->SetField64Volatile<false, false>(obj_offset, reinterpret_cast64<uint64_t>(array));
@@ -136,7 +139,10 @@
   StringDexCacheType* strings = GetStrings();
   if (UNLIKELY(strings == nullptr)) {
     strings = AllocArray<StringDexCacheType, kDexCacheStringCacheSize>(
-        StringsOffset(), NumStringsOffset(), GetDexFile()->NumStringIds());
+        StringsOffset(),
+        NumStringsOffset(),
+        GetDexFile()->NumStringIds(),
+        LinearAllocKind::kDexCacheArray);
   }
   strings[StringSlotIndex(string_idx)].store(
       StringDexCachePair(resolved, string_idx.index_), std::memory_order_relaxed);
@@ -188,7 +194,10 @@
   TypeDexCacheType* resolved_types = GetResolvedTypes();
   if (UNLIKELY(resolved_types == nullptr)) {
     resolved_types = AllocArray<TypeDexCacheType, kDexCacheTypeCacheSize>(
-        ResolvedTypesOffset(), NumResolvedTypesOffset(), GetDexFile()->NumTypeIds());
+        ResolvedTypesOffset(),
+        NumResolvedTypesOffset(),
+        GetDexFile()->NumTypeIds(),
+        LinearAllocKind::kDexCacheArray);
   }
   // TODO default transaction support.
   // Use a release store for SetResolvedType. This is done to prevent other threads from seeing a
@@ -237,7 +246,10 @@
   MethodTypeDexCacheType* methods = GetResolvedMethodTypes();
   if (UNLIKELY(methods == nullptr)) {
     methods = AllocArray<MethodTypeDexCacheType, kDexCacheMethodTypeCacheSize>(
-        ResolvedMethodTypesOffset(), NumResolvedMethodTypesOffset(), GetDexFile()->NumProtoIds());
+        ResolvedMethodTypesOffset(),
+        NumResolvedMethodTypesOffset(),
+        GetDexFile()->NumProtoIds(),
+        LinearAllocKind::kDexCacheArray);
   }
   methods[MethodTypeSlotIndex(proto_idx)].store(
       MethodTypeDexCachePair(resolved, proto_idx.index_), std::memory_order_relaxed);
@@ -285,7 +297,10 @@
   GcRoot<CallSite>* call_sites = GetResolvedCallSites();
   if (UNLIKELY(call_sites == nullptr)) {
     call_sites = AllocArray<GcRoot<CallSite>, std::numeric_limits<size_t>::max()>(
-        ResolvedCallSitesOffset(), NumResolvedCallSitesOffset(), GetDexFile()->NumCallSiteIds());
+        ResolvedCallSitesOffset(),
+        NumResolvedCallSitesOffset(),
+        GetDexFile()->NumCallSiteIds(),
+        LinearAllocKind::kGCRootArray);
   }
   GcRoot<mirror::CallSite>& target = call_sites[call_site_idx];
 
@@ -323,7 +338,10 @@
   FieldDexCacheType* fields = GetResolvedFields();
   if (UNLIKELY(fields == nullptr)) {
     fields = AllocArray<FieldDexCacheType, kDexCacheFieldCacheSize>(
-        ResolvedFieldsOffset(), NumResolvedFieldsOffset(), GetDexFile()->NumFieldIds());
+        ResolvedFieldsOffset(),
+        NumResolvedFieldsOffset(),
+        GetDexFile()->NumFieldIds(),
+        LinearAllocKind::kNoGCRoots);
   }
   SetNativePair(fields, FieldSlotIndex(field_idx), pair);
 }
@@ -350,7 +368,10 @@
   MethodDexCacheType* methods = GetResolvedMethods();
   if (UNLIKELY(methods == nullptr)) {
     methods = AllocArray<MethodDexCacheType, kDexCacheMethodCacheSize>(
-        ResolvedMethodsOffset(), NumResolvedMethodsOffset(), GetDexFile()->NumMethodIds());
+        ResolvedMethodsOffset(),
+        NumResolvedMethodsOffset(),
+        GetDexFile()->NumMethodIds(),
+        LinearAllocKind::kNoGCRoots);
   }
   SetNativePair(methods, MethodSlotIndex(method_idx), pair);
 }
@@ -396,6 +417,15 @@
   }
 }
 
+template <typename Visitor>
+void DexCache::VisitDexCachePairRoots(Visitor& visitor,
+                                      DexCachePair<Object>* pairs_begin,
+                                      DexCachePair<Object>* pairs_end) {
+  for (; pairs_begin < pairs_end; pairs_begin++) {
+    visitor.VisitRootIfNonNull(pairs_begin->object.AddressWithoutBarrier());
+  }
+}
+
 template <bool kVisitNativeRoots,
           VerifyObjectFlags kVerifyFlags,
           ReadBarrierOption kReadBarrierOption,
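
VisitDexCachePairRoots only requires its visitor to expose VisitRootIfNonNull() on each pair's root slot. The standalone analogue below shows that shape with hypothetical stand-ins (FakeRoot, FakePair, CountingVisitor) in place of GcRoot<mirror::Object> and DexCachePair<Object>; it is a sketch of the calling convention, not ART code.

#include <cstddef>

struct FakeRoot { void* ref = nullptr; };                 // stand-in for GcRoot<mirror::Object>
struct FakePair { FakeRoot object; unsigned index = 0; }; // stand-in for DexCachePair<Object>

// A visitor only needs VisitRootIfNonNull(); this one just counts live slots.
struct CountingVisitor {
  size_t visited = 0;
  void VisitRootIfNonNull(void** root) {
    if (*root != nullptr) {
      ++visited;
    }
  }
};

// Same loop structure as DexCache::VisitDexCachePairRoots above.
template <typename Visitor>
void VisitPairRoots(Visitor& visitor, FakePair* begin, FakePair* end) {
  for (; begin < end; ++begin) {
    visitor.VisitRootIfNonNull(&begin->object.ref);
  }
}

The real implementation hands the visitor AddressWithoutBarrier(), i.e. the raw root slot, so the caller decides how the reference is read or updated.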
diff --git a/runtime/mirror/dex_cache.h b/runtime/mirror/dex_cache.h
index 78c6bb5..7c7b11f 100644
--- a/runtime/mirror/dex_cache.h
+++ b/runtime/mirror/dex_cache.h
@@ -27,6 +27,7 @@
 #include "object_array.h"
 
 namespace art {
+enum class LinearAllocKind : uint32_t;
 
 namespace linker {
 class ImageWriter;
@@ -37,7 +38,6 @@
 struct DexCacheOffsets;
 class DexFile;
 union JValue;
-class LinearAlloc;
 class ReflectiveValueVisitor;
 class Thread;
 
@@ -189,6 +189,14 @@
     return sizeof(DexCache);
   }
 
+  // Visit the GC roots of the DexCachePair array in the [pairs_begin, pairs_end) range.
+  template <typename Visitor>
+  static void VisitDexCachePairRoots(Visitor& visitor,
+                                     DexCachePair<Object>* pairs_begin,
+                                     DexCachePair<Object>* pairs_end)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+
   void Initialize(const DexFile* dex_file, ObjPtr<ClassLoader> class_loader)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(Locks::dex_lock_);
@@ -453,7 +461,7 @@
  private:
   // Allocate new array in linear alloc and save it in the given fields.
   template<typename T, size_t kMaxCacheSize>
-  T* AllocArray(MemberOffset obj_offset, MemberOffset num_offset, size_t num)
+  T* AllocArray(MemberOffset obj_offset, MemberOffset num_offset, size_t num, LinearAllocKind kind)
      REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Visit instance fields of the dex cache as well as its associated arrays.
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 4b161c6..6de4dda 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -519,7 +519,7 @@
   // Destroy allocators before shutting down the MemMap because they may use it.
   java_vm_.reset();
   linear_alloc_.reset();
-  low_4gb_arena_pool_.reset();
+  linear_alloc_arena_pool_.reset();
   arena_pool_.reset();
   jit_arena_pool_.reset();
   protected_fault_page_.Reset();
@@ -1744,9 +1744,14 @@
     jit_arena_pool_.reset(new MemMapArenaPool(/* low_4gb= */ false, "CompilerMetadata"));
   }
 
-  if (IsAotCompiler() && Is64BitInstructionSet(kRuntimeISA)) {
-    // 4gb, no malloc. Explanation in header.
-    low_4gb_arena_pool_.reset(new MemMapArenaPool(/* low_4gb= */ true));
+  // For 64-bit compilers, the pool needs to be in the low 4GB when we are cross-compiling for a
+  // 32-bit target: the dex cache arrays then use 32-bit pointers, which cannot hold ArtMethod
+  // pointers allocated above 4GB.
+  const bool low_4gb = IsAotCompiler() && Is64BitInstructionSet(kRuntimeISA);
+  if (gUseUserfaultfd) {
+    linear_alloc_arena_pool_.reset(new GcVisitedArenaPool(low_4gb));
+  } else if (low_4gb) {
+    linear_alloc_arena_pool_.reset(new MemMapArenaPool(low_4gb));
   }
   linear_alloc_.reset(CreateLinearAlloc());
 
@@ -3117,13 +3122,12 @@
   return false;
 }
 
+
 LinearAlloc* Runtime::CreateLinearAlloc() {
-  // For 64 bit compilers, it needs to be in low 4GB in the case where we are cross compiling for a
-  // 32 bit target. In this case, we have 32 bit pointers in the dex cache arrays which can't hold
-  // when we have 64 bit ArtMethod pointers.
-  return (IsAotCompiler() && Is64BitInstructionSet(kRuntimeISA))
-      ? new LinearAlloc(low_4gb_arena_pool_.get())
-      : new LinearAlloc(arena_pool_.get());
+  ArenaPool* pool = linear_alloc_arena_pool_.get();
+  return pool != nullptr
+      ? new LinearAlloc(pool, gUseUserfaultfd)
+      : new LinearAlloc(arena_pool_.get(), /*track_allocs=*/ false);
 }
 
 double Runtime::GetHashTableMinLoadFactor() const {
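
The arena-pool selection above boils down to a small decision: with the userfaultfd GC the linear alloc must come from the GC-visited pool (still created with the low-4GB flag so the cross-compiling constraint is preserved), otherwise a 64-bit AOT compiler gets a low-4GB MemMapArenaPool, and everything else falls back to arena_pool_. A compile-time sketch of that decision follows; PoolChoice and PickLinearAllocPool are hypothetical names used only for illustration.

enum class PoolChoice { kGcVisited, kLowFourGb, kDefault };

// Mirrors the selection in the runtime initialization hunk above: the
// userfaultfd GC takes precedence, then the AOT low-4GB requirement,
// otherwise the regular arena pool is used.
constexpr PoolChoice PickLinearAllocPool(bool use_userfaultfd_gc, bool low_4gb) {
  if (use_userfaultfd_gc) {
    return PoolChoice::kGcVisited;
  }
  return low_4gb ? PoolChoice::kLowFourGb : PoolChoice::kDefault;
}

static_assert(PickLinearAllocPool(/*use_userfaultfd_gc=*/true, /*low_4gb=*/true) ==
              PoolChoice::kGcVisited);
static_assert(PickLinearAllocPool(false, true) == PoolChoice::kLowFourGb);
static_assert(PickLinearAllocPool(false, false) == PoolChoice::kDefault);

CreateLinearAlloc then only needs to check whether linear_alloc_arena_pool_ exists, which is why the low-4GB reasoning moved out of it and up to where the pool is created.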
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 42c5235..ca968a2 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -784,6 +784,9 @@
   // Create the JIT and instrumentation and code cache.
   void CreateJit();
 
+  ArenaPool* GetLinearAllocArenaPool() {
+    return linear_alloc_arena_pool_.get();
+  }
   ArenaPool* GetArenaPool() {
     return arena_pool_.get();
   }
@@ -1238,10 +1241,13 @@
 
   std::unique_ptr<ArenaPool> jit_arena_pool_;
   std::unique_ptr<ArenaPool> arena_pool_;
-  // Special low 4gb pool for compiler linear alloc. We need ArtFields to be in low 4gb if we are
-  // compiling using a 32 bit image on a 64 bit compiler in case we resolve things in the image
-  // since the field arrays are int arrays in this case.
-  std::unique_ptr<ArenaPool> low_4gb_arena_pool_;
+  // This pool is used for linear alloc when the userfaultfd GC is in use, or
+  // when the compiler's linear alloc requires a low-4GB pool. Otherwise,
+  // arena_pool_ is used instead.
+  // ArtFields must live in the low 4GB when a 64-bit compiler builds a 32-bit
+  // image and resolves entries in it, because the image's field arrays are
+  // int arrays in that case.
+  std::unique_ptr<ArenaPool> linear_alloc_arena_pool_;
 
   // Shared linear alloc for now.
   std::unique_ptr<LinearAlloc> linear_alloc_;
diff --git a/test/616-cha-unloading/cha_unload.cc b/test/616-cha-unloading/cha_unload.cc
index f9d3874..d776023 100644
--- a/test/616-cha-unloading/cha_unload.cc
+++ b/test/616-cha-unloading/cha_unload.cc
@@ -22,7 +22,7 @@
 #include "base/casts.h"
 #include "class_linker.h"
 #include "jit/jit.h"
-#include "linear_alloc.h"
+#include "linear_alloc-inl.h"
 #include "nativehelper/ScopedUtfChars.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
@@ -79,8 +79,8 @@
   // a reused one that covers the art_method pointer.
   std::unique_ptr<LinearAlloc> alloc(Runtime::Current()->CreateLinearAlloc());
   do {
-    // Ask for a byte - it's sufficient to get an arena.
-    alloc->Alloc(Thread::Current(), 1);
+    // Ask for a word - it's sufficient to get an arena.
+    alloc->Alloc(Thread::Current(), sizeof(void*), LinearAllocKind::kNoGCRoots);
   } while (!alloc->Contains(ptr));
 }
 
diff --git a/test/knownfailures.json b/test/knownfailures.json
index 542ef9a..9c72df9 100644
--- a/test/knownfailures.json
+++ b/test/knownfailures.json
@@ -1127,7 +1127,7 @@
                   "2006-virtual-structural-finalizing",
                   "2007-virtual-structural-finalizable"
                 ],
-        "env_vars": {"ART_USE_READ_BARRIER": "false"},
+        "env_vars": {"ART_USE_READ_BARRIER": "false", "ART_DEFAULT_GC_TYPE": "CMS"},
         "description": ["Relies on the accuracy of the Heap::VisitObjects function which is broken",
                         " when READ_BARRIER==false (I.e. On CMS collector)."],
         "bug": "b/147207934"
@@ -1322,13 +1322,11 @@
         "description": ["Test containing Checker assertions expecting Baker read barriers."]
     },
     {
-        "tests": ["2009-structural-local-ref",
-                  "2035-structural-native-method",
-	          "2036-structural-subclass-shadow",
-	          "2040-huge-native-alloc"],
+        "tests": ["2040-huge-native-alloc"],
         "env_vars": {"ART_USE_READ_BARRIER": "false"},
+        "variant": "debug",
         "bug": "b/242181443",
-        "description": ["Tests temporarily disabled for userfaultfd GC. Remove once native GC-root updation is implemented."]
+        "description": ["Test fails due to delay delebrately added in the userfaultfd GC between marking and compaction."]
     },
     {
         "tests": ["1004-checker-volatile-ref-load"],
@@ -1374,7 +1372,7 @@
     },
     {
         "tests": ["2043-reference-pauses"],
-        "env_vars": {"ART_USE_READ_BARRIER": "false"},
+        "env_vars": {"ART_USE_READ_BARRIER": "false", "ART_DEFAULT_GC_TYPE": "CMS"},
         "variant": "host",
         "bug": "b/232459100",
         "description": ["Fails intermittently for CMS."]
@@ -1437,7 +1435,7 @@
     },
     {
         "tests": ["692-vdex-secondary-loader"],
-        "env_vars": {"ART_USE_READ_BARRIER": "false"},
+        "env_vars": {"ART_USE_READ_BARRIER": "false", "ART_DEFAULT_GC_TYPE": "CMS"},
         "description": ["Uses the low-ram flag which does not work with CMS"]
     },
     {