-rw-r--r-- | runtime/gc/collector/mark_compact.cc | 804
-rw-r--r-- | runtime/gc/collector/mark_compact.h  |  81
2 files changed, 290 insertions, 595 deletions
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc index c1a8b40ed3..85d4c1af71 100644 --- a/runtime/gc/collector/mark_compact.cc +++ b/runtime/gc/collector/mark_compact.cc @@ -308,8 +308,7 @@ static constexpr size_t kMaxNumUffdWorkers = 2; // phase. static constexpr size_t kMutatorCompactionBufferCount = 2048; // Minimum from-space chunk to be madvised (during concurrent compaction) in one go. -// Choose a reasonable size to avoid making too many batched ioctl and madvise calls. -static constexpr ssize_t kMinFromSpaceMadviseSize = 8 * MB; +static constexpr ssize_t kMinFromSpaceMadviseSize = 1 * MB; // Concurrent compaction termination logic is different (and slightly more efficient) if the // kernel has the fault-retry feature (allowing repeated faults on the same page), which was // introduced in 5.7 (https://android-review.git.corp.google.com/c/kernel/common/+/1540088). @@ -396,15 +395,6 @@ static bool IsSigbusFeatureAvailable() { return (gUffdFeatures & kUffdFeaturesForSigbus) == kUffdFeaturesForSigbus; } -size_t MarkCompact::ComputeInfoMapSize() { - size_t moving_space_size = bump_pointer_space_->Capacity(); - size_t chunk_info_vec_size = moving_space_size / kOffsetChunkSize; - size_t nr_moving_pages = DivideByPageSize(moving_space_size); - size_t nr_non_moving_pages = DivideByPageSize(heap_->GetNonMovingSpace()->Capacity()); - return chunk_info_vec_size * sizeof(uint32_t) + nr_non_moving_pages * sizeof(ObjReference) + - nr_moving_pages * (sizeof(ObjReference) + sizeof(uint32_t) + sizeof(Atomic<uint32_t>)); -} - size_t MarkCompact::InitializeInfoMap(uint8_t* p, size_t moving_space_sz) { size_t nr_moving_pages = DivideByPageSize(moving_space_sz); @@ -412,18 +402,14 @@ size_t MarkCompact::InitializeInfoMap(uint8_t* p, size_t moving_space_sz) { vector_length_ = moving_space_sz / kOffsetChunkSize; size_t total = vector_length_ * sizeof(uint32_t); + first_objs_non_moving_space_ = reinterpret_cast<ObjReference*>(p + total); + total += DivideByPageSize(heap_->GetNonMovingSpace()->Capacity()) * sizeof(ObjReference); + first_objs_moving_space_ = reinterpret_cast<ObjReference*>(p + total); total += nr_moving_pages * sizeof(ObjReference); pre_compact_offset_moving_space_ = reinterpret_cast<uint32_t*>(p + total); total += nr_moving_pages * sizeof(uint32_t); - - moving_pages_status_ = reinterpret_cast<Atomic<uint32_t>*>(p + total); - total += nr_moving_pages * sizeof(Atomic<uint32_t>); - - first_objs_non_moving_space_ = reinterpret_cast<ObjReference*>(p + total); - total += DivideByPageSize(heap_->GetNonMovingSpace()->Capacity()) * sizeof(ObjReference); - DCHECK_EQ(total, ComputeInfoMapSize()); return total; } @@ -467,21 +453,26 @@ MarkCompact::MarkCompact(Heap* heap) reinterpret_cast<uintptr_t>(bump_pointer_space_->Begin()), reinterpret_cast<uintptr_t>(bump_pointer_space_->Limit()))); - std::string err_msg; + // Create one MemMap for all the data structures size_t moving_space_size = bump_pointer_space_->Capacity(); - { - // Create one MemMap for all the data structures - info_map_ = MemMap::MapAnonymous("Concurrent mark-compact chunk-info vector", - ComputeInfoMapSize(), - PROT_READ | PROT_WRITE, - /*low_4gb=*/false, - &err_msg); - if (UNLIKELY(!info_map_.IsValid())) { - LOG(FATAL) << "Failed to allocate concurrent mark-compact chunk-info vector: " << err_msg; - } else { - size_t total = InitializeInfoMap(info_map_.Begin(), moving_space_size); - DCHECK_EQ(total, info_map_.Size()); - } + size_t chunk_info_vec_size = moving_space_size / kOffsetChunkSize; + 
size_t nr_moving_pages = DivideByPageSize(moving_space_size); + size_t nr_non_moving_pages = DivideByPageSize(heap->GetNonMovingSpace()->Capacity()); + + std::string err_msg; + info_map_ = MemMap::MapAnonymous("Concurrent mark-compact chunk-info vector", + chunk_info_vec_size * sizeof(uint32_t) + + nr_non_moving_pages * sizeof(ObjReference) + + nr_moving_pages * sizeof(ObjReference) + + nr_moving_pages * sizeof(uint32_t), + PROT_READ | PROT_WRITE, + /*low_4gb=*/ false, + &err_msg); + if (UNLIKELY(!info_map_.IsValid())) { + LOG(FATAL) << "Failed to allocate concurrent mark-compact chunk-info vector: " << err_msg; + } else { + size_t total = InitializeInfoMap(info_map_.Begin(), moving_space_size); + DCHECK_EQ(total, info_map_.Size()); } size_t moving_space_alignment = Heap::BestPageTableAlignment(moving_space_size); @@ -844,12 +835,11 @@ void MarkCompact::InitMovingSpaceFirstObjects(const size_t vec_len) { size_t chunk_idx; // Find the first live word in the space for (chunk_idx = 0; chunk_info_vec_[chunk_idx] == 0; chunk_idx++) { - if (chunk_idx >= vec_len) { + if (chunk_idx > vec_len) { // We don't have any live data on the moving-space. return; } } - DCHECK_LT(chunk_idx, vec_len); // Use live-words bitmap to find the first word offset_in_chunk_word = live_words_bitmap_->FindNthLiveWordOffset(chunk_idx, /*n*/ 0); offset = chunk_idx * kBitsPerVectorWord + offset_in_chunk_word; @@ -870,7 +860,7 @@ void MarkCompact::InitMovingSpaceFirstObjects(const size_t vec_len) { uint32_t page_live_bytes = 0; while (true) { for (; page_live_bytes <= gPageSize; chunk_idx++) { - if (chunk_idx >= vec_len) { + if (chunk_idx > vec_len) { moving_first_objs_count_ = to_space_page_idx; return; } @@ -2047,72 +2037,42 @@ void MarkCompact::MapProcessedPages(uint8_t* to_space_start, } } -void MarkCompact::ZeropageIoctl(void* addr, - size_t length, - bool tolerate_eexist, - bool tolerate_enoent) { +void MarkCompact::ZeropageIoctl(void* addr, bool tolerate_eexist, bool tolerate_enoent) { struct uffdio_zeropage uffd_zeropage; DCHECK(IsAlignedParam(addr, gPageSize)); uffd_zeropage.range.start = reinterpret_cast<uintptr_t>(addr); - uffd_zeropage.range.len = length; + uffd_zeropage.range.len = gPageSize; uffd_zeropage.mode = 0; - while (length > 0) { - int ret = ioctl(uffd_, UFFDIO_ZEROPAGE, &uffd_zeropage); - if (ret == 0) { - DCHECK_EQ(uffd_zeropage.zeropage, static_cast<ssize_t>(length)); - break; - } else if (errno == EAGAIN) { - // Ioctl aborted due to mmap_lock contention. Adjust the values and try - // again. - DCHECK_GE(uffd_zeropage.zeropage, static_cast<ssize_t>(gPageSize)); - length -= uffd_zeropage.zeropage; - uffd_zeropage.range.len = length; - uffd_zeropage.range.start += uffd_zeropage.zeropage; - } else { - DCHECK_EQ(uffd_zeropage.zeropage, -errno); - CHECK((tolerate_enoent && errno == ENOENT) || (tolerate_eexist && errno == EEXIST)) - << "ioctl_userfaultfd: zeropage failed: " << strerror(errno) << ". addr:" << addr; - break; - } + int ret = ioctl(uffd_, UFFDIO_ZEROPAGE, &uffd_zeropage); + if (LIKELY(ret == 0)) { + DCHECK_EQ(uffd_zeropage.zeropage, static_cast<ssize_t>(gPageSize)); + } else { + CHECK((tolerate_enoent && errno == ENOENT) || (tolerate_eexist && errno == EEXIST)) + << "ioctl_userfaultfd: zeropage failed: " << strerror(errno) << ". 
addr:" << addr; } } -void MarkCompact::CopyIoctl(void* dst, void* buffer, size_t length) { +void MarkCompact::CopyIoctl(void* dst, void* buffer) { struct uffdio_copy uffd_copy; uffd_copy.src = reinterpret_cast<uintptr_t>(buffer); uffd_copy.dst = reinterpret_cast<uintptr_t>(dst); - uffd_copy.len = length; + uffd_copy.len = gPageSize; uffd_copy.mode = 0; - while (length > 0) { - int ret = ioctl(uffd_, UFFDIO_COPY, &uffd_copy); - if (ret == 0) { - DCHECK_EQ(uffd_copy.copy, static_cast<ssize_t>(length)); - break; - } else if (errno == EAGAIN) { - // Ioctl aborted due to mmap_lock contention. Adjust the values and try - // again. - DCHECK_GE(uffd_copy.copy, static_cast<ssize_t>(gPageSize)); - length -= uffd_copy.copy; - uffd_copy.len = length; - uffd_copy.src += uffd_copy.copy; - uffd_copy.dst += uffd_copy.copy; - } else { - DCHECK_EQ(uffd_copy.copy, -errno); - LOG(FATAL) << "ioctl_userfaultfd: copy failed: " << strerror(errno) << ". src:" << buffer - << " dst:" << dst; - } - } + CHECK_EQ(ioctl(uffd_, UFFDIO_COPY, &uffd_copy), 0) + << "ioctl_userfaultfd: copy failed: " << strerror(errno) << ". src:" << buffer + << " dst:" << dst; + DCHECK_EQ(uffd_copy.copy, static_cast<ssize_t>(gPageSize)); } template <int kMode, typename CompactionFn> -bool MarkCompact::DoPageCompactionWithStateChange(size_t page_idx, +void MarkCompact::DoPageCompactionWithStateChange(size_t page_idx, + size_t status_arr_len, uint8_t* to_space_page, uint8_t* page, - bool map_immediately, CompactionFn func) { - uint32_t expected_state = static_cast<uint8_t>(PageState::kUnprocessed); - uint32_t desired_state = static_cast<uint8_t>(map_immediately ? PageState::kProcessingAndMapping : - PageState::kProcessing); + PageState expected_state = PageState::kUnprocessed; + PageState desired_state = + kMode == kCopyMode ? PageState::kProcessingAndMapping : PageState::kProcessing; // In the concurrent case (kMode != kFallbackMode) we need to ensure that the update // to moving_spaces_status_[page_idx] is released before the contents of the page are // made accessible to other threads. @@ -2123,57 +2083,32 @@ bool MarkCompact::DoPageCompactionWithStateChange(size_t page_idx, expected_state, desired_state, std::memory_order_acquire)) { func(); if (kMode == kCopyMode) { - if (map_immediately) { - CopyIoctl(to_space_page, page, gPageSize); - // Store is sufficient as no other thread could modify the status at this - // point. Relaxed order is sufficient as the ioctl will act as a fence. - moving_pages_status_[page_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped), - std::memory_order_relaxed); - } else { - // Add the src page's index in the status word. - DCHECK(from_space_map_.HasAddress(page)); - DCHECK_LE(static_cast<size_t>(page - from_space_begin_), - std::numeric_limits<uint32_t>::max()); - uint32_t store_val = page - from_space_begin_; - DCHECK_EQ(store_val & kPageStateMask, 0u); - store_val |= static_cast<uint8_t>(PageState::kProcessed); + CopyIoctl(to_space_page, page); + if (use_uffd_sigbus_) { // Store is sufficient as no other thread would modify the status at this point. 
- moving_pages_status_[page_idx].store(store_val, std::memory_order_release); + moving_pages_status_[page_idx].store(PageState::kProcessedAndMapped, + std::memory_order_release); } } else if (kMode == kMinorFaultMode) { - expected_state = static_cast<uint8_t>(PageState::kProcessing); - desired_state = static_cast<uint8_t>(PageState::kProcessed); + expected_state = PageState::kProcessing; + desired_state = PageState::kProcessed; // the CAS needs to be with release order to ensure that stores to the // page makes it to memory *before* other threads observe that it's // ready to be mapped. if (!moving_pages_status_[page_idx].compare_exchange_strong( expected_state, desired_state, std::memory_order_release)) { // Some mutator has requested to map the page after processing it. - DCHECK_EQ(expected_state, static_cast<uint8_t>(PageState::kProcessingAndMapping)); + DCHECK_EQ(expected_state, PageState::kProcessingAndMapping); + MapProcessedPages</*kFirstPageMapping=*/true>( + to_space_page, moving_pages_status_, page_idx, status_arr_len); } - UNREACHABLE(); } - return true; } else { - // Only GC thread could have set the state to Processed. - DCHECK_NE(expected_state, static_cast<uint8_t>(PageState::kProcessed)); - return false; - } -} - -static void BackOff(uint32_t i) { - static constexpr uint32_t kYieldMax = 5; - // TODO: Consider adding x86 PAUSE and/or ARM YIELD here. - if (i <= kYieldMax) { - sched_yield(); - } else { - // nanosleep is not in the async-signal-safe list, but bionic implements it - // with a pure system call, so it should be fine. - NanoSleep(10000ull * (i - kYieldMax)); + DCHECK_GT(expected_state, PageState::kProcessed); } } -bool MarkCompact::FreeFromSpacePages(size_t cur_page_idx, int mode, size_t end_idx_for_mapping) { +void MarkCompact::FreeFromSpacePages(size_t cur_page_idx, int mode) { // Thanks to sliding compaction, bump-pointer allocations, and reverse // compaction (see CompactMovingSpace) the logic here is pretty simple: find // the to-space page up to which compaction has finished, all the from-space @@ -2183,8 +2118,7 @@ bool MarkCompact::FreeFromSpacePages(size_t cur_page_idx, int mode, size_t end_i // Find the to-space page up to which the corresponding from-space pages can be // freed. for (; idx > cur_page_idx; idx--) { - PageState state = static_cast<PageState>( - static_cast<uint8_t>(moving_pages_status_[idx - 1].load(std::memory_order_acquire))); + PageState state = moving_pages_status_[idx - 1].load(std::memory_order_acquire); if (state == PageState::kMutatorProcessing) { // Some mutator is working on the page. break; @@ -2196,7 +2130,7 @@ bool MarkCompact::FreeFromSpacePages(size_t cur_page_idx, int mode, size_t end_i DCHECK_LE(idx, last_checked_reclaim_page_idx_); if (idx == last_checked_reclaim_page_idx_) { // Nothing to do. - return false; + return; } uint8_t* reclaim_begin; @@ -2283,43 +2217,15 @@ bool MarkCompact::FreeFromSpacePages(size_t cur_page_idx, int mode, size_t end_i // lower than the reclaim range. break; } - bool ret = mode == kFallbackMode; + ssize_t size = last_reclaimed_page_ - reclaim_begin; - if (size > kMinFromSpaceMadviseSize) { - // Map all the pages in the range. - if (mode == kCopyMode && cur_page_idx < end_idx_for_mapping) { - size_t len = MapMovingSpacePages(cur_page_idx, end_idx_for_mapping); - // The pages that were not mapped by gc-thread have to be completed - // before we madvise them. So wait for their status to change to 'mapped'. 
- // The wait is expected to be short as the read state indicates that - // another thread is actively working on mapping the page. - for (size_t i = cur_page_idx + DivideByPageSize(len); i < end_idx_for_mapping; i++) { - PageState state = static_cast<PageState>( - static_cast<uint8_t>(moving_pages_status_[i].load(std::memory_order_relaxed))); - uint32_t backoff_count = 0; - while (state != PageState::kProcessedAndMapped) { - BackOff(backoff_count++); - state = static_cast<PageState>( - static_cast<uint8_t>(moving_pages_status_[i].load(std::memory_order_relaxed))); - } - } - ret = true; - } - // Retain a few pages for subsequent compactions. - const ssize_t gBufferPages = 4 * gPageSize; - DCHECK_LT(gBufferPages, kMinFromSpaceMadviseSize); - size -= gBufferPages; - uint8_t* addr = last_reclaimed_page_ - size; + if (size >= kMinFromSpaceMadviseSize) { int behavior = minor_fault_initialized_ ? MADV_REMOVE : MADV_DONTNEED; - CHECK_EQ(madvise(addr + from_space_slide_diff_, size, behavior), 0) + CHECK_EQ(madvise(reclaim_begin + from_space_slide_diff_, size, behavior), 0) << "madvise of from-space failed: " << strerror(errno); - last_reclaimed_page_ = addr; - cur_reclaimable_page_ = addr; + last_reclaimed_page_ = reclaim_begin; } - CHECK_LE(reclaim_begin, last_reclaimable_page_); - last_reclaimable_page_ = reclaim_begin; last_checked_reclaim_page_idx_ = idx; - return ret; } void MarkCompact::UpdateClassAfterObjMap() { @@ -2372,8 +2278,6 @@ void MarkCompact::CompactMovingSpace(uint8_t* page) { UpdateClassAfterObjMap(); // These variables are maintained by FreeFromSpacePages(). last_reclaimed_page_ = pre_compact_page; - last_reclaimable_page_ = last_reclaimed_page_; - cur_reclaimable_page_ = last_reclaimed_page_; last_checked_reclaim_page_idx_ = idx; class_after_obj_iter_ = class_after_obj_ordered_map_.rbegin(); // Allocated-black pages @@ -2392,9 +2296,9 @@ void MarkCompact::CompactMovingSpace(uint8_t* page) { uint32_t first_chunk_size = black_alloc_pages_first_chunk_size_[idx]; if (first_obj != nullptr) { DoPageCompactionWithStateChange<kMode>(idx, + page_status_arr_len, to_space_end, page, - /*map_immediately=*/true, [&]() REQUIRES_SHARED(Locks::mutator_lock_) { SlideBlackPage(first_obj, next_page_first_obj, @@ -2406,15 +2310,13 @@ void MarkCompact::CompactMovingSpace(uint8_t* page) { // We are sliding here, so no point attempting to madvise for every // page. Wait for enough pages to be done. if (idx % DivideByPageSize(kMinFromSpaceMadviseSize) == 0) { - FreeFromSpacePages(idx, kMode, /*end_idx_for_mapping=*/0); + FreeFromSpacePages(idx, kMode); } } next_page_first_obj = first_obj; } DCHECK_EQ(pre_compact_page, black_allocations_begin_); - // Reserved page to be used if we can't find any reclaimable page for processing. 
- uint8_t* reserve_page = page; - size_t end_idx_for_mapping = idx; + while (idx > 0) { idx--; to_space_end -= gPageSize; @@ -2423,83 +2325,17 @@ void MarkCompact::CompactMovingSpace(uint8_t* page) { page = shadow_space_end; } else if (kMode == kFallbackMode) { page = to_space_end; - } else { - DCHECK_EQ(kMode, kCopyMode); - if (cur_reclaimable_page_ > last_reclaimable_page_) { - cur_reclaimable_page_ -= gPageSize; - page = cur_reclaimable_page_ + from_space_slide_diff_; - } else { - page = reserve_page; - } } mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr(); - bool success = DoPageCompactionWithStateChange<kMode>( - idx, - to_space_end, - page, - /*map_immediately=*/page == reserve_page, - [&]() REQUIRES_SHARED(Locks::mutator_lock_) { + DoPageCompactionWithStateChange<kMode>( + idx, page_status_arr_len, to_space_end, page, [&]() REQUIRES_SHARED(Locks::mutator_lock_) { CompactPage(first_obj, pre_compact_offset_moving_space_[idx], page, kMode == kCopyMode); }); - if (kMode == kCopyMode && (!success || page == reserve_page) && end_idx_for_mapping - idx > 1) { - // map the pages in the following pages as they can't be mapped with - // the subsequent pages as their src-side pages won't be contiguous. - MapMovingSpacePages(idx + 1, end_idx_for_mapping); - } - if (FreeFromSpacePages(idx, kMode, end_idx_for_mapping)) { - end_idx_for_mapping = idx; - } - } - // map one last time to finish anything left. - if (kMode == kCopyMode && end_idx_for_mapping > 0) { - MapMovingSpacePages(idx, end_idx_for_mapping); + FreeFromSpacePages(idx, kMode); } DCHECK_EQ(to_space_end, bump_pointer_space_->Begin()); } -size_t MarkCompact::MapMovingSpacePages(size_t arr_idx, size_t arr_len) { - // Claim all the contiguous pages, which are ready to be mapped, and then do - // so in a single ioctl. This helps avoid the overhead of invoking syscall - // several times and also maps the already-processed pages, avoiding - // unnecessary faults on them. - DCHECK_LT(arr_idx, arr_len); - uint32_t cur_state = moving_pages_status_[arr_idx].load(std::memory_order_relaxed); - if ((cur_state & kPageStateMask) != static_cast<uint8_t>(PageState::kProcessed)) { - return 0; - } - uint32_t from_space_offset = cur_state & ~kPageStateMask; - uint8_t* to_space_start = moving_space_begin_ + arr_idx * gPageSize; - uint8_t* from_space_start = from_space_begin_ + from_space_offset; - DCHECK_ALIGNED_PARAM(to_space_start, gPageSize); - DCHECK_ALIGNED_PARAM(from_space_start, gPageSize); - size_t length = 0; - for (size_t i = arr_idx; i < arr_len; length += gPageSize, from_space_offset += gPageSize, i++) { - uint8_t desired_state = static_cast<uint8_t>(PageState::kProcessedAndMapping); - cur_state = moving_pages_status_[i].load(std::memory_order_relaxed); - // We need to guarantee that we don't end up sucsessfully marking a later - // page 'mapping' and then fail to mark an earlier page. To guarantee that - // we use acq_rel order. - if ((cur_state & kPageStateMask) != static_cast<uint8_t>(PageState::kProcessed) || - !moving_pages_status_[i].compare_exchange_strong( - cur_state, desired_state, std::memory_order_acq_rel)) { - break; - } - DCHECK_EQ(from_space_offset, cur_state & ~kPageStateMask); - } - if (length > 0) { - CopyIoctl(to_space_start, from_space_start, length); - for (size_t i = arr_idx; length > 0; length -= gPageSize, i++) { - // It's sufficient to use relaxed memory-order as these stores are - // happening after ioctl, which acts as a fence. 
- // Store is sufficient as there are no other threads updating status of - // these pages. - moving_pages_status_[i].store(static_cast<uint8_t>(PageState::kProcessedAndMapped), - std::memory_order_relaxed); - } - } - return length; -} - void MarkCompact::UpdateNonMovingPage(mirror::Object* first, uint8_t* page) { DCHECK_LT(reinterpret_cast<uint8_t*>(first), page + gPageSize); // For every object found in the page, visit the previous object. This ensures @@ -3017,6 +2853,17 @@ void MarkCompact::CompactionPause() { // TODO: We can reduce the time spent on this in a pause by performing one // round of this concurrently prior to the pause. UpdateMovingSpaceBlackAllocations(); + // TODO: If we want to avoid this allocation in a pause then we will have to + // allocate an array for the entire moving-space size, which can be made + // part of info_map_. + moving_pages_status_ = new Atomic<PageState>[moving_first_objs_count_ + black_page_count_]; + if (kIsDebugBuild) { + size_t len = moving_first_objs_count_ + black_page_count_; + for (size_t i = 0; i < len; i++) { + CHECK_EQ(moving_pages_status_[i].load(std::memory_order_relaxed), + PageState::kUnprocessed); + } + } // Iterate over the allocation_stack_, for every object in the non-moving // space: // 1. Mark the object in live bitmap @@ -3024,6 +2871,7 @@ void MarkCompact::CompactionPause() { // 3. In the corresponding page, if the first-object vector needs updating // then do so. UpdateNonMovingSpaceBlackAllocations(); + // This store is visible to mutator (or uffd worker threads) as the mutator // lock's unlock guarantees that. compacting_ = true; @@ -3334,7 +3182,7 @@ void MarkCompact::ConcurrentCompaction(uint8_t* buf) { // zeropage so that the gc-thread can proceed. Otherwise, each thread does // it and the gc-thread will repeat this fault until thread_pool_counter == 0. if (!gKernelHasFaultRetry || ret == 1) { - ZeropageIoctl(fault_addr, gPageSize, /*tolerate_eexist=*/false, /*tolerate_enoent=*/false); + ZeropageIoctl(fault_addr, /*tolerate_eexist=*/false, /*tolerate_enoent=*/false); } else { struct uffdio_range uffd_range; uffd_range.start = msg.arg.pagefault.address; @@ -3442,13 +3290,22 @@ bool MarkCompact::SigbusHandler(siginfo_t* info) { } } +static void BackOff(uint32_t i) { + static constexpr uint32_t kYieldMax = 5; + // TODO: Consider adding x86 PAUSE and/or ARM YIELD here. + if (i <= kYieldMax) { + sched_yield(); + } else { + // nanosleep is not in the async-signal-safe list, but bionic implements it + // with a pure system call, so it should be fine. + NanoSleep(10000ull * (i - kYieldMax)); + } +} + template <int kMode> void MarkCompact::ConcurrentlyProcessMovingPage(uint8_t* fault_page, uint8_t* buf, size_t nr_moving_space_used_pages) { - // TODO: add a class for Scoped dtor to set that a page has already mapped. - // This helps in avoiding a zero-page ioctl in gc-thread before unregistering - // unused space. class ScopedInProgressCount { public: explicit ScopedInProgressCount(MarkCompact* collector) : collector_(collector) { @@ -3463,7 +3320,6 @@ void MarkCompact::ConcurrentlyProcessMovingPage(uint8_t* fault_page, MarkCompact* collector_; }; - Thread* self = Thread::Current(); uint8_t* unused_space_begin = bump_pointer_space_->Begin() + nr_moving_space_used_pages * gPageSize; DCHECK(IsAlignedParam(unused_space_begin, gPageSize)); @@ -3472,203 +3328,159 @@ void MarkCompact::ConcurrentlyProcessMovingPage(uint8_t* fault_page, // There is a race which allows more than one thread to install a // zero-page. 
But we can tolerate that. So absorb the EEXIST returned by // the ioctl and move on. - ZeropageIoctl(fault_page, gPageSize, /*tolerate_eexist=*/true, /*tolerate_enoent=*/true); + ZeropageIoctl(fault_page, /*tolerate_eexist=*/true, /*tolerate_enoent=*/true); return; } size_t page_idx = DivideByPageSize(fault_page - bump_pointer_space_->Begin()); DCHECK_LT(page_idx, moving_first_objs_count_ + black_page_count_); mirror::Object* first_obj = first_objs_moving_space_[page_idx].AsMirrorPtr(); if (first_obj == nullptr) { - // Install zero-page in the entire remaining tlab to avoid multiple ioctl invocations. - uint8_t* end = AlignDown(self->GetTlabEnd(), gPageSize); - if (fault_page < self->GetTlabPos() || fault_page >= end) { - end = fault_page + gPageSize; - } - size_t end_idx = page_idx + DivideByPageSize(end - fault_page); - size_t length = 0; - for (size_t idx = page_idx; idx < end_idx; idx++, length += gPageSize) { - // We should never have a case where two workers are trying to install a - // zeropage in this range as we synchronize using moving_pages_status_[page_idx]. - uint32_t expected_state = static_cast<uint8_t>(PageState::kUnprocessed); - if (!moving_pages_status_[idx].compare_exchange_strong( - expected_state, - static_cast<uint8_t>(PageState::kProcessedAndMapping), - std::memory_order_acq_rel)) { - DCHECK_EQ(expected_state, static_cast<uint8_t>(PageState::kProcessedAndMapping)); - break; - } - } - if (length > 0) { + // We should never have a case where two workers are trying to install a + // zeropage in this range as we synchronize using moving_pages_status_[page_idx]. + PageState expected_state = PageState::kUnprocessed; + if (moving_pages_status_[page_idx].compare_exchange_strong( + expected_state, PageState::kProcessedAndMapping, std::memory_order_relaxed)) { // Note: ioctl acts as an acquire fence. - ZeropageIoctl(fault_page, length, /*tolerate_eexist=*/false, /*tolerate_enoent=*/true); - for (size_t len = 0, idx = page_idx; len < length; idx++, len += gPageSize) { - moving_pages_status_[idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped), - std::memory_order_relaxed); - } + ZeropageIoctl(fault_page, /*tolerate_eexist=*/false, /*tolerate_enoent=*/true); + } else { + DCHECK_EQ(expected_state, PageState::kProcessedAndMapping); } return; } - uint32_t raw_state = moving_pages_status_[page_idx].load( + PageState state = moving_pages_status_[page_idx].load( use_uffd_sigbus_ ? std::memory_order_acquire : std::memory_order_relaxed); uint32_t backoff_count = 0; - PageState state; while (true) { - state = static_cast<PageState>(static_cast<uint8_t>(raw_state)); - if (state == PageState::kProcessing || state == PageState::kMutatorProcessing || - state == PageState::kProcessingAndMapping || state == PageState::kProcessedAndMapping) { - if (!use_uffd_sigbus_) { - break; - } - // Wait for the page to be mapped (by gc-thread or some mutator) before returning. - // The wait is not expected to be long as the read state indicates that the other - // thread is actively working on the page. - BackOff(backoff_count++); - raw_state = moving_pages_status_[page_idx].load(std::memory_order_acquire); - } else if (state == PageState::kProcessedAndMapped) { - // Nothing to do. - break; - } else { - // The increment to the in-progress counter must be done before updating - // the page's state. Otherwise, we will end up leaving a window wherein - // the GC-thread could observe that no worker is working on compaction - // and could end up unregistering the moving space from userfaultfd. 
- ScopedInProgressCount spc(this); - // Acquire order to ensure we don't start writing to shadow map, which is - // shared, before the CAS is successful. Release order to ensure that the - // increment to moving_compaction_in_progress above is not re-ordered - // after the CAS. - if (state == PageState::kUnprocessed && - moving_pages_status_[page_idx].compare_exchange_strong( - raw_state, - static_cast<uint8_t>(PageState::kMutatorProcessing), - std::memory_order_acq_rel)) { - if (kMode == kMinorFaultMode) { - DCHECK_EQ(buf, nullptr); - buf = shadow_to_space_map_.Begin() + page_idx * gPageSize; - } else if (UNLIKELY(buf == nullptr)) { - DCHECK_EQ(kMode, kCopyMode); - uint16_t idx = compaction_buffer_counter_.fetch_add(1, std::memory_order_relaxed); - // The buffer-map is one page bigger as the first buffer is used by GC-thread. - CHECK_LE(idx, kMutatorCompactionBufferCount); - buf = compaction_buffers_map_.Begin() + idx * gPageSize; - DCHECK(compaction_buffers_map_.HasAddress(buf)); - self->SetThreadLocalGcBuffer(buf); - } + switch (state) { + case PageState::kUnprocessed: { + // The increment to the in-progress counter must be done before updating + // the page's state. Otherwise, we will end up leaving a window wherein + // the GC-thread could observe that no worker is working on compaction + // and could end up unregistering the moving space from userfaultfd. + ScopedInProgressCount spc(this); + // Acquire order to ensure we don't start writing to shadow map, which is + // shared, before the CAS is successful. Release order to ensure that the + // increment to moving_compactions_in_progress above is not re-ordered + // after the CAS. + if (moving_pages_status_[page_idx].compare_exchange_strong( + state, PageState::kMutatorProcessing, std::memory_order_acq_rel)) { + if (kMode == kMinorFaultMode) { + DCHECK_EQ(buf, nullptr); + buf = shadow_to_space_map_.Begin() + page_idx * gPageSize; + } else if (UNLIKELY(buf == nullptr)) { + DCHECK_EQ(kMode, kCopyMode); + uint16_t idx = compaction_buffer_counter_.fetch_add(1, std::memory_order_relaxed); + // The buffer-map is one page bigger as the first buffer is used by GC-thread. + CHECK_LE(idx, kMutatorCompactionBufferCount); + buf = compaction_buffers_map_.Begin() + idx * gPageSize; + DCHECK(compaction_buffers_map_.HasAddress(buf)); + Thread::Current()->SetThreadLocalGcBuffer(buf); + } - if (fault_page < post_compact_end_) { - // The page has to be compacted. - CompactPage( - first_obj, pre_compact_offset_moving_space_[page_idx], buf, kMode == kCopyMode); - } else { - DCHECK_NE(first_obj, nullptr); - DCHECK_GT(pre_compact_offset_moving_space_[page_idx], 0u); - uint8_t* pre_compact_page = black_allocations_begin_ + (fault_page - post_compact_end_); - uint32_t first_chunk_size = black_alloc_pages_first_chunk_size_[page_idx]; - mirror::Object* next_page_first_obj = nullptr; - if (page_idx + 1 < moving_first_objs_count_ + black_page_count_) { - next_page_first_obj = first_objs_moving_space_[page_idx + 1].AsMirrorPtr(); + if (fault_page < post_compact_end_) { + // The page has to be compacted. 
+ CompactPage( + first_obj, pre_compact_offset_moving_space_[page_idx], buf, kMode == kCopyMode); + } else { + DCHECK_NE(first_obj, nullptr); + DCHECK_GT(pre_compact_offset_moving_space_[page_idx], 0u); + uint8_t* pre_compact_page = black_allocations_begin_ + (fault_page - post_compact_end_); + uint32_t first_chunk_size = black_alloc_pages_first_chunk_size_[page_idx]; + mirror::Object* next_page_first_obj = nullptr; + if (page_idx + 1 < moving_first_objs_count_ + black_page_count_) { + next_page_first_obj = first_objs_moving_space_[page_idx + 1].AsMirrorPtr(); + } + DCHECK(IsAlignedParam(pre_compact_page, gPageSize)); + SlideBlackPage(first_obj, + next_page_first_obj, + first_chunk_size, + pre_compact_page, + buf, + kMode == kCopyMode); } - DCHECK(IsAlignedParam(pre_compact_page, gPageSize)); - SlideBlackPage(first_obj, - next_page_first_obj, - first_chunk_size, - pre_compact_page, - buf, - kMode == kCopyMode); - } - // Nobody else would simultaneously modify this page's state so an - // atomic store is sufficient. Use 'release' order to guarantee that - // loads/stores to the page are finished before this store. Since the - // mutator used its own buffer for the processing, there is no reason to - // put its index in the status of the page. Also, the mutator is going - // to immediately map the page, so that info is not needed. - moving_pages_status_[page_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapping), - std::memory_order_release); - if (kMode == kCopyMode) { - CopyIoctl(fault_page, buf, gPageSize); - // Store is sufficient as no other thread modifies the status at this stage. - moving_pages_status_[page_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped), + // Nobody else would simultaneously modify this page's state so an + // atomic store is sufficient. Use 'release' order to guarantee that + // loads/stores to the page are finished before this store. + moving_pages_status_[page_idx].store(PageState::kProcessedAndMapping, std::memory_order_release); - break; - } else { - // We don't support minor-fault feature anymore. - UNREACHABLE(); + if (kMode == kCopyMode) { + CopyIoctl(fault_page, buf); + if (use_uffd_sigbus_) { + // Store is sufficient as no other thread modifies the status at this stage. + moving_pages_status_[page_idx].store(PageState::kProcessedAndMapped, + std::memory_order_release); + } + return; + } else { + break; + } } } - state = static_cast<PageState>(static_cast<uint8_t>(raw_state)); - if (state == PageState::kProcessed) { - size_t arr_len = moving_first_objs_count_ + black_page_count_; - // The page is processed but not mapped. We should map it. The release - // order used in MapMovingSpacePages will ensure that the increment to - // moving_compaction_in_progress is done first. - if (MapMovingSpacePages(page_idx, arr_len) >= gPageSize) { - break; + continue; + case PageState::kProcessing: + DCHECK_EQ(kMode, kMinorFaultMode); + if (moving_pages_status_[page_idx].compare_exchange_strong( + state, PageState::kProcessingAndMapping, std::memory_order_relaxed) && + !use_uffd_sigbus_) { + // Somebody else took or will take care of finishing the compaction and + // then mapping the page. + return; } - raw_state = moving_pages_status_[page_idx].load(std::memory_order_acquire); - } + continue; + case PageState::kProcessed: + // The page is processed but not mapped. We should map it. 
+ break; + case PageState::kProcessingAndMapping: + case PageState::kMutatorProcessing: + case PageState::kProcessedAndMapping: + if (use_uffd_sigbus_) { + // Wait for the page to be mapped before returning. + BackOff(backoff_count++); + state = moving_pages_status_[page_idx].load(std::memory_order_acquire); + continue; + } + return; + case PageState::kProcessedAndMapped: + // Somebody else took care of the page. + return; } + break; + } + + DCHECK_EQ(kMode, kMinorFaultMode); + if (state == PageState::kUnprocessed) { + MapProcessedPages</*kFirstPageMapping=*/true>( + fault_page, moving_pages_status_, page_idx, nr_moving_space_used_pages); + } else { + DCHECK_EQ(state, PageState::kProcessed); + MapProcessedPages</*kFirstPageMapping=*/false>( + fault_page, moving_pages_status_, page_idx, nr_moving_space_used_pages); } } -bool MarkCompact::MapUpdatedLinearAllocPages(uint8_t* start_page, - uint8_t* start_shadow_page, - Atomic<PageState>* state, - size_t length, - bool free_pages, - bool single_ioctl) { +void MarkCompact::MapUpdatedLinearAllocPage(uint8_t* page, + uint8_t* shadow_page, + Atomic<PageState>& state, + bool page_touched) { DCHECK(!minor_fault_initialized_); - DCHECK_ALIGNED_PARAM(length, gPageSize); - uint8_t* end_page = start_page + length; - while (start_page < end_page) { - size_t map_len = 0; - // Claim a contiguous range of pages that we can map. - for (Atomic<PageState>* cur_state = state; map_len < length; - map_len += gPageSize, cur_state++) { - PageState expected_state = PageState::kProcessed; - if (!cur_state->compare_exchange_strong( - expected_state, PageState::kProcessedAndMapping, std::memory_order_acq_rel)) { - break; - } - } - if (map_len == 0) { - if (single_ioctl) { - // Didn't map anything. - return false; - } - // Skip all the pages that this thread can't map. - while (length > 0 && state->load(std::memory_order_relaxed) != PageState::kProcessed) { - state++; - length -= gPageSize; - start_shadow_page += gPageSize; - start_page += gPageSize; - } - } else { - CopyIoctl(start_page, start_shadow_page, map_len); - if (use_uffd_sigbus_) { - // Declare that the pages are ready to be accessed. - // Store is sufficient as no other thread can modify the status - // of this page at this point. Ioctl above will act as release fence. - for (size_t l = 0; l < map_len; l += gPageSize, state++) { - DCHECK_EQ(state->load(std::memory_order_relaxed), PageState::kProcessedAndMapping); - state->store(PageState::kProcessedAndMapped, std::memory_order_relaxed); - } - } else { - state += DivideByPageSize(map_len); - } - if (free_pages) { - ZeroAndReleaseMemory(start_shadow_page, map_len); - } - if (single_ioctl) { - break; - } - start_page += map_len; - start_shadow_page += map_len; - length -= map_len; - // state is already updated above. - } + if (page_touched) { + CopyIoctl(page, shadow_page); + } else { + // If the page wasn't touched, then it means it is empty and + // is most likely not present on the shadow-side. Furthermore, + // since the shadow is also userfaultfd registered doing copy + // ioctl fail as the copy-from-user in the kernel will cause + // userfault. Instead, just map a zeropage, which is not only + // correct but also efficient as it avoids unnecessary memcpy + // in the kernel. + ZeropageIoctl(page, /*tolerate_eexist=*/false, /*tolerate_enoent=*/false); + } + if (use_uffd_sigbus_) { + // Store is sufficient as no other thread can modify the + // status of this page at this point. 
+ state.store(PageState::kProcessedAndMapped, std::memory_order_release); } - return true; } template <int kMode> @@ -3695,7 +3507,7 @@ void MarkCompact::ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool i arena_iter->second <= fault_page) { // Fault page isn't in any of the arenas that existed before we started // compaction. So map zeropage and return. - ZeropageIoctl(fault_page, gPageSize, /*tolerate_eexist=*/true, /*tolerate_enoent=*/false); + ZeropageIoctl(fault_page, /*tolerate_eexist=*/true, /*tolerate_enoent=*/false); } else { // Find the linear-alloc space containing fault-page LinearAllocSpaceData* space_data = nullptr; @@ -3719,7 +3531,7 @@ void MarkCompact::ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool i // Acquire order to ensure we don't start writing to shadow map, which is // shared, before the CAS is successful. if (state_arr[page_idx].compare_exchange_strong( - state, PageState::kProcessing, std::memory_order_acquire)) { + state, PageState::kProcessingAndMapping, std::memory_order_acquire)) { if (kMode == kCopyMode || is_minor_fault) { LinearAllocPageUpdater updater(this); uint8_t* first_obj = arena_iter->first->GetFirstObject(fault_page); @@ -3731,24 +3543,11 @@ void MarkCompact::ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool i updater.SingleObjectArena(fault_page + diff, gPageSize); } if (kMode == kCopyMode) { - if (updater.WasLastPageTouched()) { - state_arr[page_idx].store(PageState::kProcessed, std::memory_order_release); - state = PageState::kProcessed; - continue; - } else { - // If the page wasn't touched, then it means it is empty and - // is most likely not present on the shadow-side. Furthermore, - // since the shadow is also userfaultfd registered doing copy - // ioctl fails as the copy-from-user in the kernel will cause - // userfault. Instead, just map a zeropage, which is not only - // correct but also efficient as it avoids unnecessary memcpy - // in the kernel. - ZeropageIoctl( - fault_page, gPageSize, /*tolerate_eexist=*/false, /*tolerate_enoent=*/false); - state_arr[page_idx].store(PageState::kProcessedAndMapped, - std::memory_order_release); - return; - } + MapUpdatedLinearAllocPage(fault_page, + fault_page + diff, + state_arr[page_idx], + updater.WasLastPageTouched()); + return; } } else { // Don't touch the page in this case (there is no reason to do so @@ -3765,21 +3564,22 @@ void MarkCompact::ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool i } } continue; - case PageState::kProcessed: - // Map as many pages as possible in a single ioctl, without spending - // time freeing pages. - if (MapUpdatedLinearAllocPages(fault_page, - fault_page + diff, - state_arr + page_idx, - space_data->end_ - fault_page, - /*free_pages=*/false, - /*single_ioctl=*/true)) { + case PageState::kProcessing: + DCHECK_EQ(kMode, kMinorFaultMode); + if (state_arr[page_idx].compare_exchange_strong( + state, PageState::kProcessingAndMapping, std::memory_order_relaxed) && + !use_uffd_sigbus_) { + // Somebody else took or will take care of finishing the updates and + // then mapping the page. return; } - // fault_page was not mapped by this thread (some other thread claimed - // it). Wait for it to be mapped before returning. - FALLTHROUGH_INTENDED; - case PageState::kProcessing: + continue; + case PageState::kProcessed: + // The page is processed but not mapped. We should map it. 
+ break; + case PageState::kMutatorProcessing: + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); case PageState::kProcessingAndMapping: case PageState::kProcessedAndMapping: if (use_uffd_sigbus_) { @@ -3789,9 +3589,6 @@ void MarkCompact::ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool i continue; } return; - case PageState::kMutatorProcessing: - LOG(FATAL) << "Unreachable"; - UNREACHABLE(); case PageState::kProcessedAndMapped: // Somebody else took care of the page. return; @@ -3815,52 +3612,12 @@ void MarkCompact::ProcessLinearAlloc() { GcVisitedArenaPool* arena_pool = static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool()); DCHECK_EQ(thread_running_gc_, Thread::Current()); - uint8_t* unmapped_range_start = nullptr; - uint8_t* unmapped_range_end = nullptr; - // Pointer to the linear-alloc space containing the current arena in the loop - // below. Also helps in ensuring that two arenas, which are contiguous in - // address space but are from different linear-alloc spaces, are not coalesced - // into one range for mapping purpose. - LinearAllocSpaceData* space_data = nullptr; - Atomic<PageState>* state_arr = nullptr; - ptrdiff_t diff = 0; - - auto map_pages = [&]() { - DCHECK_NE(diff, 0); - DCHECK_NE(space_data, nullptr); - DCHECK_GE(unmapped_range_start, space_data->begin_); - DCHECK_LT(unmapped_range_start, space_data->end_); - DCHECK_GT(unmapped_range_end, space_data->begin_); - DCHECK_LE(unmapped_range_end, space_data->end_); - DCHECK_LT(unmapped_range_start, unmapped_range_end); - DCHECK_ALIGNED_PARAM(unmapped_range_end - unmapped_range_start, gPageSize); - size_t page_idx = DivideByPageSize(unmapped_range_start - space_data->begin_); - MapUpdatedLinearAllocPages(unmapped_range_start, - unmapped_range_start + diff, - state_arr + page_idx, - unmapped_range_end - unmapped_range_start, - /*free_pages=*/true, - /*single_ioctl=*/false); - }; for (auto& pair : linear_alloc_arenas_) { const TrackedArena* arena = pair.first; - size_t arena_size = arena->Size(); - uint8_t* arena_begin = arena->Begin(); - // linear_alloc_arenas_ is sorted on arena-begin. So we will get all arenas - // in that order. - DCHECK_LE(unmapped_range_end, arena_begin); - if (unmapped_range_end != nullptr && unmapped_range_end < arena_begin) { - map_pages(); - unmapped_range_end = nullptr; - } - if (unmapped_range_end == nullptr) { - unmapped_range_start = unmapped_range_end = arena_begin; - } - DCHECK_NE(unmapped_range_start, nullptr); - // It's ok to include all arenas in the unmapped range. Since the - // corresponding state bytes will be kUnprocessed, we will skip calling - // ioctl and madvise on arenas which are waiting to be deleted. - unmapped_range_end += arena_size; + size_t arena_size; + uint8_t* arena_begin; + ptrdiff_t diff; + bool others_processing; { // Acquire arena-pool's lock (in shared-mode) so that the arena being updated // does not get deleted at the same time. If this critical section is too @@ -3875,32 +3632,20 @@ void MarkCompact::ProcessLinearAlloc() { } uint8_t* last_byte = pair.second; DCHECK_ALIGNED_PARAM(last_byte, gPageSize); - if (space_data == nullptr || space_data->begin_ > arena_begin || - space_data->end_ <= arena_begin) { - // Handle the case where previous and current arenas are contiguous but - // belong to different spaces. - if (space_data != nullptr && unmapped_range_start >= space_data->begin_ && - unmapped_range_start < space_data->end_) { - // Subtract arena_size that was added above as it's not part of - // `space_data` range. 
- unmapped_range_end -= arena_size; - map_pages(); - unmapped_range_start = arena_begin; - unmapped_range_end = arena_begin + arena_size; - } - space_data = nullptr; - // Find the linear-alloc space containing the arena - for (auto& data : linear_alloc_spaces_data_) { - if (data.begin_ <= arena_begin && arena_begin < data.end_) { - space_data = &data; - break; - } + others_processing = false; + arena_begin = arena->Begin(); + arena_size = arena->Size(); + // Find the linear-alloc space containing the arena + LinearAllocSpaceData* space_data = nullptr; + for (auto& data : linear_alloc_spaces_data_) { + if (data.begin_ <= arena_begin && arena_begin < data.end_) { + space_data = &data; + break; } - diff = space_data->shadow_.Begin() - space_data->begin_; - state_arr = reinterpret_cast<Atomic<PageState>*>(space_data->page_status_map_.Begin()); } CHECK_NE(space_data, nullptr); - auto visitor = [space_data, last_byte, diff, this, state_arr]( + diff = space_data->shadow_.Begin() - space_data->begin_; + auto visitor = [space_data, last_byte, diff, this, &others_processing]( uint8_t* page_begin, uint8_t* first_obj, size_t page_size) REQUIRES_SHARED(Locks::mutator_lock_) { @@ -3912,12 +3657,16 @@ void MarkCompact::ProcessLinearAlloc() { LinearAllocPageUpdater updater(this); size_t page_idx = DivideByPageSize(page_begin - space_data->begin_); DCHECK_LT(page_idx, space_data->page_status_map_.Size()); + Atomic<PageState>* state_arr = + reinterpret_cast<Atomic<PageState>*>(space_data->page_status_map_.Begin()); PageState expected_state = PageState::kUnprocessed; + PageState desired_state = + minor_fault_initialized_ ? PageState::kProcessing : PageState::kProcessingAndMapping; // Acquire order to ensure that we don't start accessing the shadow page, // which is shared with other threads, prior to CAS. Also, for same // reason, we used 'release' order for changing the state to 'processed'. if (state_arr[page_idx].compare_exchange_strong( - expected_state, PageState::kProcessing, std::memory_order_acquire)) { + expected_state, desired_state, std::memory_order_acquire)) { // null first_obj indicates that it's a page from arena for // intern-table/class-table. So first object isn't required. if (first_obj != nullptr) { @@ -3928,18 +3677,8 @@ void MarkCompact::ProcessLinearAlloc() { } expected_state = PageState::kProcessing; if (!minor_fault_initialized_) { - // Store is sufficient as no other thread could be modifying it. Use - // release order to ensure that the writes to shadow page are - // committed to memory before. - if (updater.WasLastPageTouched()) { - state_arr[page_idx].store(PageState::kProcessed, std::memory_order_release); - } else { - // See comment in ConcurrentlyProcessLinearAllocPage() with same situation. - ZeropageIoctl( - page_begin, gPageSize, /*tolerate_eexist=*/false, /*tolerate_enoent=*/false); - // Ioctl will act as release fence. 
- state_arr[page_idx].store(PageState::kProcessedAndMapped, std::memory_order_relaxed); - } + MapUpdatedLinearAllocPage( + page_begin, page_begin + diff, state_arr[page_idx], updater.WasLastPageTouched()); } else if (!state_arr[page_idx].compare_exchange_strong( expected_state, PageState::kProcessed, std::memory_order_release)) { DCHECK_EQ(expected_state, PageState::kProcessingAndMapping); @@ -3950,15 +3689,19 @@ void MarkCompact::ProcessLinearAlloc() { MapProcessedPages</*kFirstPageMapping=*/true>( page_begin, state_arr, page_idx, space_data->page_status_map_.Size()); } + } else { + others_processing = true; } }; arena->VisitRoots(visitor); } - } - if (unmapped_range_end > unmapped_range_start) { - // Map remaining pages. - map_pages(); + // If we are not in minor-fault mode and if no other thread was found to be + // processing any pages in this arena, then we can madvise the shadow size. + // Otherwise, we will double the memory use for linear-alloc. + if (!minor_fault_initialized_ && !others_processing) { + ZeroAndReleaseMemory(arena_begin + diff, arena_size); + } } } @@ -3976,9 +3719,6 @@ void MarkCompact::RegisterUffd(void* addr, size_t size, int mode) { << ". start:" << static_cast<void*>(addr) << " len:" << PrettySize(size); } -// TODO: sometime we may want to tolerate certain error conditions (like ENOMEM -// when we unregister the unused portion of the moving-space). Implement support -// for that. void MarkCompact::UnregisterUffd(uint8_t* start, size_t len) { DCHECK(IsValidFd(uffd_)); struct uffdio_range range; @@ -4028,8 +3768,7 @@ void MarkCompact::CompactionPhase() { // range. uint8_t* unused_first_page = bump_pointer_space_->Begin() + used_size; // It's ok if somebody else already mapped the page. - ZeropageIoctl( - unused_first_page, gPageSize, /*tolerate_eexist*/ true, /*tolerate_enoent*/ false); + ZeropageIoctl(unused_first_page, /*tolerate_eexist*/ true, /*tolerate_enoent*/ false); UnregisterUffd(unused_first_page, moving_space_size - used_size); } CompactMovingSpace<kCopyMode>(compaction_buffers_map_.Begin()); @@ -4721,6 +4460,7 @@ void MarkCompact::FinishPhase() { } } class_after_obj_ordered_map_.clear(); + delete[] moving_pages_status_; linear_alloc_arenas_.clear(); { ReaderMutexLock mu(thread_running_gc_, *Locks::mutator_lock_); diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h index 47b2e81e65..076249bf11 100644 --- a/runtime/gc/collector/mark_compact.h +++ b/runtime/gc/collector/mark_compact.h @@ -156,11 +156,9 @@ class MarkCompact final : public GarbageCollector { // In copy-mode of userfaultfd, we don't need to reach a 'processed' state as // it's given that processing thread also copies the page, thereby mapping it. - // The order is important as we may treat them as integers. Also - // 'kUnprocessed' should be set to 0 as we rely on madvise(dontneed) to return - // us zero'ed pages, which implicitly makes page-status initialized to 'kUnprocessed'. + // The order is important as we may treat them as integers. enum class PageState : uint8_t { - kUnprocessed = 0, // Not processed yet. 
+ kUnprocessed = 0, // Not processed yet kProcessing = 1, // Being processed by GC thread and will not be mapped kProcessed = 2, // Processed but not mapped kProcessingAndMapping = 3, // Being processed by GC or mutator and will be mapped @@ -178,7 +176,6 @@ class MarkCompact final : public GarbageCollector { private: using ObjReference = mirror::CompressedReference<mirror::Object>; - static constexpr uint32_t kPageStateMask = (1 << BitSizeOf<uint8_t>()) - 1; // Number of bits (live-words) covered by a single chunk-info (below) // entry/word. // TODO: Since popcount is performed usomg SIMD instructions, we should @@ -349,12 +346,12 @@ class MarkCompact final : public GarbageCollector { void CompactMovingSpace(uint8_t* page) REQUIRES_SHARED(Locks::mutator_lock_); // Compact the given page as per func and change its state. Also map/copy the - // page, if required. Returns true if the page was compacted, else false. + // page, if required. template <int kMode, typename CompactionFn> - ALWAYS_INLINE bool DoPageCompactionWithStateChange(size_t page_idx, + ALWAYS_INLINE void DoPageCompactionWithStateChange(size_t page_idx, + size_t status_arr_len, uint8_t* to_space_page, uint8_t* page, - bool map_immediately, CompactionFn func) REQUIRES_SHARED(Locks::mutator_lock_); @@ -518,17 +515,7 @@ class MarkCompact final : public GarbageCollector { // feature. bool CanCompactMovingSpaceWithMinorFault(); - // Does the following: - // 1. Checks the status of to-space pages in [cur_page_idx, - // last_checked_reclaim_page_idx_) range to see whether the corresponding - // from-space pages can be reused. - // 2. Taking into consideration classes which are allocated after their - // objects (in address order), computes the page (in from-space) from which - // actual reclamation can be done. - // 3. Map the pages in [cur_page_idx, end_idx_for_mapping) range. - // 4. Madvise the pages in [page from (2), last_reclaimed_page_) - bool FreeFromSpacePages(size_t cur_page_idx, int mode, size_t end_idx_for_mapping) - REQUIRES_SHARED(Locks::mutator_lock_); + void FreeFromSpacePages(size_t cur_page_idx, int mode) REQUIRES_SHARED(Locks::mutator_lock_); // Maps processed pages (from moving space and linear-alloc) for uffd's // minor-fault feature. We try to 'claim' all processed (and unmapped) pages @@ -541,14 +528,7 @@ class MarkCompact final : public GarbageCollector { size_t arr_idx, size_t arr_len) REQUIRES_SHARED(Locks::mutator_lock_); - // Maps moving space pages in [arr_idx, arr_len) range. It fetches the page - // address containing the compacted content from moving_pages_status_ array. - // Returns number of bytes (should be multiple of page-size) that are mapped - // by the thread. - size_t MapMovingSpacePages(size_t arr_idx, size_t arr_len) REQUIRES_SHARED(Locks::mutator_lock_); - bool IsValidFd(int fd) const { return fd >= 0; } - // Add/update <class, obj> pair if class > obj and obj is the lowest address // object of class. ALWAYS_INLINE void UpdateClassAfterObjectMap(mirror::Object* obj) @@ -563,27 +543,20 @@ class MarkCompact final : public GarbageCollector { void MarkZygoteLargeObjects() REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_); - void ZeropageIoctl(void* addr, size_t length, bool tolerate_eexist, bool tolerate_enoent); - void CopyIoctl(void* dst, void* buffer, size_t length); - - // Called after updating linear-alloc page(s) to map the page. It first - // updates the state of the pages to kProcessedAndMapping and after ioctl to - // kProcessedAndMapped. 
Returns true if at least one ioctl invocation was - // done. If 'free_pages' is true then also frees shadow pages. If 'single_ioctl' - // is true, then stops after first ioctl. - bool MapUpdatedLinearAllocPages(uint8_t* start_page, - uint8_t* start_shadow_page, - Atomic<PageState>* state, - size_t length, - bool free_pages, - bool single_ioctl); + void ZeropageIoctl(void* addr, bool tolerate_eexist, bool tolerate_enoent); + void CopyIoctl(void* dst, void* buffer); + // Called after updating a linear-alloc page to either map a zero-page if the + // page wasn't touched during updation, or map the page via copy-ioctl. And + // then updates the page's state to indicate the page is mapped. + void MapUpdatedLinearAllocPage(uint8_t* page, + uint8_t* shadow_page, + Atomic<PageState>& state, + bool page_touched); // Called for clamping of 'info_map_' and other GC data structures, which are // small and/or in >4GB address space. There is no real benefit of clamping // them synchronously during app forking. It clamps only if clamp_info_map_status_ // is set to kClampInfoPending, which is done by ClampGrowthLimit(). void MaybeClampGcStructures() REQUIRES(Locks::heap_bitmap_lock_); - - size_t ComputeInfoMapSize(); // Initialize all the info-map related fields of this GC. Returns total size // of all the structures in info-map. size_t InitializeInfoMap(uint8_t* p, size_t moving_space_sz); @@ -707,24 +680,9 @@ class MarkCompact final : public GarbageCollector { // bitmap but has all its super classes lower address order than itself. mirror::Class* walk_super_class_cache_; // Used by FreeFromSpacePages() for maintaining markers in the moving space for - // how far the pages have been reclaimed (madvised) and checked. - // - // Pages from this index to the end of to-space have been checked (via page_status) - // and their corresponding from-space pages are reclaimable. + // how far the pages have been reclaimed/checked. size_t last_checked_reclaim_page_idx_; - // All from-space pages in [last_reclaimed_page_, from_space->End()) are - // reclaimed (madvised). Pages in [from-space page corresponding to - // last_checked_reclaim_page_idx_, last_reclaimed_page_) are not reclaimed as - // they may contain classes required for class hierarchy traversal for - // visiting references during compaction. uint8_t* last_reclaimed_page_; - // All the pages in [last_reclaimable_page_, last_reclaimed_page_) in - // from-space are available to store compacted contents for batching until the - // next time madvise is called. - uint8_t* last_reclaimable_page_; - // [cur_reclaimable_page_, last_reclaimed_page_) have been used to store - // compacted contents for batching. - uint8_t* cur_reclaimable_page_; space::ContinuousSpace* non_moving_space_; space::BumpPointerSpace* const bump_pointer_space_; @@ -732,11 +690,8 @@ class MarkCompact final : public GarbageCollector { accounting::ContinuousSpaceBitmap* const moving_space_bitmap_; accounting::ContinuousSpaceBitmap* non_moving_space_bitmap_; Thread* thread_running_gc_; - // Array of moving-space's pages' compaction status, which is stored in the - // least-significant byte. kProcessed entries also contain the from-space - // offset of the page which contains the compacted contents of the ith - // to-space page. - Atomic<uint32_t>* moving_pages_status_; + // Array of moving-space's pages' compaction status. + Atomic<PageState>* moving_pages_status_; size_t vector_length_; size_t live_stack_freeze_size_; |