Revert "Add concurrent card graying for immune spaces"

Bug: 37876887

This reverts commit 88d329a698ba186aeb1f1ef8794355512ada84a9.

Test: mm

Change-Id: I93880fb7cd8c4c27c65777079d48947075f8cb64
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 8afc6ba..08a752f 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -7177,10 +7177,10 @@
           instruction, root, /* unpoison_ref_before_marking */ false);
       codegen_->AddSlowPath(slow_path);
 
-      // Test if the GC is marking. Note that X86 and X86_64 don't switch the entrypoints when the
-      // GC is marking.
-      const int32_t is_marking_offset = Thread::IsGcMarkingOffset<kX86PointerSize>().Int32Value();
-      __ fs()->cmpl(Address::Absolute(is_marking_offset), Immediate(0));
+      // Test the entrypoint (`Thread::Current()->pReadBarrierMarkReg ## root.reg()`).
+      const int32_t entry_point_offset =
+          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(root.reg());
+      __ fs()->cmpl(Address::Absolute(entry_point_offset), Immediate(0));
       // The entrypoint is null when the GC is not marking.
       __ j(kNotEqual, slow_path->GetEntryLabel());
       __ Bind(slow_path->GetExitLabel());
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index c2b1a31..ff6e099 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -6542,11 +6542,10 @@
           instruction, root, /* unpoison_ref_before_marking */ false);
       codegen_->AddSlowPath(slow_path);
 
-      // Test if the GC is marking. Note that X86 and X86_64 don't switch the entrypoints when the
-      // GC is marking.
-      const int32_t is_marking_offset =
-          Thread::IsGcMarkingOffset<kX86_64PointerSize>().Int32Value();
-      __ gs()->cmpl(Address::Absolute(is_marking_offset, /* no_rip */ true), Immediate(0));
+      // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint.
+      const int32_t entry_point_offset =
+          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
+      __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip */ true), Immediate(0));
       // The entrypoint is null when the GC is not marking.
       __ j(kNotEqual, slow_path->GetEntryLabel());
       __ Bind(slow_path->GetExitLabel());
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 03ae63a..2414b5f 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -373,19 +373,19 @@
   bool IsSharedHeld(const Thread* self) const;
 
   // Assert the current thread has shared access to the ReaderWriterMutex.
-  ALWAYS_INLINE void AssertSharedHeld(const Thread* self) ASSERT_SHARED_CAPABILITY(this) {
+  void AssertSharedHeld(const Thread* self) ASSERT_SHARED_CAPABILITY(this) {
     if (kDebugLocking && (gAborting == 0)) {
       // TODO: we can only assert this well when self != null.
       CHECK(IsSharedHeld(self) || self == nullptr) << *this;
     }
   }
-  ALWAYS_INLINE void AssertReaderHeld(const Thread* self) ASSERT_SHARED_CAPABILITY(this) {
+  void AssertReaderHeld(const Thread* self) ASSERT_SHARED_CAPABILITY(this) {
     AssertSharedHeld(self);
   }
 
   // Assert the current thread doesn't hold this ReaderWriterMutex either in shared or exclusive
   // mode.
-  ALWAYS_INLINE void AssertNotHeld(const Thread* self) ASSERT_SHARED_CAPABILITY(!this) {
+  void AssertNotHeld(const Thread* self) ASSERT_SHARED_CAPABILITY(!this) {
     if (kDebugLocking && (gAborting == 0)) {
       CHECK(!IsSharedHeld(self)) << *this;
     }
diff --git a/runtime/gc/accounting/card_table.h b/runtime/gc/accounting/card_table.h
index c3dd21f..cd30d9d 100644
--- a/runtime/gc/accounting/card_table.h
+++ b/runtime/gc/accounting/card_table.h
@@ -51,7 +51,6 @@
   static constexpr size_t kCardSize = 1 << kCardShift;
   static constexpr uint8_t kCardClean = 0x0;
   static constexpr uint8_t kCardDirty = 0x70;
-  static constexpr uint8_t kCardAged = kCardDirty - 1;
 
   static CardTable* Create(const uint8_t* heap_begin, size_t heap_capacity);
   ~CardTable();
diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h
index 3503973..d5c36bf 100644
--- a/runtime/gc/collector/concurrent_copying-inl.h
+++ b/runtime/gc/collector/concurrent_copying-inl.h
@@ -152,8 +152,7 @@
 
 inline mirror::Object* ConcurrentCopying::MarkFromReadBarrier(mirror::Object* from_ref) {
   mirror::Object* ret;
-  // We can get here before marking starts since we gray immune objects before the marking phase.
-  if (from_ref == nullptr || !Thread::Current()->GetIsGcMarking()) {
+  if (from_ref == nullptr) {
     return from_ref;
   }
   // TODO: Consider removing this check when we are done investigating slow paths. b/30162165
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index b3780e6..e27c1ec 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -163,12 +163,6 @@
     ReaderMutexLock mu(self, *Locks::mutator_lock_);
     InitializePhase();
   }
-  if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) {
-    // Gray dirty immune objects concurrently to reduce GC pause times. We re-process gray cards in
-    // the pause.
-    ReaderMutexLock mu(self, *Locks::mutator_lock_);
-    GrayAllDirtyImmuneObjects();
-  }
   FlipThreadRoots();
   {
     ReaderMutexLock mu(self, *Locks::mutator_lock_);
@@ -358,12 +352,9 @@
     if (kVerifyNoMissingCardMarks) {
       cc->VerifyNoMissingCardMarks();
     }
-    CHECK_EQ(thread, self);
+    CHECK(thread == self);
     Locks::mutator_lock_->AssertExclusiveHeld(self);
-    {
-      TimingLogger::ScopedTiming split2("(Paused)SetFromSpace", cc->GetTimings());
-      cc->region_space_->SetFromSpace(cc->rb_table_, cc->force_evacuate_all_);
-    }
+    cc->region_space_->SetFromSpace(cc->rb_table_, cc->force_evacuate_all_);
     cc->SwapStacks();
     if (ConcurrentCopying::kEnableFromSpaceAccountingCheck) {
       cc->RecordLiveStackFreezeSize(self);
@@ -377,11 +368,11 @@
     }
     if (UNLIKELY(Runtime::Current()->IsActiveTransaction())) {
       CHECK(Runtime::Current()->IsAotCompiler());
-      TimingLogger::ScopedTiming split3("(Paused)VisitTransactionRoots", cc->GetTimings());
+      TimingLogger::ScopedTiming split2("(Paused)VisitTransactionRoots", cc->GetTimings());
       Runtime::Current()->VisitTransactionRoots(cc);
     }
     if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) {
-      cc->GrayAllNewlyDirtyImmuneObjects();
+      cc->GrayAllDirtyImmuneObjects();
       if (kIsDebugBuild) {
         // Check that all non-gray immune objects only reference immune objects.
         cc->VerifyGrayImmuneObjects();
@@ -528,8 +519,8 @@
 
 void ConcurrentCopying::VerifyNoMissingCardMarkCallback(mirror::Object* obj, void* arg) {
   auto* collector = reinterpret_cast<ConcurrentCopying*>(arg);
-  // Objects not on dirty or aged cards should never have references to newly allocated regions.
-  if (collector->heap_->GetCardTable()->GetCard(obj) == gc::accounting::CardTable::kCardClean) {
+  // Objects not on dirty cards should never have references to newly allocated regions.
+  if (!collector->heap_->GetCardTable()->IsDirty(obj)) {
     VerifyNoMissingCardMarkVisitor visitor(collector, /*holder*/ obj);
     obj->VisitReferences</*kVisitNativeRoots*/true, kVerifyNone, kWithoutReadBarrier>(
         visitor,
@@ -592,100 +583,53 @@
   }
 }
 
-template <bool kConcurrent>
 class ConcurrentCopying::GrayImmuneObjectVisitor {
  public:
-  explicit GrayImmuneObjectVisitor(Thread* self) : self_(self) {}
+  explicit GrayImmuneObjectVisitor() {}
 
   ALWAYS_INLINE void operator()(mirror::Object* obj) const REQUIRES_SHARED(Locks::mutator_lock_) {
-    if (kUseBakerReadBarrier && obj->GetReadBarrierState() == ReadBarrier::WhiteState()) {
-      if (kConcurrent) {
-        Locks::mutator_lock_->AssertSharedHeld(self_);
-        obj->AtomicSetReadBarrierState(ReadBarrier::WhiteState(), ReadBarrier::GrayState());
-        // Mod union table VisitObjects may visit the same object multiple times so we can't check
-        // the result of the atomic set.
-      } else {
-        Locks::mutator_lock_->AssertExclusiveHeld(self_);
-        obj->SetReadBarrierState(ReadBarrier::GrayState());
+    if (kUseBakerReadBarrier) {
+      if (kIsDebugBuild) {
+        Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
       }
+      obj->SetReadBarrierState(ReadBarrier::GrayState());
     }
   }
 
   static void Callback(mirror::Object* obj, void* arg) REQUIRES_SHARED(Locks::mutator_lock_) {
-    reinterpret_cast<GrayImmuneObjectVisitor<kConcurrent>*>(arg)->operator()(obj);
+    reinterpret_cast<GrayImmuneObjectVisitor*>(arg)->operator()(obj);
   }
-
- private:
-  Thread* const self_;
 };
 
 void ConcurrentCopying::GrayAllDirtyImmuneObjects() {
-  TimingLogger::ScopedTiming split("GrayAllDirtyImmuneObjects", GetTimings());
-  accounting::CardTable* const card_table = heap_->GetCardTable();
-  Thread* const self = Thread::Current();
-  using VisitorType = GrayImmuneObjectVisitor</* kIsConcurrent */ true>;
-  VisitorType visitor(self);
-  WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+  TimingLogger::ScopedTiming split(__FUNCTION__, GetTimings());
+  gc::Heap* const heap = Runtime::Current()->GetHeap();
+  accounting::CardTable* const card_table = heap->GetCardTable();
+  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
   for (space::ContinuousSpace* space : immune_spaces_.GetSpaces()) {
     DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
-    accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
+    GrayImmuneObjectVisitor visitor;
+    accounting::ModUnionTable* table = heap->FindModUnionTableFromSpace(space);
     // Mark all the objects on dirty cards since these may point to objects in other spaces.
     // Once these are marked, the GC will eventually clear them.
     // Table is non null for boot image and zygote spaces. It is only null for application image
     // spaces.
     if (table != nullptr) {
+      // TODO: Consider adding precleaning outside the pause.
       table->ProcessCards();
-      table->VisitObjects(&VisitorType::Callback, &visitor);
-      // Don't clear cards here since we need to rescan in the pause. If we cleared the cards here,
-      // there would be races with the mutator marking new cards.
-    } else {
-      // Keep cards aged if we don't have a mod-union table since we may need to scan them in future
-      // GCs. This case is for app images.
-      card_table->ModifyCardsAtomic(
-          space->Begin(),
-          space->End(),
-          [](uint8_t card) {
-            return (card != gc::accounting::CardTable::kCardClean)
-                ? gc::accounting::CardTable::kCardAged
-                : card;
-          },
-          /* card modified visitor */ VoidFunctor());
-      card_table->Scan</* kClearCard */ false>(space->GetMarkBitmap(),
-                                               space->Begin(),
-                                               space->End(),
-                                               visitor,
-                                               gc::accounting::CardTable::kCardAged);
-    }
-  }
-}
-
-void ConcurrentCopying::GrayAllNewlyDirtyImmuneObjects() {
-  TimingLogger::ScopedTiming split("(Paused)GrayAllNewlyDirtyImmuneObjects", GetTimings());
-  accounting::CardTable* const card_table = heap_->GetCardTable();
-  using VisitorType = GrayImmuneObjectVisitor</* kIsConcurrent */ false>;
-  Thread* const self = Thread::Current();
-  VisitorType visitor(self);
-  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
-  for (space::ContinuousSpace* space : immune_spaces_.GetSpaces()) {
-    DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
-    accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
-
-    // Don't need to scan aged cards since we did these before the pause. Note that scanning cards
-    // also handles the mod-union table cards.
-    card_table->Scan</* kClearCard */ false>(space->GetMarkBitmap(),
-                                             space->Begin(),
-                                             space->End(),
-                                             visitor,
-                                             gc::accounting::CardTable::kCardDirty);
-    if (table != nullptr) {
-      // Add the cards to the mod-union table so that we can clear cards to save RAM.
-      table->ProcessCards();
+      table->VisitObjects(GrayImmuneObjectVisitor::Callback, &visitor);
+      // Since the cards are recorded in the mod-union table and this code runs during the
+      // pause, we can clear the cards for the space (to madvise).
       TimingLogger::ScopedTiming split2("(Paused)ClearCards", GetTimings());
       card_table->ClearCardRange(space->Begin(),
                                  AlignDown(space->End(), accounting::CardTable::kCardSize));
+    } else {
+      // TODO: Consider having a mark bitmap for app image spaces and avoid scanning during the
+      // pause because app image spaces are all dirty pages anyway.
+      card_table->Scan</* kClearCard */ false>(space->GetMarkBitmap(), space->Begin(), space->End(), visitor);
     }
   }
-  // Since all of the objects that may point to other spaces are gray, we can avoid all the read
+  // Since all of the objects that may point to other spaces are marked, we can avoid all the read
   // barriers in the immune spaces.
   updated_all_immune_objects_.StoreRelaxed(true);
 }
@@ -714,7 +658,6 @@
 
   ALWAYS_INLINE void operator()(mirror::Object* obj) const REQUIRES_SHARED(Locks::mutator_lock_) {
     if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) {
-      // Only need to scan gray objects.
       if (obj->GetReadBarrierState() == ReadBarrier::GrayState()) {
         collector_->ScanImmuneObject(obj);
         // Done scanning the object, go back to white.
@@ -764,7 +707,6 @@
       if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects && table != nullptr) {
         table->VisitObjects(ImmuneSpaceScanObjVisitor::Callback, &visitor);
       } else {
-        // TODO: Scan only the aged cards.
         live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
                                       reinterpret_cast<uintptr_t>(space->Limit()),
                                       visitor);
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index ae5d068..37b6a2c 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -165,9 +165,6 @@
   void GrayAllDirtyImmuneObjects()
       REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_);
-  void GrayAllNewlyDirtyImmuneObjects()
-      REQUIRES(Locks::mutator_lock_)
-      REQUIRES(!mark_stack_lock_);
   void VerifyGrayImmuneObjects()
       REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_);
@@ -342,7 +339,7 @@
   class DisableMarkingCheckpoint;
   class DisableWeakRefAccessCallback;
   class FlipCallback;
-  template <bool kConcurrent> class GrayImmuneObjectVisitor;
+  class GrayImmuneObjectVisitor;
   class ImmuneSpaceScanObjVisitor;
   class LostCopyVisitor;
   class RefFieldsVisitor;
diff --git a/runtime/oat.h b/runtime/oat.h
index e0fd1a2..9b2227b 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  // Increase GC card size.
-  static constexpr uint8_t kOatVersion[] = { '1', '2', '1', '\0' };
+  // Revert concurrent graying for immune spaces.
+  static constexpr uint8_t kOatVersion[] = { '1', '2', '2', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 0a81bfd..62a616b 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -134,13 +134,6 @@
 void Thread::SetIsGcMarkingAndUpdateEntrypoints(bool is_marking) {
   CHECK(kUseReadBarrier);
   tls32_.is_gc_marking = is_marking;
-  if (kUseReadBarrier && (kRuntimeISA == kX86_64 || kRuntimeISA == kX86)) {
-    // Disable entrypoint switching for X86 since we don't always check is_marking with the gray
-    // bit. This causes a race between GrayAllDirtyImmuneObjects and FlipThreadRoots where
-    // we may try to go slow path with a null entrypoint. The fix is to never do entrypoint
-    // switching for x86.
-    is_marking = true;
-  }
   UpdateReadBarrierEntrypoints(&tlsPtr_.quick_entrypoints, is_marking);
   ResetQuickAllocEntryPointsForThread(is_marking);
 }