Refactor and optimize GC code.

Fixed the reference cache mod union table and re-enabled it by
default. Added a boolean flag to count how many null, immune,
fast-path, and slow-path objects we mark.
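
A minimal sketch of the counting pattern (names match the change
below; the increments compile away when the flag stays false):

  static constexpr bool kCountMarkedObjects = false;

  inline void MarkSweep::MarkObject(Object* obj) {
    if (obj != nullptr) {
      MarkObjectNonNull(obj);
    } else if (kCountMarkedObjects) {
      ++mark_null_count_;  // Null references are counted, not marked.
    }
  }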

Slight speedup in mark stack processing and a large speedup in image
mod union table scanning.
EvaluateAndApplyChanges
Before:
Process mark stack time for full GC only:
12.464089s, 12.357870s, 12.538028s
Time spent marking image mod union table: ~240ms.
After:
Process mark stack time: 12.299375s, 12.217142s, 12.187076s
Time spent marking image mod union table: ~40ms.
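
One of the per-object checks that got cheaper is the immune region
test, now a single unsigned compare (sketch from the change below;
size_ is kept in sync with begin_/end_ by SetBegin/SetEnd):

  bool ContainsObject(const mirror::Object* obj) const {
    // Relies on unsigned wraparound: if obj is below begin_, the
    // subtraction wraps to a huge value and the compare fails.
    return reinterpret_cast<uintptr_t>(obj) -
        reinterpret_cast<uintptr_t>(begin_) < size_;
  }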

TODO: Refactor reference visiting logic into mirror::Object.
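
The visiting logic that would move into mirror::Object is now
templated on whether to visit the class slot; when the class is
skipped, its bit is cleared from the offset bitmap up front (key
snippet from the change below):

  if (!kVisitClass) {
    // The class field is at offset 0, which maps to the highest bit of
    // the reference offset bitmap, so one xor removes it before the
    // CLZ-based offset walk.
    ref_offsets ^= kWordHighBitMask;
    DCHECK_EQ(mirror::Object::ClassOffset().Uint32Value(), 0U);
  }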

Change-Id: I91889ded9d3f2bf127bc0051c1b1ff77e792e94f
diff --git a/runtime/gc/accounting/heap_bitmap.h b/runtime/gc/accounting/heap_bitmap.h
index 7cfeb63..b23b12e 100644
--- a/runtime/gc/accounting/heap_bitmap.h
+++ b/runtime/gc/accounting/heap_bitmap.h
@@ -31,12 +31,9 @@
 
 class HeapBitmap {
  public:
-  typedef std::vector<SpaceBitmap*, GcAllocator<SpaceBitmap*> > SpaceBitmapVector;
-  typedef std::vector<ObjectSet*, GcAllocator<ObjectSet*> > ObjectSetVector;
-
   bool Test(const mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
     SpaceBitmap* bitmap = GetContinuousSpaceBitmap(obj);
-    if (LIKELY(bitmap != NULL)) {
+    if (LIKELY(bitmap != nullptr)) {
       return bitmap->Test(obj);
     } else {
       return GetDiscontinuousSpaceObjectSet(obj) != NULL;
@@ -71,7 +68,7 @@
         return bitmap;
       }
     }
-    return NULL;
+    return nullptr;
   }
 
   ObjectSet* GetDiscontinuousSpaceObjectSet(const mirror::Object* obj) {
@@ -80,7 +77,7 @@
         return space_set;
       }
     }
-    return NULL;
+    return nullptr;
   }
 
   void Walk(ObjectCallback* callback, void* arg)
@@ -110,10 +107,10 @@
   void RemoveDiscontinuousObjectSet(ObjectSet* set);
 
   // Bitmaps covering continuous spaces.
-  SpaceBitmapVector continuous_space_bitmaps_;
+  std::vector<SpaceBitmap*, GcAllocator<SpaceBitmap*>> continuous_space_bitmaps_;
 
   // Sets covering discontinuous spaces.
-  ObjectSetVector discontinuous_space_sets_;
+  std::vector<ObjectSet*, GcAllocator<ObjectSet*>> discontinuous_space_sets_;
 
   friend class art::gc::Heap;
 };
diff --git a/runtime/gc/accounting/mod_union_table-inl.h b/runtime/gc/accounting/mod_union_table-inl.h
index 19c6768..76719b6 100644
--- a/runtime/gc/accounting/mod_union_table-inl.h
+++ b/runtime/gc/accounting/mod_union_table-inl.h
@@ -33,11 +33,9 @@
       : ModUnionTableReferenceCache(name, heap, space) {}
 
   bool AddReference(const mirror::Object* /* obj */, const mirror::Object* ref) ALWAYS_INLINE {
-    const std::vector<space::ContinuousSpace*>& spaces = GetHeap()->GetContinuousSpaces();
-    typedef std::vector<space::ContinuousSpace*>::const_iterator It;
-    for (It it = spaces.begin(); it != spaces.end(); ++it) {
-      if ((*it)->Contains(ref)) {
-        return (*it)->IsMallocSpace();
+    for (space::ContinuousSpace* space : GetHeap()->GetContinuousSpaces()) {
+      if (space->HasAddress(ref)) {
+        return !space->IsImageSpace();
       }
     }
     // Assume it points to a large object.
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 8871921..8bc1ced 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -108,7 +108,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(root != NULL);
     ModUnionUpdateObjectReferencesVisitor ref_visitor(callback_, arg_);
-    collector::MarkSweep::VisitObjectReferences(root, ref_visitor, true);
+    collector::MarkSweep::VisitObjectReferences<kMovingClasses>(root, ref_visitor);
   }
 
  private:
@@ -159,7 +159,7 @@
     // We don't have an early exit since we use the visitor pattern, an early
     // exit should significantly speed this up.
     AddToReferenceArrayVisitor visitor(mod_union_table_, references_);
-    collector::MarkSweep::VisitObjectReferences(obj, visitor, true);
+    collector::MarkSweep::VisitObjectReferences<kMovingClasses>(obj, visitor);
   }
  private:
   ModUnionTableReferenceCache* const mod_union_table_;
@@ -210,7 +210,7 @@
     Locks::heap_bitmap_lock_->AssertSharedHeld(Thread::Current());
     DCHECK(obj != NULL);
     CheckReferenceVisitor visitor(mod_union_table_, references_);
-    collector::MarkSweep::VisitObjectReferences(obj, visitor, true);
+    collector::MarkSweep::VisitObjectReferences<kMovingClasses>(obj, visitor);
   }
 
  private:
diff --git a/runtime/gc/accounting/remembered_set.cc b/runtime/gc/accounting/remembered_set.cc
index 022d148..a832615 100644
--- a/runtime/gc/accounting/remembered_set.cc
+++ b/runtime/gc/accounting/remembered_set.cc
@@ -99,7 +99,7 @@
     DCHECK(obj != NULL);
     RememberedSetReferenceVisitor ref_visitor(callback_, target_space_,
                                               contains_reference_to_target_space_, arg_);
-    collector::MarkSweep::VisitObjectReferences(obj, ref_visitor, true);
+    collector::MarkSweep::VisitObjectReferences<kMovingClasses>(obj, ref_visitor);
   }
 
  private:
diff --git a/runtime/gc/collector/immune_region.cc b/runtime/gc/collector/immune_region.cc
index 70a6213..3e1c944 100644
--- a/runtime/gc/collector/immune_region.cc
+++ b/runtime/gc/collector/immune_region.cc
@@ -28,8 +28,8 @@
 }
 
 void ImmuneRegion::Reset() {
-  begin_ = nullptr;
-  end_ = nullptr;
+  SetBegin(nullptr);
+  SetEnd(nullptr);
 }
 
 bool ImmuneRegion::AddContinuousSpace(space::ContinuousSpace* space) {
@@ -41,13 +41,13 @@
   mirror::Object* space_begin = reinterpret_cast<mirror::Object*>(space->Begin());
   mirror::Object* space_limit = reinterpret_cast<mirror::Object*>(space->Limit());
   if (IsEmpty()) {
-    begin_ = space_begin;
-    end_ = space_limit;
+    SetBegin(space_begin);
+    SetEnd(space_limit);
   } else {
     if (space_limit <= begin_) {  // Space is before the immune region.
-      begin_ = space_begin;
+      SetBegin(space_begin);
     } else if (space_begin >= end_) {  // Space is after the immune region.
-      end_ = space_limit;
+      SetEnd(space_limit);
     } else {
       return false;
     }
diff --git a/runtime/gc/collector/immune_region.h b/runtime/gc/collector/immune_region.h
index 21d0b43..0c0a89b 100644
--- a/runtime/gc/collector/immune_region.h
+++ b/runtime/gc/collector/immune_region.h
@@ -46,16 +46,29 @@
   bool ContainsSpace(const space::ContinuousSpace* space) const;
   // Returns true if an object is inside of the immune region (assumed to be marked).
   bool ContainsObject(const mirror::Object* obj) const ALWAYS_INLINE {
-    return obj >= begin_ && obj < end_;
+    // Note: Relies on integer underflow behavior.
+    return reinterpret_cast<uintptr_t>(obj) - reinterpret_cast<uintptr_t>(begin_) < size_;
+  }
+  void SetBegin(mirror::Object* begin) {
+    begin_ = begin;
+    UpdateSize();
+  }
+  void SetEnd(mirror::Object* end) {
+    end_ = end;
+    UpdateSize();
   }
 
  private:
   bool IsEmpty() const {
-    return begin_ == end_;
+    return size_ == 0;
+  }
+  void UpdateSize() {
+    size_ = reinterpret_cast<uintptr_t>(end_) - reinterpret_cast<uintptr_t>(begin_);
   }
 
   mirror::Object* begin_;
   mirror::Object* end_;
+  uintptr_t size_;
 };
 
 }  // namespace collector
diff --git a/runtime/gc/collector/mark_sweep-inl.h b/runtime/gc/collector/mark_sweep-inl.h
index 4915532..f73a50f 100644
--- a/runtime/gc/collector/mark_sweep-inl.h
+++ b/runtime/gc/collector/mark_sweep-inl.h
@@ -48,26 +48,25 @@
     if (kCountScannedTypes) {
       ++class_count_;
     }
-    VisitClassReferences(klass, obj, visitor);
+    VisitClassReferences<false>(klass, obj, visitor);
   } else {
     if (kCountScannedTypes) {
       ++other_count_;
     }
-    VisitOtherReferences(klass, obj, visitor);
+    VisitInstanceFieldsReferences<false>(klass, obj, visitor);
     if (UNLIKELY(klass->IsReferenceClass<kVerifyNone>())) {
       DelayReferenceReferent(klass, obj);
     }
   }
 }
 
-template <typename Visitor>
-inline void MarkSweep::VisitObjectReferences(mirror::Object* obj, const Visitor& visitor,
-                                             bool visit_class)
+template <bool kVisitClass, typename Visitor>
+inline void MarkSweep::VisitObjectReferences(mirror::Object* obj, const Visitor& visitor)
     SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_,
                           Locks::mutator_lock_) {
   mirror::Class* klass = obj->GetClass();
   if (klass->IsArrayClass()) {
-    if (visit_class) {
+    if (kVisitClass) {
       visitor(obj, klass, mirror::Object::ClassOffset(), false);
     }
     if (klass->IsObjectArrayClass<kVerifyNone>()) {
@@ -75,37 +74,46 @@
     }
   } else if (klass == mirror::Class::GetJavaLangClass()) {
     DCHECK_EQ(klass->GetClass<kVerifyNone>(), mirror::Class::GetJavaLangClass());
-    VisitClassReferences(klass, obj, visitor);
+    VisitClassReferences<kVisitClass>(klass, obj, visitor);
   } else {
-    VisitOtherReferences(klass, obj, visitor);
+    VisitInstanceFieldsReferences<kVisitClass>(klass, obj, visitor);
   }
 }
 
-template <typename Visitor>
-inline void MarkSweep::VisitInstanceFieldsReferences(mirror::Class* klass, mirror::Object* obj,
-                                                     const Visitor& visitor)
+template <bool kVisitClass, typename Visitor>
+inline void MarkSweep::VisitInstanceFieldsReferences(mirror::Class* klass,
+                                                     mirror::Object* obj, const Visitor& visitor)
     SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
-  VisitFieldsReferences(obj, klass->GetReferenceInstanceOffsets<kVerifyNone>(), false, visitor);
+  VisitFieldsReferences<kVisitClass>(obj, klass->GetReferenceInstanceOffsets<kVerifyNone>(), false,
+                                     visitor);
 }
 
-template <typename Visitor>
+template <bool kVisitClass, typename Visitor>
 inline void MarkSweep::VisitClassReferences(mirror::Class* klass, mirror::Object* obj,
                                             const Visitor& visitor)
     SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
-  VisitInstanceFieldsReferences(klass, obj, visitor);
-  VisitStaticFieldsReferences(obj->AsClass<kVerifyNone>(), visitor);
+  VisitInstanceFieldsReferences<kVisitClass>(klass, obj, visitor);
+  VisitStaticFieldsReferences<kVisitClass>(obj->AsClass<kVerifyNone>(), visitor);
 }
 
-template <typename Visitor>
+template <bool kVisitClass, typename Visitor>
 inline void MarkSweep::VisitStaticFieldsReferences(mirror::Class* klass, const Visitor& visitor)
     SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
-  VisitFieldsReferences(klass, klass->GetReferenceStaticOffsets<kVerifyNone>(), true, visitor);
+  VisitFieldsReferences<kVisitClass>(klass, klass->GetReferenceStaticOffsets<kVerifyNone>(), true,
+                                     visitor);
 }
 
-template <typename Visitor>
+template <bool kVisitClass, typename Visitor>
 inline void MarkSweep::VisitFieldsReferences(mirror::Object* obj, uint32_t ref_offsets,
                                              bool is_static, const Visitor& visitor) {
   if (LIKELY(ref_offsets != CLASS_WALK_SUPER)) {
+    if (!kVisitClass) {
+      // Currently the class bit is always set in the word. Since we count leading zeros to find
+      // the offset and the class bit is at offset 0, it means that the highest bit is the class
+      // bit. We can quickly clear this using xor.
+      ref_offsets ^= kWordHighBitMask;
+      DCHECK_EQ(mirror::Object::ClassOffset().Uint32Value(), 0U);
+    }
     // Found a reference offset bitmap.  Mark the specified offsets.
     while (ref_offsets != 0) {
       size_t right_shift = CLZ(ref_offsets);
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 579b781..bfef438 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -79,11 +79,11 @@
 static constexpr bool kParallelProcessMarkStack = true;
 
 // Profiling and information flags.
-static constexpr bool kCountClassesMarked = false;
 static constexpr bool kProfileLargeObjects = false;
 static constexpr bool kMeasureOverhead = false;
 static constexpr bool kCountTasks = false;
 static constexpr bool kCountJavaLangRefs = false;
+static constexpr bool kCountMarkedObjects = false;
 
 // Turn off kCheckLocks when profiling the GC since it slows the GC down by up to 40%.
 static constexpr bool kCheckLocks = kDebugLocking;
@@ -109,9 +109,6 @@
     : GarbageCollector(heap,
                        name_prefix +
                        (is_concurrent ? "concurrent mark sweep": "mark sweep")),
-      current_mark_bitmap_(NULL),
-      mark_stack_(NULL),
-      live_stack_freeze_size_(0),
       gc_barrier_(new Barrier(0)),
       large_object_lock_("mark sweep large object lock", kMarkSweepLargeObjectLock),
       mark_stack_lock_("mark sweep mark stack lock", kMarkSweepMarkStackLock),
@@ -129,13 +126,20 @@
   other_count_ = 0;
   large_object_test_ = 0;
   large_object_mark_ = 0;
-  classes_marked_ = 0;
   overhead_time_ = 0;
   work_chunks_created_ = 0;
   work_chunks_deleted_ = 0;
   reference_count_ = 0;
-
-  FindDefaultMarkBitmap();
+  mark_null_count_ = 0;
+  mark_immune_count_ = 0;
+  mark_fastpath_count_ = 0;
+  mark_slowpath_count_ = 0;
+  FindDefaultSpaceBitmap();
+  {
+    // TODO: I don't think we should need heap bitmap lock to get the mark bitmap.
+    ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    mark_bitmap_ = heap_->GetMarkBitmap();
+  }
 
   // Do any pre GC verification.
   timings_.NewSplit("PreGcVerification");
@@ -247,7 +251,7 @@
   Thread* self = Thread::Current();
 
   BindBitmaps();
-  FindDefaultMarkBitmap();
+  FindDefaultSpaceBitmap();
 
   // Process dirty cards and add dirty cards to mod union tables.
   heap_->ProcessCards(timings_, false);
@@ -356,14 +360,13 @@
   }
 }
 
-void MarkSweep::FindDefaultMarkBitmap() {
+void MarkSweep::FindDefaultSpaceBitmap() {
   TimingLogger::ScopedSplit split("FindDefaultMarkBitmap", &timings_);
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     accounting::SpaceBitmap* bitmap = space->GetMarkBitmap();
     if (bitmap != nullptr &&
         space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect) {
-      current_mark_bitmap_ = bitmap;
-      CHECK(current_mark_bitmap_ != NULL);
+      current_space_bitmap_ = bitmap;
       return;
     }
   }
@@ -389,7 +392,7 @@
   }
 }
 
-inline void MarkSweep::MarkObjectNonNullParallel(const Object* obj) {
+inline void MarkSweep::MarkObjectNonNullParallel(Object* obj) {
   DCHECK(obj != NULL);
   if (MarkObjectParallel(obj)) {
     MutexLock mu(Thread::Current(), mark_stack_lock_);
@@ -397,7 +400,7 @@
       ExpandMarkStack();
     }
     // The object must be pushed on to the mark stack.
-    mark_stack_->PushBack(const_cast<Object*>(obj));
+    mark_stack_->PushBack(obj);
   }
 }
 
@@ -409,17 +412,15 @@
 
 inline void MarkSweep::UnMarkObjectNonNull(const Object* obj) {
   DCHECK(!immune_region_.ContainsObject(obj));
-
   if (kUseBrooksPointer) {
     // Verify all the objects have the correct Brooks pointer installed.
     obj->AssertSelfBrooksPointer();
   }
-
   // Try to take advantage of locality of references within a space, failing this find the space
   // the hard way.
-  accounting::SpaceBitmap* object_bitmap = current_mark_bitmap_;
+  accounting::SpaceBitmap* object_bitmap = current_space_bitmap_;
   if (UNLIKELY(!object_bitmap->HasAddress(obj))) {
-    accounting::SpaceBitmap* new_bitmap = heap_->GetMarkBitmap()->GetContinuousSpaceBitmap(obj);
+    accounting::SpaceBitmap* new_bitmap = mark_bitmap_->GetContinuousSpaceBitmap(obj);
     if (LIKELY(new_bitmap != NULL)) {
       object_bitmap = new_bitmap;
     } else {
@@ -427,50 +428,54 @@
       return;
     }
   }
-
   DCHECK(object_bitmap->HasAddress(obj));
   object_bitmap->Clear(obj);
 }
 
-inline void MarkSweep::MarkObjectNonNull(const Object* obj) {
-  DCHECK(obj != NULL);
-
+inline void MarkSweep::MarkObjectNonNull(Object* obj) {
+  DCHECK(obj != nullptr);
   if (kUseBrooksPointer) {
     // Verify all the objects have the correct Brooks pointer installed.
     obj->AssertSelfBrooksPointer();
   }
-
   if (immune_region_.ContainsObject(obj)) {
+    if (kCountMarkedObjects) {
+      ++mark_immune_count_;
+    }
     DCHECK(IsMarked(obj));
     return;
   }
-
   // Try to take advantage of locality of references within a space, failing this find the space
   // the hard way.
-  accounting::SpaceBitmap* object_bitmap = current_mark_bitmap_;
+  accounting::SpaceBitmap* object_bitmap = current_space_bitmap_;
   if (UNLIKELY(!object_bitmap->HasAddress(obj))) {
-    accounting::SpaceBitmap* new_bitmap = heap_->GetMarkBitmap()->GetContinuousSpaceBitmap(obj);
-    if (LIKELY(new_bitmap != NULL)) {
-      object_bitmap = new_bitmap;
-    } else {
+    object_bitmap = mark_bitmap_->GetContinuousSpaceBitmap(obj);
+    if (kCountMarkedObjects) {
+      ++mark_slowpath_count_;
+    }
+    if (UNLIKELY(object_bitmap == nullptr)) {
       MarkLargeObject(obj, true);
       return;
     }
+  } else if (kCountMarkedObjects) {
+    ++mark_fastpath_count_;
   }
-
   // This object was not previously marked.
-  if (!object_bitmap->Test(obj)) {
-    object_bitmap->Set(obj);
-    if (UNLIKELY(mark_stack_->Size() >= mark_stack_->Capacity())) {
-      // Lock is not needed but is here anyways to please annotalysis.
-      MutexLock mu(Thread::Current(), mark_stack_lock_);
-      ExpandMarkStack();
-    }
-    // The object must be pushed on to the mark stack.
-    mark_stack_->PushBack(const_cast<Object*>(obj));
+  if (!object_bitmap->Set(obj)) {
+    PushOnMarkStack(obj);
   }
 }
 
+inline void MarkSweep::PushOnMarkStack(Object* obj) {
+  if (UNLIKELY(mark_stack_->Size() >= mark_stack_->Capacity())) {
+    // Lock is not needed but is here anyways to please annotalysis.
+    MutexLock mu(Thread::Current(), mark_stack_lock_);
+    ExpandMarkStack();
+  }
+  // The object must be pushed on to the mark stack.
+  mark_stack_->PushBack(obj);
+}
+
 // Rare case, probably not worth inlining since it will increase instruction cache miss rate.
 bool MarkSweep::MarkLargeObject(const Object* obj, bool set) {
   // TODO: support >1 discontinuous space.
@@ -500,23 +505,20 @@
 }
 
 inline bool MarkSweep::MarkObjectParallel(const Object* obj) {
-  DCHECK(obj != NULL);
-
+  DCHECK(obj != nullptr);
   if (kUseBrooksPointer) {
     // Verify all the objects have the correct Brooks pointer installed.
     obj->AssertSelfBrooksPointer();
   }
-
   if (immune_region_.ContainsObject(obj)) {
     DCHECK(IsMarked(obj));
     return false;
   }
-
   // Try to take advantage of locality of references within a space, failing this find the space
   // the hard way.
-  accounting::SpaceBitmap* object_bitmap = current_mark_bitmap_;
+  accounting::SpaceBitmap* object_bitmap = current_space_bitmap_;
   if (UNLIKELY(!object_bitmap->HasAddress(obj))) {
-    accounting::SpaceBitmap* new_bitmap = heap_->GetMarkBitmap()->GetContinuousSpaceBitmap(obj);
+    accounting::SpaceBitmap* new_bitmap = mark_bitmap_->GetContinuousSpaceBitmap(obj);
     if (new_bitmap != NULL) {
       object_bitmap = new_bitmap;
     } else {
@@ -526,23 +528,20 @@
       return MarkLargeObject(obj, true);
     }
   }
-
   // Return true if the object was not previously marked.
   return !object_bitmap->AtomicTestAndSet(obj);
 }
 
-// Used to mark objects when recursing.  Recursion is done by moving
-// the finger across the bitmaps in address order and marking child
-// objects.  Any newly-marked objects whose addresses are lower than
-// the finger won't be visited by the bitmap scan, so those objects
-// need to be added to the mark stack.
-inline void MarkSweep::MarkObject(const Object* obj) {
-  if (obj != NULL) {
+// Used to mark objects when processing the mark stack. If an object is null, it is not marked.
+inline void MarkSweep::MarkObject(Object* obj) {
+  if (obj != nullptr) {
     MarkObjectNonNull(obj);
+  } else if (kCountMarkedObjects) {
+    ++mark_null_count_;
   }
 }
 
-void MarkSweep::MarkRootParallelCallback(mirror::Object** root, void* arg, uint32_t /*thread_id*/,
+void MarkSweep::MarkRootParallelCallback(Object** root, void* arg, uint32_t /*thread_id*/,
                                          RootType /*root_type*/) {
   reinterpret_cast<MarkSweep*>(arg)->MarkObjectNonNullParallel(*root);
 }
@@ -630,7 +629,7 @@
 class MarkStackTask : public Task {
  public:
   MarkStackTask(ThreadPool* thread_pool, MarkSweep* mark_sweep, size_t mark_stack_size,
-                const Object** mark_stack)
+                Object** mark_stack)
       : mark_sweep_(mark_sweep),
         thread_pool_(thread_pool),
         mark_stack_pos_(mark_stack_size) {
@@ -686,11 +685,11 @@
   MarkSweep* const mark_sweep_;
   ThreadPool* const thread_pool_;
   // Thread local mark stack for this task.
-  const Object* mark_stack_[kMaxSize];
+  Object* mark_stack_[kMaxSize];
   // Mark stack position.
   size_t mark_stack_pos_;
 
-  void MarkStackPush(const Object* obj) ALWAYS_INLINE {
+  void MarkStackPush(Object* obj) ALWAYS_INLINE {
     if (UNLIKELY(mark_stack_pos_ == kMaxSize)) {
       // Mark stack overflow, give 1/2 the stack to the thread pool as a new work task.
       mark_stack_pos_ /= 2;
@@ -699,7 +698,7 @@
       thread_pool_->AddTask(Thread::Current(), task);
     }
     DCHECK(obj != nullptr);
-    DCHECK(mark_stack_pos_ < kMaxSize);
+    DCHECK_LT(mark_stack_pos_, kMaxSize);
     mark_stack_[mark_stack_pos_++] = obj;
   }
 
@@ -712,12 +711,12 @@
     ScanObjectParallelVisitor visitor(this);
     // TODO: Tune this.
     static const size_t kFifoSize = 4;
-    BoundedFifoPowerOfTwo<const Object*, kFifoSize> prefetch_fifo;
+    BoundedFifoPowerOfTwo<Object*, kFifoSize> prefetch_fifo;
     for (;;) {
-      const Object* obj = nullptr;
+      Object* obj = nullptr;
       if (kUseMarkStackPrefetch) {
         while (mark_stack_pos_ != 0 && prefetch_fifo.size() < kFifoSize) {
-          const Object* obj = mark_stack_[--mark_stack_pos_];
+          Object* obj = mark_stack_[--mark_stack_pos_];
           DCHECK(obj != nullptr);
           __builtin_prefetch(obj);
           prefetch_fifo.push_back(obj);
@@ -734,7 +733,7 @@
         obj = mark_stack_[--mark_stack_pos_];
       }
       DCHECK(obj != nullptr);
-      visitor(const_cast<mirror::Object*>(obj));
+      visitor(obj);
     }
   }
 };
@@ -743,7 +742,7 @@
  public:
   CardScanTask(ThreadPool* thread_pool, MarkSweep* mark_sweep, accounting::SpaceBitmap* bitmap,
                byte* begin, byte* end, byte minimum_age, size_t mark_stack_size,
-               const Object** mark_stack_obj)
+               Object** mark_stack_obj)
       : MarkStackTask<false>(thread_pool, mark_sweep, mark_stack_size, mark_stack_obj),
         bitmap_(bitmap),
         begin_(begin),
@@ -794,8 +793,8 @@
     // scanned at the same time.
     timings_.StartSplit(paused ? "(Paused)ScanGrayObjects" : "ScanGrayObjects");
     // Try to take some of the mark stack since we can pass this off to the worker tasks.
-    const Object** mark_stack_begin = const_cast<const Object**>(mark_stack_->Begin());
-    const Object** mark_stack_end = const_cast<const Object**>(mark_stack_->End());
+    Object** mark_stack_begin = mark_stack_->Begin();
+    Object** mark_stack_end = mark_stack_->End();
     const size_t mark_stack_size = mark_stack_end - mark_stack_begin;
     // Estimated number of work tasks we will create.
     const size_t mark_stack_tasks = GetHeap()->GetContinuousSpaces().size() * thread_count;
@@ -828,7 +827,7 @@
         size_t mark_stack_increment = std::min(mark_stack_delta, mark_stack_remaining);
         mark_stack_end -= mark_stack_increment;
         mark_stack_->PopBackCount(static_cast<int32_t>(mark_stack_increment));
-        DCHECK_EQ(mark_stack_end, const_cast<const art::mirror::Object **>(mark_stack_->End()));
+        DCHECK_EQ(mark_stack_end, mark_stack_->End());
         // Add the new task to the thread pool.
         auto* task = new CardScanTask(thread_pool, this, space->GetMarkBitmap(), card_begin,
                                       card_begin + card_increment, minimum_age,
@@ -917,8 +916,8 @@
     for (const auto& space : GetHeap()->GetContinuousSpaces()) {
       if ((space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect) ||
           (!partial && space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect)) {
-        current_mark_bitmap_ = space->GetMarkBitmap();
-        if (current_mark_bitmap_ == nullptr) {
+        current_space_bitmap_ = space->GetMarkBitmap();
+        if (current_space_bitmap_ == nullptr) {
           continue;
         }
         if (parallel) {
@@ -937,7 +936,7 @@
             delta = RoundUp(delta, KB);
             if (delta < 16 * KB) delta = end - begin;
             begin += delta;
-            auto* task = new RecursiveMarkTask(thread_pool, this, current_mark_bitmap_, start,
+            auto* task = new RecursiveMarkTask(thread_pool, this, current_space_bitmap_, start,
                                                begin);
             thread_pool->AddTask(self, task);
           }
@@ -949,7 +948,7 @@
           // This function does not handle heap end increasing, so we must use the space end.
           uintptr_t begin = reinterpret_cast<uintptr_t>(space->Begin());
           uintptr_t end = reinterpret_cast<uintptr_t>(space->End());
-          current_mark_bitmap_->VisitMarkedRange(begin, end, scan_visitor);
+          current_space_bitmap_->VisitMarkedRange(begin, end, scan_visitor);
         }
       }
     }
@@ -1203,6 +1202,9 @@
 // the heap for later processing.
 void MarkSweep::DelayReferenceReferent(mirror::Class* klass, Object* obj) {
   DCHECK(klass != nullptr);
+  if (kCountJavaLangRefs) {
+    ++reference_count_;
+  }
   heap_->DelayReferenceReferent(klass, obj->AsReference(), IsMarkedCallback, this);
 }
 
@@ -1211,7 +1213,7 @@
   explicit MarkObjectVisitor(MarkSweep* const mark_sweep) ALWAYS_INLINE : mark_sweep_(mark_sweep) {}
 
   // TODO: Fixme when anotatalysis works with visitors.
-  void operator()(const Object* /* obj */, const Object* ref, const MemberOffset& /* offset */,
+  void operator()(Object* /* obj */, Object* ref, const MemberOffset& /* offset */,
                   bool /* is_static */) const ALWAYS_INLINE
       NO_THREAD_SAFETY_ANALYSIS {
     if (kCheckLocks) {
@@ -1233,7 +1235,6 @@
 }
 
 void MarkSweep::ProcessMarkStackPausedCallback(void* arg) {
-  DCHECK(arg != nullptr);
   reinterpret_cast<MarkSweep*>(arg)->ProcessMarkStack(true);
 }
 
@@ -1246,8 +1247,7 @@
   // Split the current mark stack up into work tasks.
   for (mirror::Object **it = mark_stack_->Begin(), **end = mark_stack_->End(); it < end; ) {
     const size_t delta = std::min(static_cast<size_t>(end - it), chunk_size);
-    thread_pool->AddTask(self, new MarkStackTask<false>(thread_pool, this, delta,
-                                                        const_cast<const mirror::Object**>(it)));
+    thread_pool->AddTask(self, new MarkStackTask<false>(thread_pool, this, delta, it));
     it += delta;
   }
   thread_pool->SetMaxActiveWorkers(thread_count - 1);
@@ -1301,11 +1301,10 @@
   if (immune_region_.ContainsObject(object)) {
     return true;
   }
-  DCHECK(current_mark_bitmap_ != NULL);
-  if (current_mark_bitmap_->HasAddress(object)) {
-    return current_mark_bitmap_->Test(object);
+  if (current_space_bitmap_->HasAddress(object)) {
+    return current_space_bitmap_->Test(object);
   }
-  return heap_->GetMarkBitmap()->Test(object);
+  return mark_bitmap_->Test(object);
 }
 
 void MarkSweep::FinishPhase() {
@@ -1314,44 +1313,35 @@
   Heap* heap = GetHeap();
   timings_.NewSplit("PostGcVerification");
   heap->PostGcVerification(this);
-
-  // Update the cumulative statistics
+  // Update the cumulative statistics.
   total_freed_objects_ += GetFreedObjects() + GetFreedLargeObjects();
   total_freed_bytes_ += GetFreedBytes() + GetFreedLargeObjectBytes();
-
   // Ensure that the mark stack is empty.
   CHECK(mark_stack_->IsEmpty());
-
   if (kCountScannedTypes) {
     VLOG(gc) << "MarkSweep scanned classes=" << class_count_ << " arrays=" << array_count_
              << " other=" << other_count_;
   }
-
   if (kCountTasks) {
     VLOG(gc) << "Total number of work chunks allocated: " << work_chunks_created_;
   }
-
   if (kMeasureOverhead) {
     VLOG(gc) << "Overhead time " << PrettyDuration(overhead_time_);
   }
-
   if (kProfileLargeObjects) {
     VLOG(gc) << "Large objects tested " << large_object_test_ << " marked " << large_object_mark_;
   }
-
-  if (kCountClassesMarked) {
-    VLOG(gc) << "Classes marked " << classes_marked_;
-  }
-
   if (kCountJavaLangRefs) {
     VLOG(gc) << "References scanned " << reference_count_;
   }
-
+  if (kCountMarkedObjects) {
+    VLOG(gc) << "Marked: null=" << mark_null_count_ << " immune=" <<  mark_immune_count_
+        << " fastpath=" << mark_fastpath_count_ << " slowpath=" << mark_slowpath_count_;
+  }
   // Update the cumulative loggers.
   cumulative_timings_.Start();
   cumulative_timings_.AddLogger(timings_);
   cumulative_timings_.End();
-
   // Clear all of the spaces' mark bitmaps.
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     accounting::SpaceBitmap* bitmap = space->GetMarkBitmap();
@@ -1361,7 +1351,6 @@
     }
   }
   mark_stack_->Reset();
-
   // Reset the marked large objects.
   space::LargeObjectSpace* large_objects = GetHeap()->GetLargeObjectsSpace();
   large_objects->GetMarkObjects()->Clear();
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index b4dd8c7..59f8e28 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -91,7 +91,7 @@
   void Init();
 
   // Find the default mark bitmap.
-  void FindDefaultMarkBitmap();
+  void FindDefaultSpaceBitmap();
 
   // Marks all objects in the root set at the start of a garbage collection.
   void MarkRoots(Thread* self)
@@ -180,10 +180,9 @@
   void VerifyIsLive(const mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  template <typename Visitor>
-  static void VisitObjectReferences(mirror::Object* obj, const Visitor& visitor, bool visit_class)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_,
-                            Locks::mutator_lock_);
+  template <bool kVisitClass, typename Visitor>
+  static void VisitObjectReferences(mirror::Object* obj, const Visitor& visitor)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   static mirror::Object* MarkObjectCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
@@ -207,7 +206,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Marks an object.
-  void MarkObject(const mirror::Object* obj)
+  void MarkObject(mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
@@ -223,10 +222,9 @@
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   static void VerifyImageRootVisitor(mirror::Object* root, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_,
-                            Locks::mutator_lock_);
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
-  void MarkObjectNonNull(const mirror::Object* obj)
+  void MarkObjectNonNull(mirror::Object* obj)
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
         EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
@@ -242,12 +240,12 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Marks an object atomically, safe to use from multiple threads.
-  void MarkObjectNonNullParallel(const mirror::Object* obj);
+  void MarkObjectNonNullParallel(mirror::Object* obj);
 
   // Marks or unmarks a large object based on whether or not set is true. If set is true, then we
   // mark, otherwise we unmark.
   bool MarkLargeObject(const mirror::Object* obj, bool set)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) LOCKS_EXCLUDED(large_object_lock_);
 
   // Returns true if we need to add obj to a mark stack.
   bool MarkObjectParallel(const mirror::Object* obj) NO_THREAD_SAFETY_ANALYSIS;
@@ -271,22 +269,22 @@
   void VerifyRoot(const mirror::Object* root, size_t vreg, const StackVisitor* visitor)
       NO_THREAD_SAFETY_ANALYSIS;
 
-  template <typename Visitor>
+  template <bool kVisitClass, typename Visitor>
   static void VisitInstanceFieldsReferences(mirror::Class* klass, mirror::Object* obj,
                                             const Visitor& visitor)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   // Visit the header, static field references, and interface pointers of a class object.
-  template <typename Visitor>
+  template <bool kVisitClass, typename Visitor>
   static void VisitClassReferences(mirror::Class* klass, mirror::Object* obj,
                                    const Visitor& visitor)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
-  template <typename Visitor>
+  template <bool kVisitClass, typename Visitor>
   static void VisitStaticFieldsReferences(mirror::Class* klass, const Visitor& visitor)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
-  template <typename Visitor>
+  template <bool kVisitClass, typename Visitor>
   static void VisitFieldsReferences(mirror::Object* obj, uint32_t ref_offsets, bool is_static,
                                     const Visitor& visitor)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
@@ -297,13 +295,8 @@
                                          const Visitor& visitor)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
-  // Visits the header and field references of a data object.
-  template <typename Visitor>
-  static void VisitOtherReferences(mirror::Class* klass, mirror::Object* obj,
-                                   const Visitor& visitor)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
-    return VisitInstanceFieldsReferences(klass, obj, visitor);
-  }
+  // Push a single reference on a mark stack.
+  void PushOnMarkStack(mirror::Object* obj);
 
   // Blackens objects grayed during a garbage collection.
   void ScanGrayObjects(bool paused, byte minimum_age)
@@ -346,7 +339,9 @@
 
   // Current space, we check this space first to avoid searching for the appropriate space for an
   // object.
-  accounting::SpaceBitmap* current_mark_bitmap_;
+  accounting::SpaceBitmap* current_space_bitmap_;
+  // Cache the heap's mark bitmap to prevent having to do 2 loads during slow path marking.
+  accounting::HeapBitmap* mark_bitmap_;
 
   accounting::ObjectStack* mark_stack_;
 
@@ -363,11 +358,14 @@
   AtomicInteger other_count_;
   AtomicInteger large_object_test_;
   AtomicInteger large_object_mark_;
-  AtomicInteger classes_marked_;
   AtomicInteger overhead_time_;
   AtomicInteger work_chunks_created_;
   AtomicInteger work_chunks_deleted_;
   AtomicInteger reference_count_;
+  AtomicInteger mark_null_count_;
+  AtomicInteger mark_immune_count_;
+  AtomicInteger mark_fastpath_count_;
+  AtomicInteger mark_slowpath_count_;
 
   // Verification.
   size_t live_stack_freeze_size_;
@@ -386,6 +384,7 @@
   friend class art::gc::Heap;
   friend class InternTableEntryIsUnmarked;
   friend class MarkIfReachesAllocspaceVisitor;
+  friend class MarkObjectVisitor;
   friend class ModUnionCheckReferences;
   friend class ModUnionClearCardVisitor;
   friend class ModUnionReferenceVisitor;
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 565966a..b6591fc 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -278,7 +278,7 @@
   DCHECK(obj != NULL);
   DCHECK(!from_space_->HasAddress(obj)) << "Scanning object " << obj << " in from space";
   SemiSpaceVerifyNoFromSpaceReferencesVisitor visitor(from_space_);
-  MarkSweep::VisitObjectReferences(obj, visitor, kMovingClasses);
+  MarkSweep::VisitObjectReferences<kMovingClasses>(obj, visitor);
 }
 
 class SemiSpaceVerifyNoFromSpaceReferencesObjectVisitor {
@@ -739,7 +739,7 @@
   DCHECK(obj != NULL);
   DCHECK(!from_space_->HasAddress(obj)) << "Scanning object " << obj << " in from space";
   SemiSpaceMarkObjectVisitor visitor(this);
-  MarkSweep::VisitObjectReferences(obj, visitor, kMovingClasses);
+  MarkSweep::VisitObjectReferences<kMovingClasses>(obj, visitor);
   mirror::Class* klass = obj->GetClass<kVerifyNone>();
   if (UNLIKELY(klass->IsReferenceClass<kVerifyNone>())) {
     DelayReferenceReferent(klass, obj);
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index 7cc7f9b..0d77f29 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -66,18 +66,19 @@
   // If true, use remembered sets in the generational mode.
   static constexpr bool kUseRememberedSet = true;
 
-  explicit SemiSpace(Heap* heap, bool generational = false,
-                     const std::string& name_prefix = "");
+  explicit SemiSpace(Heap* heap, bool generational = false, const std::string& name_prefix = "");
 
   ~SemiSpace() {}
 
-  virtual void InitializePhase();
-  virtual void MarkingPhase() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual void ReclaimPhase() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual void FinishPhase() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual void MarkReachableObjects()
+  virtual void InitializePhase() OVERRIDE;
+  virtual void MarkingPhase() OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+  virtual void ReclaimPhase() OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+  virtual void FinishPhase() OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void MarkReachableObjects()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
-  virtual GcType GetGcType() const {
+  virtual GcType GetGcType() const OVERRIDE {
     return kGcTypePartial;
   }
   virtual CollectorType GetCollectorType() const OVERRIDE {
@@ -112,12 +113,13 @@
 
   // Bind the live bits to the mark bits of bitmaps for spaces that are never collected, ie
   // the image. Mark that portion of the heap as immune.
-  virtual void BindBitmaps() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual void BindBitmaps() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
 
   void UnBindBitmaps()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  void ProcessReferences(Thread* self)
+  void ProcessReferences(Thread* self) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Sweeps unmarked objects to complete the garbage collection.
@@ -126,22 +128,9 @@
   // Sweeps unmarked objects to complete the garbage collection.
   void SweepLargeObjects(bool swap_bitmaps) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  // Sweep only pointers within an array. WARNING: Trashes objects.
-  void SweepArray(accounting::ObjectStack* allocation_stack_, bool swap_bitmaps)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
-  // TODO: enable thread safety analysis when in use by multiple worker threads.
-  template <typename MarkVisitor>
-  void ScanObjectVisit(const mirror::Object* obj, const MarkVisitor& visitor)
-      NO_THREAD_SAFETY_ANALYSIS;
-
   void SweepSystemWeaks()
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
-  template <typename Visitor>
-  static void VisitObjectReferencesAndClass(mirror::Object* obj, const Visitor& visitor)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
   static void MarkRootCallback(mirror::Object** root, void* arg, uint32_t /*tid*/,
                                RootType /*root_type*/)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
@@ -158,15 +147,18 @@
  protected:
   // Returns null if the object is not marked, otherwise returns the forwarding address (same as
   // object for non movable things).
-  mirror::Object* GetMarkedForwardAddress(mirror::Object* object) const;
+  mirror::Object* GetMarkedForwardAddress(mirror::Object* object) const
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   static mirror::Object* MarkedForwardingAddressCallback(mirror::Object* object, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   // Marks or unmarks a large object based on whether or not set is true. If set is true, then we
   // mark, otherwise we unmark.
   bool MarkLargeObject(const mirror::Object* obj)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
 
   // Expand mark stack to 2x its current size.
   void ResizeMarkStack(size_t new_size);
@@ -174,48 +166,8 @@
   // Returns true if we should sweep the space.
   virtual bool ShouldSweepSpace(space::ContinuousSpace* space) const;
 
-  static void VerifyRootCallback(const mirror::Object* root, void* arg, size_t vreg,
-                                 const StackVisitor *visitor);
-
-  void VerifyRoot(const mirror::Object* root, size_t vreg, const StackVisitor* visitor)
-      NO_THREAD_SAFETY_ANALYSIS;
-
-  template <typename Visitor>
-  static void VisitInstanceFieldsReferences(const mirror::Class* klass, const mirror::Object* obj,
-                                            const Visitor& visitor)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
-  // Visit the header, static field references, and interface pointers of a class object.
-  template <typename Visitor>
-  static void VisitClassReferences(const mirror::Class* klass, const mirror::Object* obj,
-                                   const Visitor& visitor)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
-  template <typename Visitor>
-  static void VisitStaticFieldsReferences(const mirror::Class* klass, const Visitor& visitor)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
-  template <typename Visitor>
-  static void VisitFieldsReferences(const mirror::Object* obj, uint32_t ref_offsets, bool is_static,
-                                    const Visitor& visitor)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
-  // Visit all of the references in an object array.
-  template <typename Visitor>
-  static void VisitObjectArrayReferences(const mirror::ObjectArray<mirror::Object>* array,
-                                         const Visitor& visitor)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
-
-  // Visits the header and field references of a data object.
-  template <typename Visitor>
-  static void VisitOtherReferences(const mirror::Class* klass, const mirror::Object* obj,
-                                   const Visitor& visitor)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
-    return VisitInstanceFieldsReferences(klass, obj, visitor);
-  }
-
   // Push an object onto the mark stack.
-  inline void MarkStackPush(mirror::Object* obj);
+  void MarkStackPush(mirror::Object* obj);
 
   void UpdateAndMarkModUnion()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
@@ -229,15 +181,6 @@
   void ProcessMarkStack()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
 
-  void EnqueueFinalizerReferences(mirror::Object** ref)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
-
-  void PreserveSomeSoftReferences(mirror::Object** ref)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
-
-  void ClearWhiteReferences(mirror::Object** list)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
-
   void ProcessReferences(mirror::Object** soft_references, bool clear_soft_references,
                          mirror::Object** weak_references,
                          mirror::Object** finalizer_references,
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 8d06673..11c0f71 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -276,7 +276,8 @@
   // Card cache for now since it makes it easier for us to update the references to the copying
   // spaces.
   accounting::ModUnionTable* mod_union_table =
-      new accounting::ModUnionTableCardCache("Image mod-union table", this, GetImageSpace());
+      new accounting::ModUnionTableToZygoteAllocspace("Image mod-union table", this,
+                                                      GetImageSpace());
   CHECK(mod_union_table != nullptr) << "Failed to create image mod-union table";
   AddModUnionTable(mod_union_table);
 
@@ -773,9 +774,9 @@
                              IsMarkedCallback* is_marked_callback,
                              MarkObjectCallback* mark_object_callback,
                              ProcessMarkStackCallback* process_mark_stack_callback, void* arg) {
+  timings.StartSplit("(Paused)ProcessReferences");
   ProcessSoftReferences(timings, clear_soft, is_marked_callback, mark_object_callback,
                         process_mark_stack_callback, arg);
-  timings.StartSplit("(Paused)ProcessReferences");
   // Clear all remaining soft and weak references with white referents.
   soft_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
   weak_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
@@ -1301,8 +1302,8 @@
   // For bitmap Visit.
   // TODO: Fix lock analysis to not use NO_THREAD_SAFETY_ANALYSIS, requires support for
   // annotalysis on visitors.
-  void operator()(const mirror::Object* o) const NO_THREAD_SAFETY_ANALYSIS {
-    collector::MarkSweep::VisitObjectReferences(const_cast<mirror::Object*>(o), *this, true);
+  void operator()(mirror::Object* o) const NO_THREAD_SAFETY_ANALYSIS {
+    collector::MarkSweep::VisitObjectReferences<true>(o, *this);
   }
 
   // For MarkSweep::VisitObjectReferences.
@@ -2032,7 +2033,7 @@
     // be live or else how did we find it in the live bitmap?
     VerifyReferenceVisitor visitor(heap_);
     // The class doesn't count as a reference but we should verify it anyways.
-    collector::MarkSweep::VisitObjectReferences(obj, visitor, true);
+    collector::MarkSweep::VisitObjectReferences<true>(obj, visitor);
     if (obj->IsReferenceInstance()) {
       mirror::Reference* ref = obj->AsReference();
       visitor(obj, ref->GetReferent(), mirror::Reference::ReferentOffset(), false);
@@ -2172,7 +2173,7 @@
   void operator()(mirror::Object* obj) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
     VerifyReferenceCardVisitor visitor(heap_, const_cast<bool*>(&failed_));
-    collector::MarkSweep::VisitObjectReferences(const_cast<mirror::Object*>(obj), visitor, true);
+    collector::MarkSweep::VisitObjectReferences<true>(obj, visitor);
   }
 
   bool Failed() const {