Merge "Use CompilerOptions for implicit stack overflow checks"
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 55866e2..c68b1d0 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -66,21 +66,14 @@
   RegStorage LoadHelper(QuickEntrypointEnum trampoline) OVERRIDE;
   LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
                     OpSize size, VolatileKind is_volatile) OVERRIDE;
-  LIR* LoadRefDisp(RegStorage r_base, int displacement, RegStorage r_dest,
-                   VolatileKind is_volatile) OVERRIDE;
   LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale,
                        OpSize size) OVERRIDE;
-  LIR* LoadRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale)
-      OVERRIDE;
   LIR* LoadConstantNoClobber(RegStorage r_dest, int value) OVERRIDE;
   LIR* LoadConstantWide(RegStorage r_dest, int64_t value) OVERRIDE;
   LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, OpSize size,
                      VolatileKind is_volatile) OVERRIDE;
-  LIR* StoreRefDisp(RegStorage r_base, int displacement, RegStorage r_src, VolatileKind is_volatile)
-      OVERRIDE;
   LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale,
                         OpSize size) OVERRIDE;
-  LIR* StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale) OVERRIDE;
 
   /// @copydoc Mir2Lir::UnconditionallyMarkGCCard(RegStorage)
   void UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) OVERRIDE;
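
The four deleted Ref* declarations are redundant once the register-width fix-up moves into the virtual LoadBaseDisp/LoadBaseIndexed family (see utility_arm64.cc below): the non-virtual base-class wrappers in mir_to_lir.h already forward to those primitives with kReference. A minimal sketch of the resulting call path, using only names that appear in this change:

    // Shared, non-virtual wrapper (mir_to_lir.h):
    LIR* LoadRefDisp(RegStorage r_base, int displacement, RegStorage r_dest,
                     VolatileKind is_volatile) {
      return LoadBaseDisp(r_base, displacement, r_dest, kReference, is_volatile);
    }
    // arm64's LoadBaseDisp override then narrows the register itself:
    //   case kReference:
    //     r_dest = As32BitReg(r_dest);  // heap references are 32 bits on arm64
    //     FALLTHROUGH_INTENDED;         // share the k32/kSingle path
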
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index 78a6df8..a331f41 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -1062,9 +1062,11 @@
       opcode = WIDE(kA64Ldr4rXxG);
       expected_scale = 3;
       break;
-    case kSingle:     // Intentional fall-through.
-    case k32:         // Intentional fall-through.
     case kReference:
+      r_dest = As32BitReg(r_dest);
+      FALLTHROUGH_INTENDED;
+    case kSingle:     // Intentional fall-through.
+    case k32:
       r_dest = Check32BitReg(r_dest);
       opcode = kA64Ldr4rXxG;
       expected_scale = 2;
@@ -1105,11 +1107,6 @@
   return load;
 }
 
-LIR* Arm64Mir2Lir::LoadRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
-                                  int scale) {
-  return LoadBaseIndexed(r_base, r_index, As32BitReg(r_dest), scale, kReference);
-}
-
 LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
                                     int scale, OpSize size) {
   LIR* store;
@@ -1150,9 +1147,11 @@
       opcode = WIDE(kA64Str4rXxG);
       expected_scale = 3;
       break;
-    case kSingle:     // Intentional fall-trough.
-    case k32:         // Intentional fall-trough.
     case kReference:
+      r_src = As32BitReg(r_src);
+      FALLTHROUGH_INTENDED;
+    case kSingle:     // Intentional fall-through.
+    case k32:
       r_src = Check32BitReg(r_src);
       opcode = kA64Str4rXxG;
       expected_scale = 2;
@@ -1185,11 +1184,6 @@
   return store;
 }
 
-LIR* Arm64Mir2Lir::StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
-                                   int scale) {
-  return StoreBaseIndexed(r_base, r_index, As32BitReg(r_src), scale, kReference);
-}
-
 /*
  * Load value from base + displacement.  Optionally perform null check
  * on base (which must have an associated s_reg and MIR).  If not
@@ -1217,9 +1211,11 @@
         alt_opcode = WIDE(kA64Ldur3rXd);
       }
       break;
-    case kSingle:     // Intentional fall-through.
-    case k32:         // Intentional fall-trough.
     case kReference:
+      r_dest = As32BitReg(r_dest);
+      FALLTHROUGH_INTENDED;
+    case kSingle:     // Intentional fall-through.
+    case k32:
       r_dest = Check32BitReg(r_dest);
       scale = 2;
       if (r_dest.IsFloat()) {
@@ -1260,7 +1256,9 @@
     // TODO: cleaner support for index/displacement registers?  Not a reference, but must match width.
     RegStorage r_scratch = AllocTempWide();
     LoadConstantWide(r_scratch, displacement);
-    load = LoadBaseIndexed(r_base, r_scratch, r_dest, 0, size);
+    load = LoadBaseIndexed(r_base, r_scratch,
+                           (size == kReference) ? As64BitReg(r_dest) : r_dest,
+                           0, size);
     FreeTemp(r_scratch);
   }
 
@@ -1287,11 +1285,6 @@
   return load;
 }
 
-LIR* Arm64Mir2Lir::LoadRefDisp(RegStorage r_base, int displacement, RegStorage r_dest,
-                               VolatileKind is_volatile) {
-  return LoadBaseDisp(r_base, displacement, As32BitReg(r_dest), kReference, is_volatile);
-}
-
 LIR* Arm64Mir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src,
                                      OpSize size) {
   LIR* store = NULL;
@@ -1314,9 +1307,11 @@
         alt_opcode = WIDE(kA64Stur3rXd);
       }
       break;
-    case kSingle:     // Intentional fall-through.
-    case k32:         // Intentional fall-trough.
     case kReference:
+      r_src = As32BitReg(r_src);
+      FALLTHROUGH_INTENDED;
+    case kSingle:     // Intentional fall-through.
+    case k32:
       r_src = Check32BitReg(r_src);
       scale = 2;
       if (r_src.IsFloat()) {
@@ -1351,7 +1346,9 @@
     // Use long sequence.
     RegStorage r_scratch = AllocTempWide();
     LoadConstantWide(r_scratch, displacement);
-    store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size);
+    store = StoreBaseIndexed(r_base, r_scratch,
+                             (size == kReference) ? As64BitReg(r_src) : r_src,
+                             0, size);
     FreeTemp(r_scratch);
   }
 
@@ -1385,11 +1382,6 @@
   return store;
 }
 
-LIR* Arm64Mir2Lir::StoreRefDisp(RegStorage r_base, int displacement, RegStorage r_src,
-                                VolatileKind is_volatile) {
-  return StoreBaseDisp(r_base, displacement, As32BitReg(r_src), kReference, is_volatile);
-}
-
 LIR* Arm64Mir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
   UNUSED(r_dest, r_src);
   LOG(FATAL) << "Unexpected use of OpFpRegCopy for Arm64";
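
FALLTHROUGH_INTENDED (from ART's base/macros.h) marks deliberate case fall-through for static analysis; the kReference cases above narrow the register and then reuse the 32-bit path. A self-contained toy of the pattern; the macro definition here is a stand-in for illustration, not necessarily ART's exact one:

    #if defined(__clang__)
    #define FALLTHROUGH_INTENDED [[clang::fallthrough]]
    #else
    #define FALLTHROUGH_INTENDED do { } while (0)
    #endif

    enum OpSize { k64, kReference, kSingle, k32 };

    int ExpectedScaleFor(OpSize size) {
      switch (size) {
        case k64:
          return 3;
        case kReference:
          // The real code narrows the register here (As32BitReg), then
          // shares the 32-bit path below.
          FALLTHROUGH_INTENDED;
        case kSingle:
        case k32:
          return 2;
      }
      return 0;  // Unreachable while the switch stays exhaustive.
    }
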
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 73629e8..2a6dfef 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -918,8 +918,8 @@
 
   RegStorage reg_slow_path = AllocTemp();
   RegStorage reg_disabled = AllocTemp();
-  Load8Disp(reg_class, slow_path_flag_offset, reg_slow_path);
-  Load8Disp(reg_class, disable_flag_offset, reg_disabled);
+  LoadBaseDisp(reg_class, slow_path_flag_offset, reg_slow_path, kSignedByte, kNotVolatile);
+  LoadBaseDisp(reg_class, disable_flag_offset, reg_disabled, kSignedByte, kNotVolatile);
   FreeTemp(reg_class);
   LIR* or_inst = OpRegRegReg(kOpOr, reg_slow_path, reg_slow_path, reg_disabled);
   FreeTemp(reg_disabled);
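
This rewrite is behavior-preserving: per the wrapper deleted from mir_to_lir.h below, Load8Disp was exactly a one-line forward to LoadBaseDisp.

    // Before:
    Load8Disp(reg_class, slow_path_flag_offset, reg_slow_path);
    // After, identical effect per the removed wrapper's body:
    LoadBaseDisp(reg_class, slow_path_flag_offset, reg_slow_path, kSignedByte, kNotVolatile);
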
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index c3e9bb5..fabf941 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -959,24 +959,20 @@
     void LoadCurrMethodDirect(RegStorage r_tgt);
     virtual LIR* LoadConstant(RegStorage r_dest, int value);
     // Natural word size.
-    virtual LIR* LoadWordDisp(RegStorage r_base, int displacement, RegStorage r_dest) {
+    LIR* LoadWordDisp(RegStorage r_base, int displacement, RegStorage r_dest) {
       return LoadBaseDisp(r_base, displacement, r_dest, kWord, kNotVolatile);
     }
-    // Load 8 bits, regardless of target.
-    virtual LIR* Load8Disp(RegStorage r_base, int displacement, RegStorage r_dest) {
-      return LoadBaseDisp(r_base, displacement, r_dest, kSignedByte, kNotVolatile);
-    }
     // Load 32 bits, regardless of target.
-    virtual LIR* Load32Disp(RegStorage r_base, int displacement, RegStorage r_dest)  {
+    LIR* Load32Disp(RegStorage r_base, int displacement, RegStorage r_dest)  {
       return LoadBaseDisp(r_base, displacement, r_dest, k32, kNotVolatile);
     }
     // Load a reference at base + displacement and decompress into register.
-    virtual LIR* LoadRefDisp(RegStorage r_base, int displacement, RegStorage r_dest,
+    LIR* LoadRefDisp(RegStorage r_base, int displacement, RegStorage r_dest,
                              VolatileKind is_volatile) {
       return LoadBaseDisp(r_base, displacement, r_dest, kReference, is_volatile);
     }
     // Load a reference at base + index and decompress into register.
-    virtual LIR* LoadRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
+    LIR* LoadRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
                                 int scale) {
       return LoadBaseIndexed(r_base, r_index, r_dest, scale, kReference);
     }
@@ -993,21 +989,21 @@
     // Load Dalvik value with 64-bit memory storage.
     virtual void LoadValueDirectWideFixed(RegLocation rl_src, RegStorage r_dest);
     // Store an item of natural word size.
-    virtual LIR* StoreWordDisp(RegStorage r_base, int displacement, RegStorage r_src) {
+    LIR* StoreWordDisp(RegStorage r_base, int displacement, RegStorage r_src) {
       return StoreBaseDisp(r_base, displacement, r_src, kWord, kNotVolatile);
     }
     // Store an uncompressed reference into a compressed 32-bit container.
-    virtual LIR* StoreRefDisp(RegStorage r_base, int displacement, RegStorage r_src,
+    LIR* StoreRefDisp(RegStorage r_base, int displacement, RegStorage r_src,
                               VolatileKind is_volatile) {
       return StoreBaseDisp(r_base, displacement, r_src, kReference, is_volatile);
     }
     // Store an uncompressed reference into a compressed 32-bit container by index.
-    virtual LIR* StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
+    LIR* StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
                                  int scale) {
       return StoreBaseIndexed(r_base, r_index, r_src, scale, kReference);
     }
     // Store 32 bits, regardless of target.
-    virtual LIR* Store32Disp(RegStorage r_base, int displacement, RegStorage r_src) {
+    LIR* Store32Disp(RegStorage r_base, int displacement, RegStorage r_src) {
       return StoreBaseDisp(r_base, displacement, r_src, k32, kNotVolatile);
     }
 
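
De-virtualizing these wrappers fixes the customization point one level down: backends still override the LoadBaseDisp/StoreBaseIndexed primitives, while the Ref/Word/32-bit conveniences become identical everywhere. A simplified sketch of the pattern, with hypothetical stand-in names:

    struct Mir2LirSketch {
      virtual ~Mir2LirSketch() {}
      // Still virtual: backends such as Arm64Mir2Lir override this and do any
      // per-architecture register-width handling (e.g. kReference) inside it.
      virtual void LoadBaseDispSketch(int r_base, int displacement, bool is_ref) = 0;
      // No longer virtual: identical for every backend, so one shared body.
      void LoadRefDispSketch(int r_base, int displacement) {
        LoadBaseDispSketch(r_base, displacement, /*is_ref=*/true);
      }
    };
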
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 56bed39..7451bd5 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -480,7 +480,7 @@
                                 TimingLogger* timings) {
   DCHECK(!Runtime::Current()->IsStarted());
   std::unique_ptr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", thread_count_ - 1));
-  VLOG(compiler) << "Before precompile " << GetMemoryUsageString();
+  VLOG(compiler) << "Before precompile " << GetMemoryUsageString(false);
   PreCompile(class_loader, dex_files, thread_pool.get(), timings);
   Compile(class_loader, dex_files, thread_pool.get(), timings);
   if (dump_stats_) {
@@ -577,10 +577,10 @@
 void CompilerDriver::PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
                                 ThreadPool* thread_pool, TimingLogger* timings) {
   LoadImageClasses(timings);
-  VLOG(compiler) << "LoadImageClasses: " << GetMemoryUsageString();
+  VLOG(compiler) << "LoadImageClasses: " << GetMemoryUsageString(false);
 
   Resolve(class_loader, dex_files, thread_pool, timings);
-  VLOG(compiler) << "Resolve: " << GetMemoryUsageString();
+  VLOG(compiler) << "Resolve: " << GetMemoryUsageString(false);
 
   if (!compiler_options_->IsVerificationEnabled()) {
     VLOG(compiler) << "Verify none mode specified, skipping verification.";
@@ -589,13 +589,13 @@
   }
 
   Verify(class_loader, dex_files, thread_pool, timings);
-  VLOG(compiler) << "Verify: " << GetMemoryUsageString();
+  VLOG(compiler) << "Verify: " << GetMemoryUsageString(false);
 
   InitializeClasses(class_loader, dex_files, thread_pool, timings);
-  VLOG(compiler) << "InitializeClasses: " << GetMemoryUsageString();
+  VLOG(compiler) << "InitializeClasses: " << GetMemoryUsageString(false);
 
   UpdateImageClasses(timings);
-  VLOG(compiler) << "UpdateImageClasses: " << GetMemoryUsageString();
+  VLOG(compiler) << "UpdateImageClasses: " << GetMemoryUsageString(false);
 }
 
 bool CompilerDriver::IsImageClass(const char* descriptor) const {
@@ -1983,7 +1983,7 @@
     CHECK(dex_file != nullptr);
     CompileDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
   }
-  VLOG(compiler) << "Compile: " << GetMemoryUsageString();
+  VLOG(compiler) << "Compile: " << GetMemoryUsageString(false);
 }
 
 void CompilerDriver::CompileClass(const ParallelCompilationManager* manager, size_t class_def_index) {
@@ -2329,7 +2329,7 @@
   return !compile;
 }
 
-std::string CompilerDriver::GetMemoryUsageString() const {
+std::string CompilerDriver::GetMemoryUsageString(bool extended) const {
   std::ostringstream oss;
   const ArenaPool* arena_pool = GetArenaPool();
   gc::Heap* heap = Runtime::Current()->GetHeap();
@@ -2345,11 +2345,13 @@
   if (swap_space_.get() != nullptr) {
     oss << " swap=" << PrettySize(swap_space_->GetSize());
   }
-  oss << "\nCode dedupe: " << dedupe_code_.DumpStats();
-  oss << "\nMapping table dedupe: " << dedupe_mapping_table_.DumpStats();
-  oss << "\nVmap table dedupe: " << dedupe_vmap_table_.DumpStats();
-  oss << "\nGC map dedupe: " << dedupe_gc_map_.DumpStats();
-  oss << "\nCFI info dedupe: " << dedupe_cfi_info_.DumpStats();
+  if (extended) {
+    oss << "\nCode dedupe: " << dedupe_code_.DumpStats();
+    oss << "\nMapping table dedupe: " << dedupe_mapping_table_.DumpStats();
+    oss << "\nVmap table dedupe: " << dedupe_vmap_table_.DumpStats();
+    oss << "\nGC map dedupe: " << dedupe_gc_map_.DumpStats();
+    oss << "\nCFI info dedupe: " << dedupe_cfi_info_.DumpStats();
+  }
   return oss.str();
 }
 
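
The new flag gates only the five dedupe-stat lines, so per-phase VLOG output shrinks to a single line while the final summary can still opt in. Usage, taken from this change:

    // Per-phase logging stays terse:
    VLOG(compiler) << "Resolve: " << GetMemoryUsageString(false);
    // dex2oat's completion log (below) requests the dedupe breakdown only
    // for debug builds or when compiler verbose logging is enabled:
    driver_->GetMemoryUsageString(kIsDebugBuild || VLOG_IS_ON(compiler));
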
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 11b4329..a86043c 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -400,7 +400,7 @@
   bool SkipCompilation(const std::string& method_name);
 
   // Get memory usage during compilation.
-  std::string GetMemoryUsageString() const;
+  std::string GetMemoryUsageString(bool extended) const;
 
  private:
   // These flags are internal to CompilerDriver for collecting INVOKE resolution statistics.
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 8c7d611..d69447d 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -501,19 +501,14 @@
   size_t total_strings = 0;
   gc::Heap* heap = Runtime::Current()->GetHeap();
   ClassLinker* cl = Runtime::Current()->GetClassLinker();
-  {
-    ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
-    heap->VisitObjects(CountStringsCallback, &total_strings);  // Count the strings.
-  }
+  // Count the strings.
+  heap->VisitObjects(CountStringsCallback, &total_strings);
   Thread* self = Thread::Current();
   StackHandleScope<1> hs(self);
   auto strings = hs.NewHandle(cl->AllocStringArray(self, total_strings));
   StringCollector string_collector(strings, 0U);
-  {
-    ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
-    // Read strings into the array.
-    heap->VisitObjects(StringCollector::Callback, &string_collector);
-  }
+  // Read strings into the array.
+  heap->VisitObjects(StringCollector::Callback, &string_collector);
   // Some strings could have gotten freed if AllocStringArray caused a GC.
   CHECK_LE(string_collector.GetIndex(), total_strings);
   total_strings = string_collector.GetIndex();
@@ -561,8 +556,10 @@
   }
   CHECK_EQ(pos, num_chars);
 
-  LOG(INFO) << "Total # image strings=" << total_strings << " combined length="
-      << num_chars << " prefix saved chars=" << prefix_saved_chars;
+  if (kIsDebugBuild || VLOG_IS_ON(compiler)) {
+    LOG(INFO) << "Total # image strings=" << total_strings << " combined length="
+        << num_chars << " prefix saved chars=" << prefix_saved_chars;
+  }
   ComputeEagerResolvedStrings();
 }
 
@@ -595,7 +592,6 @@
 }
 
 void ImageWriter::ComputeEagerResolvedStrings() {
-  ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
   Runtime::Current()->GetHeap()->VisitObjects(ComputeEagerResolvedStringsCallback, this);
 }
 
@@ -668,7 +664,6 @@
 void ImageWriter::CheckNonImageClassesRemoved() {
   if (compiler_driver_.GetImageClasses() != nullptr) {
     gc::Heap* heap = Runtime::Current()->GetHeap();
-    ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
     heap->VisitObjects(CheckNonImageClassesRemovedCallback, this);
   }
 }
@@ -869,17 +864,14 @@
   // know where image_roots is going to end up
   image_end_ += RoundUp(sizeof(ImageHeader), kObjectAlignment);  // 64-bit-alignment
 
-  {
-    WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    // TODO: Image spaces only?
-    DCHECK_LT(image_end_, image_->Size());
-    image_objects_offset_begin_ = image_end_;
-    // Clear any pre-existing monitors which may have been in the monitor words, assign bin slots.
-    heap->VisitObjects(WalkFieldsCallback, this);
-    // Transform each object's bin slot into an offset which will be used to do the final copy.
-    heap->VisitObjects(UnbinObjectsIntoOffsetCallback, this);
-    DCHECK(saved_hashes_map_.empty());  // All binslot hashes should've been put into vector by now.
-  }
+  // TODO: Image spaces only?
+  DCHECK_LT(image_end_, image_->Size());
+  image_objects_offset_begin_ = image_end_;
+  // Clear any pre-existing monitors which may have been in the monitor words, assign bin slots.
+  heap->VisitObjects(WalkFieldsCallback, this);
+  // Transform each object's bin slot into an offset which will be used to do the final copy.
+  heap->VisitObjects(UnbinObjectsIntoOffsetCallback, this);
+  DCHECK(saved_hashes_map_.empty());  // All binslot hashes should've been put into vector by now.
 
   DCHECK_GT(image_end_, GetBinSizeSum());
 
@@ -920,7 +912,6 @@
   // TODO: heap validation can't handle this fix up pass
   heap->DisableObjectValidation();
   // TODO: Image spaces only?
-  WriterMutexLock mu(ants.Self(), *Locks::heap_bitmap_lock_);
   heap->VisitObjects(CopyAndFixupObjectsCallback, this);
   // Fix up the object previously had hash codes.
   for (const std::pair<mirror::Object*, uint32_t>& hash_pair : saved_hashes_) {
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index b576ca2..fe3a978 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -1640,7 +1640,7 @@
   void LogCompletionTime() {
     LOG(INFO) << "dex2oat took " << PrettyDuration(NanoTime() - start_ns_)
               << " (threads: " << thread_count_ << ") "
-              << driver_->GetMemoryUsageString();
+              << driver_->GetMemoryUsageString(kIsDebugBuild || VLOG_IS_ON(compiler));
   }
 
   std::unique_ptr<CompilerOptions> compiler_options_;
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 05b6b1d..00f3855 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1714,7 +1714,6 @@
   // Set entry point to interpreter if in InterpretOnly mode.
   Runtime* runtime = Runtime::Current();
   if (!runtime->IsCompiler() && runtime->GetInstrumentation()->InterpretOnly()) {
-    ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
     heap->VisitObjects(InitFromImageInterpretOnlyCallback, this);
   }
 
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index c595de7..c63e2d7 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -2194,6 +2194,7 @@
     case kWaitingForJniOnLoad:
     case kWaitingForMethodTracingStart:
     case kWaitingForSignalCatcherOutput:
+    case kWaitingForVisitObjects:
     case kWaitingInMainDebuggerLoop:
     case kWaitingInMainSignalCatcherLoop:
     case kWaitingPerformingGc:
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 553a5d3..eed9352 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -704,8 +704,29 @@
 }
 
 void Heap::VisitObjects(ObjectCallback callback, void* arg) {
-  // GCs can move objects, so don't allow this.
-  ScopedAssertNoThreadSuspension ants(Thread::Current(), "Visiting objects");
+  Thread* self = Thread::Current();
+  if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
+    // Threads are already suspended.
+    VisitObjectsInternal(callback, arg);
+  } else if (IsGcConcurrent() && IsMovingGc(collector_type_)) {
+    // Concurrent moving GC. Suspend all threads and visit objects.
+    DCHECK_EQ(collector_type_, foreground_collector_type_);
+    DCHECK_EQ(foreground_collector_type_, background_collector_type_)
+        << "Assume no transition such that collector_type_ won't change";
+    self->TransitionFromRunnableToSuspended(kWaitingForVisitObjects);
+    ThreadList* tl = Runtime::Current()->GetThreadList();
+    tl->SuspendAll();
+    VisitObjectsInternal(callback, arg);
+    tl->ResumeAll();
+    self->TransitionFromSuspendedToRunnable();
+  } else {
+    // GCs can move objects, so don't allow this.
+    ScopedAssertNoThreadSuspension ants(self, "Visiting objects");
+    VisitObjectsInternal(callback, arg);
+  }
+}
+
+void Heap::VisitObjectsInternal(ObjectCallback callback, void* arg) {
   if (bump_pointer_space_ != nullptr) {
     // Visit objects in bump pointer space.
     bump_pointer_space_->Walk(callback, arg);
@@ -721,7 +742,10 @@
       callback(obj, arg);
     }
   }
-  GetLiveBitmap()->Walk(callback, arg);
+  {
+    ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    GetLiveBitmap()->Walk(callback, arg);
+  }
 }
 
 void Heap::MarkAllocStackAsLive(accounting::ObjectStack* stack) {
@@ -1459,10 +1483,7 @@
 
 void Heap::CountInstances(const std::vector<mirror::Class*>& classes, bool use_is_assignable_from,
                           uint64_t* counts) {
-  // Can't do any GC in this function since this may move classes.
-  ScopedAssertNoThreadSuspension ants(Thread::Current(), "CountInstances");
   InstanceCounter counter(classes, use_is_assignable_from, counts);
-  ReaderMutexLock mu(ants.Self(), *Locks::heap_bitmap_lock_);
   VisitObjects(InstanceCounter::Callback, &counter);
 }
 
@@ -1493,10 +1514,7 @@
 
 void Heap::GetInstances(mirror::Class* c, int32_t max_count,
                         std::vector<mirror::Object*>& instances) {
-  // Can't do any GC in this function since this may move classes.
-  ScopedAssertNoThreadSuspension ants(Thread::Current(), "GetInstances");
   InstanceCollector collector(c, max_count, instances);
-  ReaderMutexLock mu(ants.Self(), *Locks::heap_bitmap_lock_);
   VisitObjects(&InstanceCollector::Callback, &collector);
 }
 
@@ -1538,10 +1556,7 @@
 
 void Heap::GetReferringObjects(mirror::Object* o, int32_t max_count,
                                std::vector<mirror::Object*>& referring_objects) {
-  // Can't do any GC in this function since this may move the object o.
-  ScopedAssertNoThreadSuspension ants(Thread::Current(), "GetReferringObjects");
   ReferringObjectsFinder finder(o, max_count, referring_objects);
-  ReaderMutexLock mu(ants.Self(), *Locks::heap_bitmap_lock_);
   VisitObjects(&ReferringObjectsFinder::Callback, &finder);
 }
 
@@ -2702,7 +2717,6 @@
   TimingLogger::ScopedTiming t(__FUNCTION__, timings);
   if (verify_pre_gc_heap_) {
     TimingLogger::ScopedTiming t2("(Paused)PreGcVerifyHeapReferences", timings);
-    ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
     size_t failures = VerifyHeapReferences();
     if (failures > 0) {
       LOG(FATAL) << "Pre " << gc->GetName() << " heap verification failed with " << failures
@@ -2754,9 +2768,11 @@
   if (verify_pre_sweeping_heap_) {
     TimingLogger::ScopedTiming t2("(Paused)PostSweepingVerifyHeapReferences", timings);
     CHECK_NE(self->GetState(), kRunnable);
-    WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    // Swapping bound bitmaps does nothing.
-    gc->SwapBitmaps();
+    {
+      WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+      // Swapping bound bitmaps does nothing.
+      gc->SwapBitmaps();
+    }
     // Pass in false since concurrent reference processing can mean that the reference referents
     // may point to dead objects at the point which PreSweepingGcVerification is called.
     size_t failures = VerifyHeapReferences(false);
@@ -2764,7 +2780,10 @@
       LOG(FATAL) << "Pre sweeping " << gc->GetName() << " GC verification failed with " << failures
           << " failures";
     }
-    gc->SwapBitmaps();
+    {
+      WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+      gc->SwapBitmaps();
+    }
   }
   if (verify_pre_sweeping_rosalloc_) {
     RosAllocVerification(timings, "PreSweepingRosAllocVerification");
@@ -2786,7 +2805,6 @@
   }
   if (verify_post_gc_heap_) {
     TimingLogger::ScopedTiming t2("(Paused)PostGcVerifyHeapReferences", timings);
-    ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
     size_t failures = VerifyHeapReferences();
     if (failures > 0) {
       LOG(FATAL) << "Pre " << gc->GetName() << " heap verification failed with " << failures
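
The net effect of the heap.cc changes is a new locking contract for VisitObjects: callers hold only the mutator lock, and the heap_bitmap_lock_ acquisition moves inside VisitObjectsInternal, around the live-bitmap walk only. A sketch of the contract, showing the caller pattern this change removes throughout image_writer.cc, class_linker.cc, and hprof.cc:

    // Before: every caller wrapped the walk itself.
    //   ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
    //   heap->VisitObjects(callback, arg);
    // After: callers must NOT hold heap_bitmap_lock_.
    heap->VisitObjects(callback, arg);
    // VisitObjects now picks one of three paths:
    //   1. mutator_lock_ held exclusively -> world already stopped, walk directly;
    //   2. concurrent moving GC -> SuspendAll / walk / ResumeAll, with the
    //      calling thread parked in the new kWaitingForVisitObjects state;
    //   3. otherwise -> assert no thread suspension and walk.
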
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index fc61fc5..36a3767 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -216,7 +216,11 @@
 
   // Visit all of the live objects in the heap.
   void VisitObjects(ObjectCallback callback, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+  void VisitObjectsInternal(ObjectCallback callback, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
 
   void CheckPreconditionsForAllocObject(mirror::Class* c, size_t byte_count)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -245,7 +249,7 @@
   void VerifyHeap() LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
   // Returns how many failures occured.
   size_t VerifyHeapReferences(bool verify_referents = true)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool VerifyMissingCardMarks()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
@@ -741,7 +745,8 @@
   void PrePauseRosAllocVerification(collector::GarbageCollector* gc)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
   void PreSweepingGcVerification(collector::GarbageCollector* gc)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
   void PostGcVerification(collector::GarbageCollector* gc)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
   void PostGcVerificationPaused(collector::GarbageCollector* gc)
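
The annotation swap on VisitObjects, from SHARED_LOCKS_REQUIRED(heap_bitmap_lock_) to LOCKS_EXCLUDED(heap_bitmap_lock_), is what lets Clang's thread-safety analysis enforce the new contract. A self-contained sketch of what the two macros check (compile with clang++ -Wthread-safety); ART's real definitions live in base/macros.h, and the attribute spellings below are assumptions for illustration:

    struct __attribute__((capability("mutex"))) Mutex {
      void SharedLock() __attribute__((acquire_shared_capability())) {}
      void SharedUnlock() __attribute__((release_shared_capability())) {}
    };

    Mutex heap_bitmap_lock;

    // SHARED_LOCKS_REQUIRED: caller must already hold the lock for reading.
    void WalkLiveBitmap() __attribute__((requires_shared_capability(heap_bitmap_lock))) {}

    // LOCKS_EXCLUDED: caller must NOT hold it; the callee acquires it itself.
    void VisitObjects() __attribute__((locks_excluded(heap_bitmap_lock))) {
      heap_bitmap_lock.SharedLock();
      WalkLiveBitmap();
      heap_bitmap_lock.SharedUnlock();
    }
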
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 040757b..0b04276 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -421,7 +421,6 @@
   void Dump()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::heap_bitmap_lock_) {
-    ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
     // First pass to measure the size of the dump.
     size_t overall_size;
     size_t max_length;
@@ -487,8 +486,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void ProcessHeap(EndianOutput* output, bool header_first)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Reset current heap and object count.
     current_heap_ = HPROF_HEAP_DEFAULT;
     objects_in_segment_ = 0;
@@ -502,8 +500,7 @@
     }
   }
 
-  void ProcessBody(EndianOutput* output) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+  void ProcessBody(EndianOutput* output) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Runtime* runtime = Runtime::Current();
     // Walk the roots and the heap.
     output->StartNewRecord(HPROF_TAG_HEAP_DUMP_SEGMENT, kHprofTime);
@@ -646,8 +643,7 @@
   }
 
   bool DumpToDdmsBuffered(size_t overall_size ATTRIBUTE_UNUSED, size_t max_length ATTRIBUTE_UNUSED)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
     LOG(FATAL) << "Unimplemented";
     UNREACHABLE();
     //        // Send the data off to DDMS.
@@ -660,8 +656,7 @@
   }
 
   bool DumpToFile(size_t overall_size, size_t max_length)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Where exactly are we writing to?
     int out_fd;
     if (fd_ >= 0) {
@@ -708,8 +703,7 @@
   }
 
   bool DumpToDdmsDirect(size_t overall_size, size_t max_length, uint32_t chunk_type)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
     CHECK(direct_to_ddms_);
     JDWP::JdwpState* state = Dbg::GetJdwpState();
     CHECK(state != nullptr);
diff --git a/runtime/native/java_lang_Thread.cc b/runtime/native/java_lang_Thread.cc
index 760eb9b..e4b8db1 100644
--- a/runtime/native/java_lang_Thread.cc
+++ b/runtime/native/java_lang_Thread.cc
@@ -88,6 +88,7 @@
     case kWaitingForSignalCatcherOutput:  return kJavaWaiting;
     case kWaitingInMainSignalCatcherLoop: return kJavaWaiting;
     case kWaitingForMethodTracingStart:   return kJavaWaiting;
+    case kWaitingForVisitObjects:         return kJavaWaiting;
     case kSuspended:                      return kJavaRunnable;
     // Don't add a 'default' here so the compiler can spot incompatible enum changes.
   }
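
The deliberately missing 'default' in this switch is what makes the new enumerator safe to add: every switch over ThreadState fails to compile cleanly until it handles kWaitingForVisitObjects (hence the matching case added to debugger.cc above). A toy illustration:

    enum ThreadState { kRunnable, kWaitingForVisitObjects };

    const char* ToJavaState(ThreadState state) {
      switch (state) {
        case kRunnable:                return "RUNNABLE";
        case kWaitingForVisitObjects:  return "WAITING";
        // No default: adding a ThreadState value now produces a -Wswitch
        // warning (part of -Wall) here until the new case is handled.
      }
      return "UNKNOWN";  // Unreachable while the switch stays exhaustive.
    }
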
diff --git a/runtime/thread_state.h b/runtime/thread_state.h
index 6e5deeb..b5479ed 100644
--- a/runtime/thread_state.h
+++ b/runtime/thread_state.h
@@ -41,6 +41,7 @@
   kWaitingInMainSignalCatcherLoop,  // WAITING        TS_WAIT      blocking/reading/processing signals
   kWaitingForDeoptimization,        // WAITING        TS_WAIT      waiting for deoptimization suspend all
   kWaitingForMethodTracingStart,    // WAITING        TS_WAIT      waiting for method tracing to start
+  kWaitingForVisitObjects,          // WAITING        TS_WAIT      waiting while visiting heap objects
   kStarting,                        // NEW            TS_WAIT      native thread started, not yet ready to run managed code
   kNative,                          // RUNNABLE       TS_RUNNING   running in a JNI native method
   kSuspended,                       // RUNNABLE       TS_RUNNING   suspended by GC or debugger