Merge "Load shadow frame's this object only upon instrumentation." into dalvik-dev
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index fa60818..72ae91e 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -283,11 +283,28 @@
         need_flush = true;
       }
 
-      // For wide args, force flush if only half is promoted
+      // For wide args, force flush if not fully promoted
       if (t_loc->wide) {
         PromotionMap* p_map = v_map + (t_loc->high_word ? -1 : +1);
+        // Is only half promoted?
         need_flush |= (p_map->core_location != v_map->core_location) ||
             (p_map->fp_location != v_map->fp_location);
+        if ((cu_->instruction_set == kThumb2) && t_loc->fp && !need_flush) {
+          /*
+           * In Arm, a double is represented as a pair of consecutive single float
+           * registers starting at an even number.  It's possible that both Dalvik vRegs
+           * representing the incoming double were independently promoted as singles - but
+           * not in a form usable as a double.  If so, we need to flush - even though the
+           * incoming arg appears fully in register.  At this point in the code, both
+           * halves of the double are promoted.  Make sure they are in a usable form.
+           */
+          int lowreg_index = start_vreg + i + (t_loc->high_word ? -1 : 0);
+          int low_reg = promotion_map_[lowreg_index].FpReg;
+          int high_reg = promotion_map_[lowreg_index + 1].FpReg;
+          if (((low_reg & 0x1) != 0) || (high_reg != (low_reg + 1))) {
+            need_flush = true;
+          }
+        }
       }
       if (need_flush) {
         StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i),
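
A minimal sketch of the VFP constraint this new kThumb2 check enforces (standalone illustration, not part of the patch): on ARM, double dN aliases the single-precision pair {s2N, s2N+1}, so two independently promoted singles only form a usable double when the low register is even-numbered and the high register immediately follows it.

    // Illustrative helper mirroring the check above; low_reg and high_reg are
    // the promoted single-precision fp register numbers of the two halves.
    static bool FormsUsableDoublePair(int low_reg, int high_reg) {
      return ((low_reg & 0x1) == 0) && (high_reg == low_reg + 1);
    }
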
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index fdbc1d0..401e3d5 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -158,6 +158,10 @@
 #define ENCODE_ALL              (~0ULL)
 #define ENCODE_MEM              (ENCODE_DALVIK_REG | ENCODE_LITERAL | \
                                  ENCODE_HEAP_REF | ENCODE_MUST_NOT_ALIAS)
+
+// Mask to denote that an sreg is the start of a double.  Must not interfere with the low 16 bits.
+#define STARTING_DOUBLE_SREG 0x10000
+
 // TODO: replace these macros
 #define SLOW_FIELD_PATH (cu_->enable_debug & (1 << kDebugSlowFieldPath))
 #define SLOW_INVOKE_PATH (cu_->enable_debug & (1 << kDebugSlowInvokePath))
@@ -187,7 +191,6 @@
     struct RefCounts {
       int count;
       int s_reg;
-      bool double_start;   // Starting v_reg for a double
     };
 
     /*
@@ -324,11 +327,9 @@
     void RecordCorePromotion(int reg, int s_reg);
     int AllocPreservedCoreReg(int s_reg);
     void RecordFpPromotion(int reg, int s_reg);
-    int AllocPreservedSingle(int s_reg, bool even);
+    int AllocPreservedSingle(int s_reg);
     int AllocPreservedDouble(int s_reg);
-    int AllocPreservedFPReg(int s_reg, bool double_start);
-    int AllocTempBody(RegisterInfo* p, int num_regs, int* next_temp,
-                      bool required);
+    int AllocTempBody(RegisterInfo* p, int num_regs, int* next_temp, bool required);
     int AllocTempDouble();
     int AllocFreeTemp();
     int AllocTemp();
@@ -367,7 +368,7 @@
     RegLocation UpdateRawLoc(RegLocation loc);
     RegLocation EvalLocWide(RegLocation loc, int reg_class, bool update);
     RegLocation EvalLoc(RegLocation loc, int reg_class, bool update);
-    void CountRefs(RefCounts* core_counts, RefCounts* fp_counts);
+    void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs);
     void DumpCounts(const RefCounts* arr, int size, const char* msg);
     void DoPromotion();
     int VRegOffset(int v_reg);
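
A short sketch of the tagging scheme STARTING_DOUBLE_SREG introduces (helper names are hypothetical; the patch open-codes these operations inline): the low 16 bits of a RefCounts::s_reg entry carry the sreg number, and bit 16 marks the entry as counting a double that starts at that sreg.

    static inline int TagDoubleSReg(int s_reg) { return s_reg | STARTING_DOUBLE_SREG; }
    static inline bool IsDoubleSReg(int s_reg) { return (s_reg & STARTING_DOUBLE_SREG) != 0; }
    static inline int UntagSReg(int s_reg) { return s_reg & ~STARTING_DOUBLE_SREG; }
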
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index a0f22fc..7927ff9 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -174,17 +174,12 @@
   promotion_map_[p_map_idx].FpReg = reg;
 }
 
-/*
- * Reserve a callee-save fp single register.  Try to fullfill request for
- * even/odd  allocation, but go ahead and allocate anything if not
- * available.  If nothing's available, return -1.
- */
-int Mir2Lir::AllocPreservedSingle(int s_reg, bool even) {
-  int res = -1;
+// Reserve a callee-save fp single register.
+int Mir2Lir::AllocPreservedSingle(int s_reg) {
+  int res = -1;  // Return value if none are available.
   RegisterInfo* FPRegs = reg_pool_->FPRegs;
   for (int i = 0; i < reg_pool_->num_fp_regs; i++) {
-    if (!FPRegs[i].is_temp && !FPRegs[i].in_use &&
-      ((FPRegs[i].reg & 0x1) == 0) == even) {
+    if (!FPRegs[i].is_temp && !FPRegs[i].in_use) {
       res = FPRegs[i].reg;
       RecordFpPromotion(res, s_reg);
       break;
@@ -250,26 +245,6 @@
   return res;
 }
 
-
-/*
- * Reserve a callee-save fp register.   If this register can be used
- * as the first of a double, attempt to allocate an even pair of fp
- * single regs (but if can't still attempt to allocate a single, preferring
- * first to allocate an odd register.
- */
-int Mir2Lir::AllocPreservedFPReg(int s_reg, bool double_start) {
-  int res = -1;
-  if (double_start) {
-    res = AllocPreservedDouble(s_reg);
-  }
-  if (res == -1) {
-    res = AllocPreservedSingle(s_reg, false /* try odd # */);
-  }
-  if (res == -1)
-    res = AllocPreservedSingle(s_reg, true /* try even # */);
-  return res;
-}
-
 int Mir2Lir::AllocTempBody(RegisterInfo* p, int num_regs, int* next_temp,
                            bool required) {
   int next = *next_temp;
@@ -872,18 +847,22 @@
 }
 
 /* USE SSA names to count references of base Dalvik v_regs. */
-void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts) {
+void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) {
   for (int i = 0; i < mir_graph_->GetNumSSARegs(); i++) {
     RegLocation loc = mir_graph_->reg_location_[i];
     RefCounts* counts = loc.fp ? fp_counts : core_counts;
     int p_map_idx = SRegToPMap(loc.s_reg_low);
-    // Don't count easily regenerated immediates
-    if (loc.fp || !IsInexpensiveConstant(loc)) {
+    if (loc.fp) {
+      if (loc.wide) {
+        // Treat doubles as a unit, using the upper half of the fp_counts array.
+        counts[p_map_idx + num_regs].count += mir_graph_->GetUseCount(i);
+        i++;
+      } else {
+        counts[p_map_idx].count += mir_graph_->GetUseCount(i);
+      }
+    } else if (!IsInexpensiveConstant(loc)) {
       counts[p_map_idx].count += mir_graph_->GetUseCount(i);
     }
-    if (loc.wide && loc.fp && !loc.high_word) {
-      counts[p_map_idx].double_start = true;
-    }
   }
 }
 
@@ -902,7 +881,11 @@
 void Mir2Lir::DumpCounts(const RefCounts* arr, int size, const char* msg) {
   LOG(INFO) << msg;
   for (int i = 0; i < size; i++) {
-    LOG(INFO) << "s_reg[" << arr[i].s_reg << "]: " << arr[i].count;
+    if ((arr[i].s_reg & STARTING_DOUBLE_SREG) != 0) {
+      LOG(INFO) << "s_reg[D" << (arr[i].s_reg & ~STARTING_DOUBLE_SREG) << "]: " << arr[i].count;
+    } else {
+      LOG(INFO) << "s_reg[" << arr[i].s_reg << "]: " << arr[i].count;
+    }
   }
 }
 
@@ -925,7 +908,7 @@
    * count based on original Dalvik register name.  Count refs
    * separately based on type in order to give allocation
    * preference to fp doubles - which must be allocated sequential
-   * physical single fp registers started with an even-numbered
+   * physical single fp registers starting with an even-numbered
    * reg.
    * TUNING: replace with linear scan once we have the ability
    * to describe register live ranges for GC.
@@ -934,7 +917,7 @@
       static_cast<RefCounts*>(arena_->Alloc(sizeof(RefCounts) * num_regs,
                                             ArenaAllocator::kAllocRegAlloc));
   RefCounts *FpRegs =
-      static_cast<RefCounts *>(arena_->Alloc(sizeof(RefCounts) * num_regs,
+      static_cast<RefCounts *>(arena_->Alloc(sizeof(RefCounts) * num_regs * 2,
                                              ArenaAllocator::kAllocRegAlloc));
   // Set ssa names for original Dalvik registers
   for (int i = 0; i < dalvik_regs; i++) {
@@ -942,46 +925,49 @@
   }
   // Set ssa name for Method*
   core_regs[dalvik_regs].s_reg = mir_graph_->GetMethodSReg();
-  FpRegs[dalvik_regs].s_reg = mir_graph_->GetMethodSReg();  // For consistecy
+  FpRegs[dalvik_regs].s_reg = mir_graph_->GetMethodSReg();  // For consistency.
+  FpRegs[dalvik_regs + num_regs].s_reg = mir_graph_->GetMethodSReg();  // For consistency.
   // Set ssa names for compiler_temps
   for (int i = 1; i <= cu_->num_compiler_temps; i++) {
     CompilerTemp* ct = mir_graph_->compiler_temps_.Get(i);
     core_regs[dalvik_regs + i].s_reg = ct->s_reg;
     FpRegs[dalvik_regs + i].s_reg = ct->s_reg;
+    FpRegs[num_regs + dalvik_regs + i].s_reg = ct->s_reg;
+  }
+
+  // Duplicate in the upper half to represent possible fp double starting sregs.
+  for (int i = 0; i < num_regs; i++) {
+    FpRegs[num_regs + i].s_reg = FpRegs[i].s_reg | STARTING_DOUBLE_SREG;
   }
 
   // Sum use counts of SSA regs by original Dalvik vreg.
-  CountRefs(core_regs, FpRegs);
+  CountRefs(core_regs, FpRegs, num_regs);
 
-  /*
-   * Ideally, we'd allocate doubles starting with an even-numbered
-   * register.  Bias the counts to try to allocate any vreg that's
-   * used as the start of a pair first.
-   */
-  for (int i = 0; i < num_regs; i++) {
-    if (FpRegs[i].double_start) {
-      FpRegs[i].count *= 2;
-    }
-  }
 
   // Sort the count arrays
   qsort(core_regs, num_regs, sizeof(RefCounts), SortCounts);
-  qsort(FpRegs, num_regs, sizeof(RefCounts), SortCounts);
+  qsort(FpRegs, num_regs * 2, sizeof(RefCounts), SortCounts);
 
   if (cu_->verbose) {
     DumpCounts(core_regs, num_regs, "Core regs after sort");
-    DumpCounts(FpRegs, num_regs, "Fp regs after sort");
+    DumpCounts(FpRegs, num_regs * 2, "Fp regs after sort");
   }
 
   if (!(cu_->disable_opt & (1 << kPromoteRegs))) {
     // Promote FpRegs
-    for (int i = 0; (i < num_regs) && (FpRegs[i].count >= promotion_threshold); i++) {
-      int p_map_idx = SRegToPMap(FpRegs[i].s_reg);
-      if (promotion_map_[p_map_idx].fp_location != kLocPhysReg) {
-        int reg = AllocPreservedFPReg(FpRegs[i].s_reg,
-          FpRegs[i].double_start);
+    for (int i = 0; (i < (num_regs * 2)) && (FpRegs[i].count >= promotion_threshold); i++) {
+      int p_map_idx = SRegToPMap(FpRegs[i].s_reg & ~STARTING_DOUBLE_SREG);
+      if ((FpRegs[i].s_reg & STARTING_DOUBLE_SREG) != 0) {
+        if ((promotion_map_[p_map_idx].fp_location != kLocPhysReg) &&
+            (promotion_map_[p_map_idx + 1].fp_location != kLocPhysReg)) {
+          int low_sreg = FpRegs[i].s_reg & ~STARTING_DOUBLE_SREG;
+          // Ignore result - if we can't alloc a double, we may still be able to alloc singles.
+          AllocPreservedDouble(low_sreg);
+        }
+      } else if (promotion_map_[p_map_idx].fp_location != kLocPhysReg) {
+        int reg = AllocPreservedSingle(FpRegs[i].s_reg);
         if (reg < 0) {
-          break;  // No more left
+          break;  // No more left.
         }
       }
     }
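
To make the new counting layout concrete, here is a sketch (with a standalone copy of the RefCounts type) of how CountRefs now splits fp use counts: the lower half of the doubled FpRegs array counts singles per vreg, the upper half counts doubles keyed by their starting vreg, and sorting all 2 * num_regs entries together lets hot doubles compete directly with singles for the callee-save fp registers.

    struct RefCounts { int count; int s_reg; };

    // Illustrative accumulation step for one SSA reg's use count.
    void CountFpUse(RefCounts* fp_counts, size_t num_regs, int p_map_idx,
                    bool wide, int use_count) {
      if (wide) {
        fp_counts[p_map_idx + num_regs].count += use_count;  // double, by starting vreg
      } else {
        fp_counts[p_map_idx].count += use_count;             // single
      }
    }
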
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 37b62ad..c19f872 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -192,7 +192,8 @@
       class_roots_(NULL),
       array_iftable_(NULL),
       init_done_(false),
-      is_dirty_(false),
+      dex_caches_dirty_(false),
+      class_table_dirty_(false),
       intern_table_(intern_table),
       portable_resolution_trampoline_(NULL),
       quick_resolution_trampoline_(NULL) {
@@ -1088,30 +1089,40 @@
 // Keep in sync with InitCallback. Anything we visit, we need to
 // reinit references to when reinitializing a ClassLinker from a
 // mapped image.
-void ClassLinker::VisitRoots(RootVisitor* visitor, void* arg, bool clean_dirty) {
-  visitor(class_roots_, arg);
+void ClassLinker::VisitRoots(RootVisitor* visitor, void* arg, bool only_dirty, bool clean_dirty) {
+  class_roots_ = down_cast<mirror::ObjectArray<mirror::Class>*>(visitor(class_roots_, arg));
   Thread* self = Thread::Current();
   {
     ReaderMutexLock mu(self, dex_lock_);
-    for (mirror::DexCache* dex_cache : dex_caches_) {
-      visitor(dex_cache, arg);
+    if (!only_dirty || dex_caches_dirty_) {
+      for (mirror::DexCache*& dex_cache : dex_caches_) {
+        dex_cache = down_cast<mirror::DexCache*>(visitor(dex_cache, arg));
+        DCHECK(dex_cache != nullptr);
+      }
+      if (clean_dirty) {
+        dex_caches_dirty_ = false;
+      }
     }
   }
 
   {
     ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);
-    for (const std::pair<size_t, mirror::Class*>& it : class_table_) {
-      visitor(it.second, arg);
+    if (!only_dirty || class_table_dirty_) {
+      for (std::pair<const size_t, mirror::Class*>& it : class_table_) {
+        it.second = down_cast<mirror::Class*>(visitor(it.second, arg));
+        DCHECK(it.second != nullptr);
+      }
+      if (clean_dirty) {
+        class_table_dirty_ = false;
+      }
     }
 
     // We deliberately ignore the class roots in the image since we
     // handle image roots by using the MS/CMS rescanning of dirty cards.
   }
 
-  visitor(array_iftable_, arg);
-  if (clean_dirty) {
-    is_dirty_ = false;
-  }
+  array_iftable_ = reinterpret_cast<mirror::IfTable*>(visitor(array_iftable_, arg));
+  DCHECK(array_iftable_ != nullptr);
 }
 
 void ClassLinker::VisitClasses(ClassVisitor* visitor, void* arg) {
@@ -1928,7 +1939,7 @@
   CHECK(dex_cache->GetLocation()->Equals(dex_file.GetLocation()));
   dex_caches_.push_back(dex_cache.get());
   dex_cache->SetDexFile(&dex_file);
-  Dirty();
+  dex_caches_dirty_ = true;
 }
 
 void ClassLinker::RegisterDexFile(const DexFile& dex_file) {
@@ -2203,7 +2214,7 @@
   }
   Runtime::Current()->GetHeap()->VerifyObject(klass);
   class_table_.insert(std::make_pair(hash, klass));
-  Dirty();
+  class_table_dirty_ = true;
   return NULL;
 }
 
@@ -2316,7 +2327,7 @@
       }
     }
   }
-  Dirty();
+  class_table_dirty_ = true;
   dex_cache_image_class_lookup_required_ = false;
   self->EndAssertNoThreadSuspension(old_no_suspend_cause);
 }
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index c5fb72c..20efbb4 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -231,7 +231,7 @@
       LOCKS_EXCLUDED(dex_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void VisitRoots(RootVisitor* visitor, void* arg, bool clean_dirty)
+  void VisitRoots(RootVisitor* visitor, void* arg, bool only_dirty, bool clean_dirty)
       LOCKS_EXCLUDED(Locks::classlinker_classes_lock_, dex_lock_);
 
   mirror::DexCache* FindDexCache(const DexFile& dex_file) const
@@ -335,14 +335,6 @@
   pid_t GetClassesLockOwner();  // For SignalCatcher.
   pid_t GetDexLockOwner();  // For SignalCatcher.
 
-  bool IsDirty() const {
-    return is_dirty_;
-  }
-
-  void Dirty() {
-    is_dirty_ = true;
-  }
-
   const void* GetPortableResolutionTrampoline() const {
     return portable_resolution_trampoline_;
   }
@@ -617,7 +609,8 @@
   mirror::IfTable* array_iftable_;
 
   bool init_done_;
-  bool is_dirty_;
+  bool dex_caches_dirty_ GUARDED_BY(dex_lock_);
+  bool class_table_dirty_ GUARDED_BY(Locks::classlinker_classes_lock_);
 
   InternTable* intern_table_;
 
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 6442f5a..0fa0ffb 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -331,7 +331,7 @@
       const char* descriptor = dex->GetTypeDescriptor(type_id);
       AssertDexFileClass(class_loader, descriptor);
     }
-    class_linker_->VisitRoots(TestRootVisitor, NULL, false);
+    class_linker_->VisitRoots(TestRootVisitor, NULL, false, false);
     // Verify the dex cache has resolution methods in all resolved method slots
     mirror::DexCache* dex_cache = class_linker_->FindDexCache(*dex);
     mirror::ObjectArray<mirror::ArtMethod>* resolved_methods = dex_cache->GetResolvedMethods();
@@ -340,8 +340,9 @@
     }
   }
 
-  static void TestRootVisitor(const mirror::Object* root, void*) {
+  static mirror::Object* TestRootVisitor(mirror::Object* root, void*) {
     EXPECT_TRUE(root != NULL);
+    return root;
   }
 };
 
diff --git a/runtime/gc/accounting/card_table-inl.h b/runtime/gc/accounting/card_table-inl.h
index c5e8812..0cfbd6f 100644
--- a/runtime/gc/accounting/card_table-inl.h
+++ b/runtime/gc/accounting/card_table-inl.h
@@ -64,10 +64,9 @@
   byte* aligned_end = card_end -
       (reinterpret_cast<uintptr_t>(card_end) & (sizeof(uintptr_t) - 1));
 
-  // Now we have the words, we can send these to be processed in parallel.
-  uintptr_t* word_cur = reinterpret_cast<uintptr_t*>(card_cur);
   uintptr_t* word_end = reinterpret_cast<uintptr_t*>(aligned_end);
-  for (;;) {
+  for (uintptr_t* word_cur = reinterpret_cast<uintptr_t*>(card_cur); word_cur < word_end;
+      ++word_cur) {
     while (LIKELY(*word_cur == 0)) {
       ++word_cur;
       if (UNLIKELY(word_cur >= word_end)) {
@@ -78,6 +77,8 @@
     // Find the first dirty card.
     uintptr_t start_word = *word_cur;
     uintptr_t start = reinterpret_cast<uintptr_t>(AddrFromCard(reinterpret_cast<byte*>(word_cur)));
+    // TODO: Investigate whether processing contiguous runs of dirty cards with a single bitmap
+    // visit is more efficient.
     for (size_t i = 0; i < sizeof(uintptr_t); ++i) {
       if (static_cast<byte>(start_word) >= minimum_age) {
         auto* card = reinterpret_cast<byte*>(word_cur) + i;
@@ -88,7 +89,6 @@
       start_word >>= 8;
       start += kCardSize;
     }
-    ++word_cur;
   }
   exit_for:
 
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 953fbf9..5d9db83 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -497,24 +497,18 @@
   }
 }
 
-void MarkSweep::MarkRootParallelCallback(const Object* root, void* arg) {
+Object* MarkSweep::MarkRootParallelCallback(Object* root, void* arg) {
   DCHECK(root != NULL);
   DCHECK(arg != NULL);
   reinterpret_cast<MarkSweep*>(arg)->MarkObjectNonNullParallel(root);
+  return root;
 }
 
-void MarkSweep::MarkObjectCallback(const Object* root, void* arg) {
-  DCHECK(root != NULL);
-  DCHECK(arg != NULL);
-  MarkSweep* mark_sweep = reinterpret_cast<MarkSweep*>(arg);
-  mark_sweep->MarkObjectNonNull(root);
-}
-
-void MarkSweep::ReMarkObjectVisitor(const Object* root, void* arg) {
-  DCHECK(root != NULL);
-  DCHECK(arg != NULL);
-  MarkSweep* mark_sweep = reinterpret_cast<MarkSweep*>(arg);
-  mark_sweep->MarkObjectNonNull(root);
+Object* MarkSweep::MarkRootCallback(Object* root, void* arg) {
+  DCHECK(root != nullptr);
+  DCHECK(arg != nullptr);
+  reinterpret_cast<MarkSweep*>(arg)->MarkObjectNonNull(root);
+  return root;
 }
 
 void MarkSweep::VerifyRootCallback(const Object* root, void* arg, size_t vreg,
@@ -542,20 +536,20 @@
 // Marks all objects in the root set.
 void MarkSweep::MarkRoots() {
   timings_.StartSplit("MarkRoots");
-  Runtime::Current()->VisitNonConcurrentRoots(MarkObjectCallback, this);
+  Runtime::Current()->VisitNonConcurrentRoots(MarkRootCallback, this);
   timings_.EndSplit();
 }
 
 void MarkSweep::MarkNonThreadRoots() {
   timings_.StartSplit("MarkNonThreadRoots");
-  Runtime::Current()->VisitNonThreadRoots(MarkObjectCallback, this);
+  Runtime::Current()->VisitNonThreadRoots(MarkRootCallback, this);
   timings_.EndSplit();
 }
 
 void MarkSweep::MarkConcurrentRoots() {
   timings_.StartSplit("MarkConcurrentRoots");
   // Visit all runtime roots and clear dirty flags.
-  Runtime::Current()->VisitConcurrentRoots(MarkObjectCallback, this, false, true);
+  Runtime::Current()->VisitConcurrentRoots(MarkRootCallback, this, false, true);
   timings_.EndSplit();
 }
 
@@ -950,10 +944,12 @@
   ProcessMarkStack(false);
 }
 
-bool MarkSweep::IsMarkedCallback(const Object* object, void* arg) {
-  return
-      reinterpret_cast<MarkSweep*>(arg)->IsMarked(object) ||
-      !reinterpret_cast<MarkSweep*>(arg)->GetHeap()->GetLiveBitmap()->Test(object);
+mirror::Object* MarkSweep::SystemWeakIsMarkedCallback(Object* object, void* arg) {
+  if (reinterpret_cast<MarkSweep*>(arg)->IsMarked(object) ||
+      !reinterpret_cast<MarkSweep*>(arg)->GetHeap()->GetLiveBitmap()->Test(object)) {
+    return object;
+  }
+  return nullptr;
 }
 
 void MarkSweep::RecursiveMarkDirtyObjects(bool paused, byte minimum_age) {
@@ -963,33 +959,26 @@
 
 void MarkSweep::ReMarkRoots() {
   timings_.StartSplit("ReMarkRoots");
-  Runtime::Current()->VisitRoots(ReMarkObjectVisitor, this, true, true);
+  Runtime::Current()->VisitRoots(MarkRootCallback, this, true, true);
   timings_.EndSplit();
 }
 
-void MarkSweep::SweepJniWeakGlobals(IsMarkedTester is_marked, void* arg) {
-  JavaVMExt* vm = Runtime::Current()->GetJavaVM();
-  WriterMutexLock mu(Thread::Current(), vm->weak_globals_lock);
-  for (const Object** entry : vm->weak_globals) {
-    if (!is_marked(*entry, arg)) {
-      *entry = kClearedJniWeakGlobal;
-    }
-  }
-}
-
 struct ArrayMarkedCheck {
   accounting::ObjectStack* live_stack;
   MarkSweep* mark_sweep;
 };
 
 // Either marked or not live.
-bool MarkSweep::IsMarkedArrayCallback(const Object* object, void* arg) {
+mirror::Object* MarkSweep::SystemWeakIsMarkedArrayCallback(Object* object, void* arg) {
   ArrayMarkedCheck* array_check = reinterpret_cast<ArrayMarkedCheck*>(arg);
   if (array_check->mark_sweep->IsMarked(object)) {
-    return true;
+    return object;
   }
   accounting::ObjectStack* live_stack = array_check->live_stack;
-  return std::find(live_stack->Begin(), live_stack->End(), object) == live_stack->End();
+  if (std::find(live_stack->Begin(), live_stack->End(), object) == live_stack->End()) {
+    return object;
+  }
+  return nullptr;
 }
 
 void MarkSweep::SweepSystemWeaksArray(accounting::ObjectStack* allocations) {
@@ -999,14 +988,11 @@
   // !IsMarked && IsLive
   // So compute !(!IsMarked && IsLive) which is equal to (IsMarked || !IsLive).
   // Or for swapped (IsLive || !IsMarked).
-
   timings_.StartSplit("SweepSystemWeaksArray");
   ArrayMarkedCheck visitor;
   visitor.live_stack = allocations;
   visitor.mark_sweep = this;
-  runtime->GetInternTable()->SweepInternTableWeaks(IsMarkedArrayCallback, &visitor);
-  runtime->GetMonitorList()->SweepMonitorList(IsMarkedArrayCallback, &visitor);
-  SweepJniWeakGlobals(IsMarkedArrayCallback, &visitor);
+  runtime->SweepSystemWeaks(SystemWeakIsMarkedArrayCallback, &visitor);
   timings_.EndSplit();
 }
 
@@ -1018,16 +1004,14 @@
   // So compute !(!IsMarked && IsLive) which is equal to (IsMarked || !IsLive).
   // Or for swapped (IsLive || !IsMarked).
   timings_.StartSplit("SweepSystemWeaks");
-  runtime->GetInternTable()->SweepInternTableWeaks(IsMarkedCallback, this);
-  runtime->GetMonitorList()->SweepMonitorList(IsMarkedCallback, this);
-  SweepJniWeakGlobals(IsMarkedCallback, this);
+  runtime->SweepSystemWeaks(SystemWeakIsMarkedCallback, this);
   timings_.EndSplit();
 }
 
-bool MarkSweep::VerifyIsLiveCallback(const Object* obj, void* arg) {
+mirror::Object* MarkSweep::VerifySystemWeakIsLiveCallback(Object* obj, void* arg) {
   reinterpret_cast<MarkSweep*>(arg)->VerifyIsLive(obj);
   // We don't actually want to sweep the object, so lets return "marked"
-  return true;
+  return obj;
 }
 
 void MarkSweep::VerifyIsLive(const Object* obj) {
@@ -1046,16 +1030,8 @@
 }
 
 void MarkSweep::VerifySystemWeaks() {
-  Runtime* runtime = Runtime::Current();
-  // Verify system weaks, uses a special IsMarked callback which always returns true.
-  runtime->GetInternTable()->SweepInternTableWeaks(VerifyIsLiveCallback, this);
-  runtime->GetMonitorList()->SweepMonitorList(VerifyIsLiveCallback, this);
-
-  JavaVMExt* vm = runtime->GetJavaVM();
-  ReaderMutexLock mu(Thread::Current(), vm->weak_globals_lock);
-  for (const Object** entry : vm->weak_globals) {
-    VerifyIsLive(*entry);
-  }
+  // Verify system weaks, using a special object visitor that returns the input object.
+  Runtime::Current()->SweepSystemWeaks(VerifySystemWeakIsLiveCallback, this);
 }
 
 struct SweepCallbackContext {
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index fdd0c86..a857dab 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -208,7 +208,7 @@
   void SweepSystemWeaksArray(accounting::ObjectStack* allocations)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  static bool VerifyIsLiveCallback(const mirror::Object* obj, void* arg)
+  static mirror::Object* VerifySystemWeakIsLiveCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   void VerifySystemWeaks()
@@ -223,11 +223,11 @@
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_,
                             Locks::mutator_lock_);
 
-  static void MarkObjectCallback(const mirror::Object* root, void* arg)
+  static mirror::Object* MarkRootCallback(mirror::Object* root, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  static void MarkRootParallelCallback(const mirror::Object* root, void* arg);
+  static mirror::Object* MarkRootParallelCallback(mirror::Object* root, void* arg);
 
   // Marks an object.
   void MarkObject(const mirror::Object* obj)
@@ -246,16 +246,12 @@
   // Returns true if the object has its bit set in the mark bitmap.
   bool IsMarked(const mirror::Object* object) const;
 
-  static bool IsMarkedCallback(const mirror::Object* object, void* arg)
+  static mirror::Object* SystemWeakIsMarkedCallback(mirror::Object* object, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  static bool IsMarkedArrayCallback(const mirror::Object* object, void* arg)
+  static mirror::Object* SystemWeakIsMarkedArrayCallback(mirror::Object* object, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  static void ReMarkObjectVisitor(const mirror::Object* root, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
   static void VerifyImageRootVisitor(mirror::Object* root, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_,
                             Locks::mutator_lock_);
@@ -394,9 +390,6 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SweepJniWeakGlobals(IsMarkedTester is_marked, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
   // Whether or not we count how many of each type of object were scanned.
   static const bool kCountScannedTypes = false;
 
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index e0048a0..916d38e 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1322,11 +1322,12 @@
   image_mod_union_table_->MarkReferences(mark_sweep);
 }
 
-static void RootMatchesObjectVisitor(const mirror::Object* root, void* arg) {
+static mirror::Object* RootMatchesObjectVisitor(mirror::Object* root, void* arg) {
   mirror::Object* obj = reinterpret_cast<mirror::Object*>(arg);
   if (root == obj) {
     LOG(INFO) << "Object " << obj << " is a root";
   }
+  return root;
 }
 
 class ScanVisitor {
@@ -1414,9 +1415,10 @@
     return heap_->IsLiveObjectLocked(obj);
   }
 
-  static void VerifyRoots(const mirror::Object* root, void* arg) {
+  static mirror::Object* VerifyRoots(mirror::Object* root, void* arg) {
     VerifyReferenceVisitor* visitor = reinterpret_cast<VerifyReferenceVisitor*>(arg);
-    (*visitor)(NULL, root, MemberOffset(0), true);
+    (*visitor)(nullptr, root, MemberOffset(0), true);
+    return root;
   }
 
  private:
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 0b2e741..67620a0 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -484,11 +484,11 @@
   }
 
  private:
-  static void RootVisitor(const mirror::Object* obj, void* arg)
+  static mirror::Object* RootVisitor(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    CHECK(arg != NULL);
-    Hprof* hprof = reinterpret_cast<Hprof*>(arg);
-    hprof->VisitRoot(obj);
+    DCHECK(arg != NULL);
+    reinterpret_cast<Hprof*>(arg)->VisitRoot(obj);
+    return obj;
   }
 
   static void HeapBitmapCallback(mirror::Object* obj, void* arg)
diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc
index 8af4d7e..2bd8353 100644
--- a/runtime/indirect_reference_table.cc
+++ b/runtime/indirect_reference_table.cc
@@ -40,7 +40,7 @@
   CHECK_LE(initialCount, maxCount);
   CHECK_NE(desiredKind, kSirtOrInvalid);
 
-  table_ = reinterpret_cast<const mirror::Object**>(malloc(initialCount * sizeof(const mirror::Object*)));
+  table_ = reinterpret_cast<mirror::Object**>(malloc(initialCount * sizeof(const mirror::Object*)));
   CHECK(table_ != NULL);
   memset(table_, 0xd1, initialCount * sizeof(const mirror::Object*));
 
@@ -75,7 +75,7 @@
   return true;
 }
 
-IndirectRef IndirectReferenceTable::Add(uint32_t cookie, const mirror::Object* obj) {
+IndirectRef IndirectReferenceTable::Add(uint32_t cookie, mirror::Object* obj) {
   IRTSegmentState prevState;
   prevState.all = cookie;
   size_t topIndex = segment_state_.parts.topIndex;
@@ -101,7 +101,7 @@
     }
     DCHECK_GT(newSize, alloc_entries_);
 
-    table_ = reinterpret_cast<const mirror::Object**>(realloc(table_, newSize * sizeof(const mirror::Object*)));
+    table_ = reinterpret_cast<mirror::Object**>(realloc(table_, newSize * sizeof(mirror::Object*)));
     slot_data_ = reinterpret_cast<IndirectRefSlot*>(realloc(slot_data_,
                                                             newSize * sizeof(IndirectRefSlot)));
     if (table_ == NULL || slot_data_ == NULL) {
@@ -126,7 +126,7 @@
   if (numHoles > 0) {
     DCHECK_GT(topIndex, 1U);
     // Find the first hole; likely to be near the end of the list.
-    const mirror::Object** pScan = &table_[topIndex - 1];
+    mirror::Object** pScan = &table_[topIndex - 1];
     DCHECK(*pScan != NULL);
     while (*--pScan != NULL) {
       DCHECK_GE(pScan, table_ + prevState.parts.topIndex);
@@ -194,7 +194,8 @@
   return true;
 }
 
-static int Find(mirror::Object* direct_pointer, int bottomIndex, int topIndex, const mirror::Object** table) {
+static int Find(mirror::Object* direct_pointer, int bottomIndex, int topIndex,
+                mirror::Object** table) {
   for (int i = bottomIndex; i < topIndex; ++i) {
     if (table[i] == direct_pointer) {
       return i;
@@ -310,13 +311,14 @@
 
 void IndirectReferenceTable::VisitRoots(RootVisitor* visitor, void* arg) {
   for (auto ref : *this) {
-    visitor(*ref, arg);
+    *ref = visitor(const_cast<mirror::Object*>(*ref), arg);
+    DCHECK(*ref != nullptr);
   }
 }
 
 void IndirectReferenceTable::Dump(std::ostream& os) const {
   os << kind_ << " table dump:\n";
-  std::vector<const mirror::Object*> entries(table_, table_ + Capacity());
+  ReferenceTable::Table entries(table_, table_ + Capacity());
   // Remove NULLs.
   for (int i = entries.size() - 1; i >= 0; --i) {
     if (entries[i] == NULL) {
diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h
index 26f53db..51b238c 100644
--- a/runtime/indirect_reference_table.h
+++ b/runtime/indirect_reference_table.h
@@ -206,7 +206,7 @@
 
 class IrtIterator {
  public:
-  explicit IrtIterator(const mirror::Object** table, size_t i, size_t capacity)
+  explicit IrtIterator(mirror::Object** table, size_t i, size_t capacity)
       : table_(table), i_(i), capacity_(capacity) {
     SkipNullsAndTombstones();
   }
@@ -217,7 +217,7 @@
     return *this;
   }
 
-  const mirror::Object** operator*() {
+  mirror::Object** operator*() {
     return &table_[i_];
   }
 
@@ -233,7 +233,7 @@
     }
   }
 
-  const mirror::Object** table_;
+  mirror::Object** table_;
   size_t i_;
   size_t capacity_;
 };
@@ -258,7 +258,7 @@
    * Returns NULL if the table is full (max entries reached, or alloc
    * failed during expansion).
    */
-  IndirectRef Add(uint32_t cookie, const mirror::Object* obj)
+  IndirectRef Add(uint32_t cookie, mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   /*
@@ -266,7 +266,7 @@
    *
    * Returns kInvalidIndirectRefObject if iref is invalid.
    */
-  const mirror::Object* Get(IndirectRef iref) const {
+  mirror::Object* Get(IndirectRef iref) const {
     if (!GetChecked(iref)) {
       return kInvalidIndirectRefObject;
     }
@@ -363,7 +363,7 @@
   IRTSegmentState segment_state_;
 
   /* bottom of the stack */
-  const mirror::Object** table_;
+  mirror::Object** table_;
   /* bit mask, ORed into all irefs */
   IndirectRefKind kind_;
   /* extended debugging info */
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index e3a75cf..29d2ae9 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -42,16 +42,19 @@
 }
 
 void InternTable::VisitRoots(RootVisitor* visitor, void* arg,
-                             bool clean_dirty) {
+                             bool only_dirty, bool clean_dirty) {
   MutexLock mu(Thread::Current(), intern_table_lock_);
-  for (const auto& strong_intern : strong_interns_) {
-    visitor(strong_intern.second, arg);
+  if (!only_dirty || is_dirty_) {
+    for (auto& strong_intern : strong_interns_) {
+      strong_intern.second = reinterpret_cast<mirror::String*>(visitor(strong_intern.second, arg));
+      DCHECK(strong_intern.second != nullptr);
+    }
+
+    if (clean_dirty) {
+      is_dirty_ = false;
+    }
   }
-  if (clean_dirty) {
-    is_dirty_ = false;
-  }
-  // Note: we deliberately don't visit the weak_interns_ table and the immutable
-  // image roots.
+  // Note: we deliberately don't visit the weak_interns_ table and the immutable image roots.
 }
 
 mirror::String* InternTable::Lookup(Table& table, mirror::String* s,
@@ -123,7 +126,7 @@
     }
 
     // Mark as dirty so that we rescan the roots.
-    Dirty();
+    is_dirty_ = true;
 
     // Check the image for a match.
     mirror::String* image = LookupStringFromImage(s);
@@ -194,14 +197,16 @@
   return found == s;
 }
 
-void InternTable::SweepInternTableWeaks(IsMarkedTester is_marked, void* arg) {
+void InternTable::SweepInternTableWeaks(RootVisitor visitor, void* arg) {
   MutexLock mu(Thread::Current(), intern_table_lock_);
-  // TODO: std::remove_if + lambda.
   for (auto it = weak_interns_.begin(), end = weak_interns_.end(); it != end;) {
     mirror::Object* object = it->second;
-    if (!is_marked(object, arg)) {
+    mirror::Object* new_object = visitor(object, arg);
+    if (new_object == nullptr) {
+      // TODO: use it = weak_interns_.erase(it) when we get a C++11 STL.
       weak_interns_.erase(it++);
     } else {
+      it->second = down_cast<mirror::String*>(new_object);
       ++it;
     }
   }
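
The TODO above refers to the C++11 erase overload on associative containers, which returns the successor iterator; a sketch of the eventual form, under the same visitor contract (nullptr means the weak entry is dead):

    for (auto it = weak_interns_.begin(); it != weak_interns_.end(); ) {
      mirror::Object* new_object = visitor(it->second, arg);
      if (new_object == nullptr) {
        it = weak_interns_.erase(it);  // C++11: erase returns the next iterator
      } else {
        it->second = down_cast<mirror::String*>(new_object);
        ++it;
      }
    }
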
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index a804d1f..9806130 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -55,22 +55,16 @@
   // Interns a potentially new string in the 'weak' table. (See above.)
   mirror::String* InternWeak(mirror::String* s) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SweepInternTableWeaks(IsMarkedTester is_marked, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  void SweepInternTableWeaks(RootVisitor visitor, void* arg);
 
   bool ContainsWeak(mirror::String* s) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   size_t Size() const;
 
-  void VisitRoots(RootVisitor* visitor, void* arg, bool clean_dirty);
+  void VisitRoots(RootVisitor* visitor, void* arg, bool only_dirty, bool clean_dirty);
 
   void DumpForSigQuit(std::ostream& os) const;
 
-  bool IsDirty() const { return is_dirty_; }
-  void Dirty() {
-    is_dirty_ = true;
-  }
-
  private:
   typedef std::multimap<int32_t, mirror::String*> Table;
 
@@ -83,7 +77,7 @@
   void Remove(Table& table, const mirror::String* s, uint32_t hash_code);
 
   mutable Mutex intern_table_lock_;
-  bool is_dirty_;
+  bool is_dirty_ GUARDED_BY(intern_table_lock_);
   Table strong_interns_ GUARDED_BY(intern_table_lock_);
   Table weak_interns_ GUARDED_BY(intern_table_lock_);
 };
diff --git a/runtime/intern_table_test.cc b/runtime/intern_table_test.cc
index d79d2c4..aa2502d 100644
--- a/runtime/intern_table_test.cc
+++ b/runtime/intern_table_test.cc
@@ -81,8 +81,11 @@
   mutable std::vector<const mirror::String*> expected_;
 };
 
-bool IsMarked(const mirror::Object* object, void* arg) {
-  return reinterpret_cast<TestPredicate*>(arg)->IsMarked(object);
+mirror::Object* IsMarkedSweepingVisitor(mirror::Object* object, void* arg) {
+  if (reinterpret_cast<TestPredicate*>(arg)->IsMarked(object)) {
+    return object;
+  }
+  return nullptr;
 }
 
 TEST_F(InternTableTest, SweepInternTableWeaks) {
@@ -105,7 +108,7 @@
   p.Expect(s1.get());
   {
     ReaderMutexLock mu(soa.Self(), *Locks::heap_bitmap_lock_);
-    t.SweepInternTableWeaks(IsMarked, &p);
+    t.SweepInternTableWeaks(IsMarkedSweepingVisitor, &p);
   }
 
   EXPECT_EQ(2U, t.Size());
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index d72ddf6..7f0fde4 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -314,14 +314,14 @@
   return soa.EncodeField(field);
 }
 
-static void PinPrimitiveArray(const ScopedObjectAccess& soa, const Array* array)
+static void PinPrimitiveArray(const ScopedObjectAccess& soa, Array* array)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   JavaVMExt* vm = soa.Vm();
   MutexLock mu(soa.Self(), vm->pins_lock);
   vm->pin_table.Add(array);
 }
 
-static void UnpinPrimitiveArray(const ScopedObjectAccess& soa, const Array* array)
+static void UnpinPrimitiveArray(const ScopedObjectAccess& soa, Array* array)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   JavaVMExt* vm = soa.Vm();
   MutexLock mu(soa.Self(), vm->pins_lock);
@@ -1997,7 +1997,7 @@
     CHECK_NON_NULL_ARGUMENT(GetStringUTFRegion, java_string);
     ScopedObjectAccess soa(env);
     String* s = soa.Decode<String*>(java_string);
-    const CharArray* chars = s->GetCharArray();
+    CharArray* chars = s->GetCharArray();
     PinPrimitiveArray(soa, chars);
     if (is_copy != NULL) {
       *is_copy = JNI_FALSE;
@@ -3217,6 +3217,18 @@
   return native_method;
 }
 
+void JavaVMExt::SweepJniWeakGlobals(RootVisitor visitor, void* arg) {
+  WriterMutexLock mu(Thread::Current(), weak_globals_lock);
+  for (mirror::Object** entry : weak_globals) {
+    mirror::Object* obj = *entry;
+    mirror::Object* new_obj = visitor(obj, arg);
+    if (new_obj == nullptr) {
+      new_obj = kClearedJniWeakGlobal;
+    }
+    *entry = new_obj;
+  }
+}
+
 void JavaVMExt::VisitRoots(RootVisitor* visitor, void* arg) {
   Thread* self = Thread::Current();
   {
diff --git a/runtime/jni_internal.h b/runtime/jni_internal.h
index bad3841..2fcebf0 100644
--- a/runtime/jni_internal.h
+++ b/runtime/jni_internal.h
@@ -89,6 +89,8 @@
 
   void SetCheckJniEnabled(bool enabled);
 
+  void SweepJniWeakGlobals(RootVisitor visitor, void* arg);
+
   void VisitRoots(RootVisitor*, void*);
 
   Runtime* runtime;
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index 7d968c7..b82683e 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -33,6 +33,10 @@
   return GetFieldObject<const CharArray*>(ValueOffset(), false);
 }
 
+CharArray* String::GetCharArray() {
+  return GetFieldObject<CharArray*>(ValueOffset(), false);
+}
+
 void String::ComputeHashCode() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   SetHashCode(ComputeUtf16Hash(GetCharArray(), GetOffset(), GetLength()));
 }
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index bf545ea..01d8f31 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -44,6 +44,7 @@
   }
 
   const CharArray* GetCharArray() const;
+  CharArray* GetCharArray();
 
   int32_t GetOffset() const {
     int32_t result = GetField32(OffsetOffset(), false);
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 92e6541..570c2be 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -194,6 +194,10 @@
   return obj_;
 }
 
+void Monitor::SetObject(mirror::Object* object) {
+  obj_ = object;
+}
+
 void Monitor::Lock(Thread* self) {
   if (owner_ == self) {
     lock_count_++;
@@ -1001,15 +1005,19 @@
   list_.push_front(m);
 }
 
-void MonitorList::SweepMonitorList(IsMarkedTester is_marked, void* arg) {
+void MonitorList::SweepMonitorList(RootVisitor visitor, void* arg) {
   MutexLock mu(Thread::Current(), monitor_list_lock_);
   for (auto it = list_.begin(); it != list_.end(); ) {
     Monitor* m = *it;
-    if (!is_marked(m->GetObject(), arg)) {
-      VLOG(monitor) << "freeing monitor " << m << " belonging to unmarked object " << m->GetObject();
+    mirror::Object* obj = m->GetObject();
+    mirror::Object* new_obj = visitor(obj, arg);
+    if (new_obj == nullptr) {
+      VLOG(monitor) << "freeing monitor " << m << " belonging to unmarked object "
+                    << m->GetObject();
       delete m;
       it = list_.erase(it);
     } else {
+      m->SetObject(new_obj);
       ++it;
     }
   }
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 6651768..4249316 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -103,6 +103,7 @@
   static bool IsValidLockWord(int32_t lock_word);
 
   mirror::Object* GetObject();
+  void SetObject(mirror::Object* object);
 
  private:
   explicit Monitor(Thread* owner, mirror::Object* obj)
@@ -159,7 +160,7 @@
   int lock_count_ GUARDED_BY(monitor_lock_);
 
   // What object are we part of (for debugging).
-  mirror::Object* const obj_;
+  mirror::Object* obj_;
 
   // Threads currently waiting on this monitor.
   Thread* wait_set_ GUARDED_BY(monitor_lock_);
@@ -183,8 +184,7 @@
 
   void Add(Monitor* m);
 
-  void SweepMonitorList(IsMarkedTester is_marked, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  void SweepMonitorList(RootVisitor visitor, void* arg);
 
  private:
   Mutex monitor_list_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
diff --git a/runtime/reference_table.cc b/runtime/reference_table.cc
index 8e23cbb..e95fdb9 100644
--- a/runtime/reference_table.cc
+++ b/runtime/reference_table.cc
@@ -38,16 +38,16 @@
 ReferenceTable::~ReferenceTable() {
 }
 
-void ReferenceTable::Add(const mirror::Object* obj) {
+void ReferenceTable::Add(mirror::Object* obj) {
   DCHECK(obj != NULL);
-  if (entries_.size() == max_size_) {
+  if (entries_.size() >= max_size_) {
     LOG(FATAL) << "ReferenceTable '" << name_ << "' "
                << "overflowed (" << max_size_ << " entries)";
   }
   entries_.push_back(obj);
 }
 
-void ReferenceTable::Remove(const mirror::Object* obj) {
+void ReferenceTable::Remove(mirror::Object* obj) {
   // We iterate backwards on the assumption that references are LIFO.
   for (int i = entries_.size() - 1; i >= 0; --i) {
     if (entries_[i] == obj) {
@@ -232,8 +232,8 @@
 }
 
 void ReferenceTable::VisitRoots(RootVisitor* visitor, void* arg) {
-  for (const auto& ref : entries_) {
-    visitor(ref, arg);
+  for (auto& ref : entries_) {
+    ref = visitor(const_cast<mirror::Object*>(ref), arg);
   }
 }
 
diff --git a/runtime/reference_table.h b/runtime/reference_table.h
index e369fd0..37b3172 100644
--- a/runtime/reference_table.h
+++ b/runtime/reference_table.h
@@ -39,9 +39,9 @@
   ReferenceTable(const char* name, size_t initial_size, size_t max_size);
   ~ReferenceTable();
 
-  void Add(const mirror::Object* obj);
+  void Add(mirror::Object* obj);
 
-  void Remove(const mirror::Object* obj);
+  void Remove(mirror::Object* obj);
 
   size_t Size() const;
 
@@ -50,7 +50,7 @@
   void VisitRoots(RootVisitor* visitor, void* arg);
 
  private:
-  typedef std::vector<const mirror::Object*> Table;
+  typedef std::vector<mirror::Object*> Table;
   static void Dump(std::ostream& os, const Table& entries)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   friend class IndirectReferenceTable;  // For Dump.
diff --git a/runtime/root_visitor.h b/runtime/root_visitor.h
index 3aa9b4b..a2d898b 100644
--- a/runtime/root_visitor.h
+++ b/runtime/root_visitor.h
@@ -23,7 +23,8 @@
 }  // namespace mirror
 class StackVisitor;
 
-typedef void (RootVisitor)(const mirror::Object* root, void* arg);
+typedef mirror::Object* (RootVisitor)(mirror::Object* root, void* arg)
+    __attribute__((warn_unused_result));
 typedef void (VerifyRootVisitor)(const mirror::Object* root, void* arg, size_t vreg,
                                  const StackVisitor* visitor);
 typedef bool (IsMarkedTester)(const mirror::Object* object, void* arg);
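
This typedef change is the core of the patch: a RootVisitor now returns the (possibly relocated) object, and every caller is responsible for storing the result back into the root slot - the groundwork for a moving collector. The caller-side pattern, repeated throughout the patch (root_slot is an illustrative name):

    root_slot = visitor(root_slot, arg);  // store back the possibly-moved pointer
    DCHECK(root_slot != nullptr);         // roots themselves are never cleared
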
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 09cbd0b..fe33185 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -319,6 +319,12 @@
   return result;
 }
 
+void Runtime::SweepSystemWeaks(RootVisitor* visitor, void* arg) {
+  GetInternTable()->SweepInternTableWeaks(visitor, arg);
+  GetMonitorList()->SweepMonitorList(visitor, arg);
+  GetJavaVM()->SweepJniWeakGlobals(visitor, arg);
+}
+
 Runtime::ParsedOptions* Runtime::ParsedOptions::Create(const Options& options, bool ignore_unrecognized) {
   UniquePtr<ParsedOptions> parsed(new ParsedOptions());
   const char* boot_class_path_string = getenv("BOOTCLASSPATH");
@@ -1132,22 +1138,23 @@
 
 void Runtime::VisitConcurrentRoots(RootVisitor* visitor, void* arg, bool only_dirty,
                                    bool clean_dirty) {
-  if (!only_dirty || intern_table_->IsDirty()) {
-    intern_table_->VisitRoots(visitor, arg, clean_dirty);
-  }
-  if (!only_dirty || class_linker_->IsDirty()) {
-    class_linker_->VisitRoots(visitor, arg, clean_dirty);
-  }
+  intern_table_->VisitRoots(visitor, arg, only_dirty, clean_dirty);
+  class_linker_->VisitRoots(visitor, arg, only_dirty, clean_dirty);
 }
 
 void Runtime::VisitNonThreadRoots(RootVisitor* visitor, void* arg) {
   java_vm_->VisitRoots(visitor, arg);
-  if (pre_allocated_OutOfMemoryError_ != NULL) {
-    visitor(pre_allocated_OutOfMemoryError_, arg);
+  if (pre_allocated_OutOfMemoryError_ != nullptr) {
+    pre_allocated_OutOfMemoryError_ = reinterpret_cast<mirror::Throwable*>(
+        visitor(pre_allocated_OutOfMemoryError_, arg));
+    DCHECK(pre_allocated_OutOfMemoryError_ != nullptr);
   }
-  visitor(resolution_method_, arg);
+  resolution_method_ = reinterpret_cast<mirror::ArtMethod*>(visitor(resolution_method_, arg));
+  DCHECK(resolution_method_ != nullptr);
   for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
-    visitor(callee_save_methods_[i], arg);
+    callee_save_methods_[i] = reinterpret_cast<mirror::ArtMethod*>(
+        visitor(callee_save_methods_[i], arg));
+    DCHECK(callee_save_methods_[i] != nullptr);
   }
 }
 
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 21161a0..5acd5d7 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -317,6 +317,10 @@
   void VisitNonConcurrentRoots(RootVisitor* visitor, void* arg)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Sweep system weaks; a system weak is deleted if the visitor returns nullptr. Otherwise, the
+  // system weak is updated to the visitor's returned value.
+  void SweepSystemWeaks(RootVisitor* visitor, void* arg);
+
   // Returns a special method that calls into a trampoline for runtime method resolution
   mirror::ArtMethod* GetResolutionMethod() const {
     CHECK(HasResolutionMethod());
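
A hedged example of a visitor satisfying the SweepSystemWeaks contract above (modeled on the mark-sweep callbacks elsewhere in this patch): returning nullptr clears the weak reference, while returning an object keeps it and may forward it to a new address.

    static mirror::Object* IsMarkedSweepVisitor(mirror::Object* obj, void* arg) {
      MarkSweep* mark_sweep = reinterpret_cast<MarkSweep*>(arg);
      return mark_sweep->IsMarked(obj) ? obj : nullptr;  // nullptr => delete the weak
    }
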
diff --git a/runtime/sirt_ref.h b/runtime/sirt_ref.h
index 81f0dff..25d6fb3 100644
--- a/runtime/sirt_ref.h
+++ b/runtime/sirt_ref.h
@@ -30,7 +30,7 @@
     self_->PushSirt(&sirt_);
   }
   ~SirtRef() {
-    CHECK(self_->PopSirt() == &sirt_);
+    CHECK_EQ(self_->PopSirt(), &sirt_);
   }
 
   T& operator*() const { return *get(); }
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 206bff3..1715664 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -148,8 +148,8 @@
       const DexFile::CodeItem* code_item = MethodHelper(m).GetCodeItem();
       DCHECK(code_item != NULL) << PrettyMethod(m);  // Can't be NULL or how would we compile its instructions?
       size_t frame_size = m->GetFrameSizeInBytes();
-      return GetVReg(cur_quick_frame_, code_item, m->GetCoreSpillMask(), m->GetFpSpillMask(),
-                     frame_size, vreg);
+      return *GetVRegAddr(cur_quick_frame_, code_item, m->GetCoreSpillMask(), m->GetFpSpillMask(),
+                          frame_size, vreg);
     }
   } else {
     return cur_shadow_frame_->GetVReg(vreg);
diff --git a/runtime/stack.h b/runtime/stack.h
index 8ecf8f0..bd29ceb 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -150,7 +150,12 @@
   mirror::Object* GetVRegReference(size_t i) const {
     DCHECK_LT(i, NumberOfVRegs());
     if (HasReferenceArray()) {
-      return References()[i];
+      mirror::Object* ref = References()[i];
+      // If the vreg reference is not equal to the vreg, the reference is stale.
+      if (reinterpret_cast<uint32_t>(ref) != vregs_[i]) {
+        return nullptr;
+      }
+      return ref;
     } else {
       const uint32_t* vreg = &vregs_[i];
       return *reinterpret_cast<mirror::Object* const*>(vreg);
@@ -459,13 +464,14 @@
   uintptr_t GetGPR(uint32_t reg) const;
   void SetGPR(uint32_t reg, uintptr_t value);
 
-  uint32_t GetVReg(mirror::ArtMethod** cur_quick_frame, const DexFile::CodeItem* code_item,
+  // This is a fast path for getting/setting values in a quick frame.
+  uint32_t* GetVRegAddr(mirror::ArtMethod** cur_quick_frame, const DexFile::CodeItem* code_item,
                    uint32_t core_spills, uint32_t fp_spills, size_t frame_size,
                    uint16_t vreg) const {
     int offset = GetVRegOffset(code_item, core_spills, fp_spills, frame_size, vreg);
     DCHECK_EQ(cur_quick_frame, GetCurrentQuickFrame());
     byte* vreg_addr = reinterpret_cast<byte*>(cur_quick_frame) + offset;
-    return *reinterpret_cast<uint32_t*>(vreg_addr);
+    return reinterpret_cast<uint32_t*>(vreg_addr);
   }
 
   uintptr_t GetReturnPc() const;
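
GetVRegAddr turns the old read-only GetVReg lookup into a read/write fast path; the stack walker in thread.cc uses it to update a reference vreg in place, roughly:

    uint32_t* reg_addr = GetVRegAddr(cur_quick_frame, code_item, core_spills,
                                     fp_spills, frame_size, reg);
    mirror::Object* ref = reinterpret_cast<mirror::Object*>(*reg_addr);
    mirror::Object* new_ref = visitor_(ref, reg, this);  // may relocate the object
    if (new_ref != ref) {
      *reg_addr = reinterpret_cast<uint32_t>(new_ref);   // write back in place
    }
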
diff --git a/runtime/thread.cc b/runtime/thread.cc
index a454195..d7d4b1f 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1012,9 +1012,10 @@
   }
 }
 
-static void MonitorExitVisitor(const mirror::Object* object, void* arg) NO_THREAD_SAFETY_ANALYSIS {
+static mirror::Object* MonitorExitVisitor(mirror::Object* object, void* arg)
+    NO_THREAD_SAFETY_ANALYSIS {
   Thread* self = reinterpret_cast<Thread*>(arg);
-  mirror::Object* entered_monitor = const_cast<mirror::Object*>(object);
+  mirror::Object* entered_monitor = object;
   if (self->HoldsLock(entered_monitor)) {
     LOG(WARNING) << "Calling MonitorExit on object "
                  << object << " (" << PrettyTypeOf(object) << ")"
@@ -1022,6 +1023,7 @@
                  << *Thread::Current() << " which is detaching";
     entered_monitor->MonitorExit(self);
   }
+  return object;
 }
 
 void Thread::Destroy() {
@@ -1151,8 +1153,12 @@
     size_t num_refs = cur->NumberOfReferences();
     for (size_t j = 0; j < num_refs; j++) {
       mirror::Object* object = cur->GetReference(j);
-      if (object != NULL) {
-        visitor(object, arg);
+      if (object != nullptr) {
+        const mirror::Object* new_obj = visitor(object, arg);
+        DCHECK(new_obj != nullptr);
+        if (new_obj != object) {
+          cur->SetReference(j, const_cast<mirror::Object*>(new_obj));
+        }
       }
     }
   }
@@ -2019,8 +2025,11 @@
         // SIRT for JNI or References for interpreter.
         for (size_t reg = 0; reg < num_regs; ++reg) {
           mirror::Object* ref = shadow_frame->GetVRegReference(reg);
-          if (ref != NULL) {
-            visitor_(ref, reg, this);
+          if (ref != nullptr) {
+            mirror::Object* new_ref = visitor_(ref, reg, this);
+            if (new_ref != ref) {
+              shadow_frame->SetVRegReference(reg, new_ref);
+            }
           }
         }
       } else {
@@ -2040,8 +2049,11 @@
         for (size_t reg = 0; reg < num_regs; ++reg) {
           if (TestBitmap(reg, reg_bitmap)) {
             mirror::Object* ref = shadow_frame->GetVRegReference(reg);
-            if (ref != NULL) {
-              visitor_(ref, reg, this);
+            if (ref != nullptr) {
+              mirror::Object* new_ref = visitor_(ref, reg, this);
+              if (new_ref != ref) {
+                shadow_frame->SetVRegReference(reg, new_ref);
+              }
             }
           }
         }
@@ -2072,19 +2084,25 @@
             // Does this register hold a reference?
             if (TestBitmap(reg, reg_bitmap)) {
               uint32_t vmap_offset;
-              mirror::Object* ref;
               if (vmap_table.IsInContext(reg, kReferenceVReg, &vmap_offset)) {
-                uintptr_t val = GetGPR(vmap_table.ComputeRegister(core_spills, vmap_offset,
-                                                                  kReferenceVReg));
-                ref = reinterpret_cast<mirror::Object*>(val);
+                int vmap_reg = vmap_table.ComputeRegister(core_spills, vmap_offset, kReferenceVReg);
+                mirror::Object* ref = reinterpret_cast<mirror::Object*>(GetGPR(vmap_reg));
+                if (ref != nullptr) {
+                  mirror::Object* new_ref = visitor_(ref, reg, this);
+                  if (ref != new_ref) {
+                    SetGPR(vmap_reg, reinterpret_cast<uintptr_t>(new_ref));
+                  }
+                }
               } else {
-                ref = reinterpret_cast<mirror::Object*>(GetVReg(cur_quick_frame, code_item,
-                                                                core_spills, fp_spills, frame_size,
-                                                                reg));
-              }
-
-              if (ref != NULL) {
-                visitor_(ref, reg, this);
+                uint32_t* reg_addr =
+                    GetVRegAddr(cur_quick_frame, code_item, core_spills, fp_spills, frame_size, reg);
+                mirror::Object* ref = reinterpret_cast<mirror::Object*>(*reg_addr);
+                if (ref != nullptr) {
+                  mirror::Object* new_ref = visitor_(ref, reg, this);
+                  if (ref != new_ref) {
+                    *reg_addr = reinterpret_cast<uint32_t>(new_ref);
+                  }
+                }
               }
             }
           }
@@ -2110,8 +2128,8 @@
  public:
   RootCallbackVisitor(RootVisitor* visitor, void* arg) : visitor_(visitor), arg_(arg) {}
 
-  void operator()(const mirror::Object* obj, size_t, const StackVisitor*) const {
-    visitor_(obj, arg_);
+  mirror::Object* operator()(mirror::Object* obj, size_t, const StackVisitor*) const {
+    return visitor_(obj, arg_);
   }
 
  private:
@@ -2135,67 +2153,17 @@
   void* const arg_;
 };
 
-struct VerifyRootWrapperArg {
-  VerifyRootVisitor* visitor;
-  void* arg;
-};
-
-static void VerifyRootWrapperCallback(const mirror::Object* root, void* arg) {
-  VerifyRootWrapperArg* wrapperArg = reinterpret_cast<VerifyRootWrapperArg*>(arg);
-  wrapperArg->visitor(root, wrapperArg->arg, 0, NULL);
-}
-
-void Thread::VerifyRoots(VerifyRootVisitor* visitor, void* arg) {
-  // We need to map from a RootVisitor to VerifyRootVisitor, so pass in nulls for arguments we
-  // don't have.
-  VerifyRootWrapperArg wrapperArg;
-  wrapperArg.arg = arg;
-  wrapperArg.visitor = visitor;
-
-  if (opeer_ != NULL) {
-    VerifyRootWrapperCallback(opeer_, &wrapperArg);
-  }
-  if (exception_ != NULL) {
-    VerifyRootWrapperCallback(exception_, &wrapperArg);
-  }
-  throw_location_.VisitRoots(VerifyRootWrapperCallback, &wrapperArg);
-  if (class_loader_override_ != NULL) {
-    VerifyRootWrapperCallback(class_loader_override_, &wrapperArg);
-  }
-  jni_env_->locals.VisitRoots(VerifyRootWrapperCallback, &wrapperArg);
-  jni_env_->monitors.VisitRoots(VerifyRootWrapperCallback, &wrapperArg);
-
-  SirtVisitRoots(VerifyRootWrapperCallback, &wrapperArg);
-
-  // Visit roots on this thread's stack
-  Context* context = GetLongJumpContext();
-  VerifyCallbackVisitor visitorToCallback(visitor, arg);
-  ReferenceMapVisitor<VerifyCallbackVisitor> mapper(this, context, visitorToCallback);
-  mapper.WalkStack();
-  ReleaseLongJumpContext(context);
-
-  std::deque<instrumentation::InstrumentationStackFrame>* instrumentation_stack = GetInstrumentationStack();
-  typedef std::deque<instrumentation::InstrumentationStackFrame>::const_iterator It;
-  for (It it = instrumentation_stack->begin(), end = instrumentation_stack->end(); it != end; ++it) {
-    mirror::Object* this_object = (*it).this_object_;
-    if (this_object != NULL) {
-      VerifyRootWrapperCallback(this_object, &wrapperArg);
-    }
-    mirror::ArtMethod* method = (*it).method_;
-    VerifyRootWrapperCallback(method, &wrapperArg);
-  }
-}
-
 void Thread::VisitRoots(RootVisitor* visitor, void* arg) {
-  if (opeer_ != NULL) {
-    visitor(opeer_, arg);
+  if (opeer_ != nullptr) {
+    opeer_ = visitor(opeer_, arg);
   }
-  if (exception_ != NULL) {
-    visitor(exception_, arg);
+  if (exception_ != nullptr) {
+    exception_ = reinterpret_cast<mirror::Throwable*>(visitor(exception_, arg));
   }
   throw_location_.VisitRoots(visitor, arg);
-  if (class_loader_override_ != NULL) {
-    visitor(class_loader_override_, arg);
+  if (class_loader_override_ != nullptr) {
+    class_loader_override_ = reinterpret_cast<mirror::ClassLoader*>(
+        visitor(class_loader_override_, arg));
   }
   jni_env_->locals.VisitRoots(visitor, arg);
   jni_env_->monitors.VisitRoots(visitor, arg);
@@ -2209,24 +2177,26 @@
   mapper.WalkStack();
   ReleaseLongJumpContext(context);
 
-  for (const instrumentation::InstrumentationStackFrame& frame : *GetInstrumentationStack()) {
-    mirror::Object* this_object = frame.this_object_;
-    if (this_object != NULL) {
-      visitor(this_object, arg);
+  for (instrumentation::InstrumentationStackFrame& frame : *GetInstrumentationStack()) {
+    if (frame.this_object_ != nullptr) {
+      frame.this_object_ = visitor(frame.this_object_, arg);
+      DCHECK(frame.this_object_ != nullptr);
     }
-    mirror::ArtMethod* method = frame.method_;
-    visitor(method, arg);
+    frame.method_ = reinterpret_cast<mirror::ArtMethod*>(visitor(frame.method_, arg));
+    DCHECK(frame.method_ != nullptr);
   }
 }
 
-static void VerifyObject(const mirror::Object* root, void* arg) {
-  gc::Heap* heap = reinterpret_cast<gc::Heap*>(arg);
-  heap->VerifyObject(root);
+static mirror::Object* VerifyRoot(mirror::Object* root, void* arg) {
+  DCHECK(root != nullptr);
+  DCHECK(arg != nullptr);
+  reinterpret_cast<gc::Heap*>(arg)->VerifyObject(root);
+  return root;
 }
 
 void Thread::VerifyStackImpl() {
   UniquePtr<Context> context(Context::Create());
-  RootCallbackVisitor visitorToCallback(VerifyObject, Runtime::Current()->GetHeap());
+  RootCallbackVisitor visitorToCallback(VerifyRoot, Runtime::Current()->GetHeap());
   ReferenceMapVisitor<RootCallbackVisitor> mapper(this, context.get(), visitorToCallback);
   mapper.WalkStack();
 }
diff --git a/runtime/thread.h b/runtime/thread.h
index f5f8f56..dbf9736 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -395,9 +395,6 @@
 
   void VisitRoots(RootVisitor* visitor, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void VerifyRoots(VerifyRootVisitor* visitor, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   void VerifyStack() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   //
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index aba81fe..44cf810 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -569,10 +569,24 @@
   }
 }
 
+struct VerifyRootWrapperArg {
+  VerifyRootVisitor* visitor;
+  void* arg;
+};
+
+static mirror::Object* VerifyRootWrapperCallback(mirror::Object* root, void* arg) {
+  VerifyRootWrapperArg* wrapperArg = reinterpret_cast<VerifyRootWrapperArg*>(arg);
+  wrapperArg->visitor(root, wrapperArg->arg, 0, NULL);
+  return root;
+}
+
 void ThreadList::VerifyRoots(VerifyRootVisitor* visitor, void* arg) const {
+  VerifyRootWrapperArg wrapper;
+  wrapper.visitor = visitor;
+  wrapper.arg = arg;
   MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
   for (const auto& thread : list_) {
-    thread->VerifyRoots(visitor, arg);
+    thread->VisitRoots(VerifyRootWrapperCallback, &wrapper);
   }
 }
 
diff --git a/runtime/throw_location.cc b/runtime/throw_location.cc
index e428511..01497ef 100644
--- a/runtime/throw_location.cc
+++ b/runtime/throw_location.cc
@@ -34,11 +34,14 @@
 }
 
 void ThrowLocation::VisitRoots(RootVisitor* visitor, void* arg) {
-  if (this_object_ != NULL) {
-    visitor(this_object_, arg);
+  if (this_object_ != nullptr) {
+    this_object_ = const_cast<mirror::Object*>(visitor(this_object_, arg));
+    DCHECK(this_object_ != nullptr);
   }
-  if (method_ != NULL) {
-    visitor(method_, arg);
+  if (method_ != nullptr) {
+    method_ = const_cast<mirror::ArtMethod*>(
+        reinterpret_cast<const mirror::ArtMethod*>(visitor(method_, arg)));
+    DCHECK(method_ != nullptr);
   }
 }
 
diff --git a/test/run-test b/test/run-test
index 11dcfc5..c449e84 100755
--- a/test/run-test
+++ b/test/run-test
@@ -269,7 +269,7 @@
         fi
     fi
     # Clean up extraneous files that are not used by tests.
-    find $tmp_dir -mindepth 1  ! -regex ".*/\(.*jar\|$build_output\|$expected\)" | xargs rm -rf
+    find $tmp_dir -mindepth 1  ! -regex ".*/\(.*jar\|$output\|$expected\)" | xargs rm -rf
     exit 0
 else
     "./${build}" >"$build_output" 2>&1