174 files changed, 5739 insertions, 2255 deletions
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 5a3236d958..730e61d488 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -26,6 +26,7 @@ GTEST_DEX_DIRECTORIES := \
   AllFields \
   ExceptionHandle \
   GetMethodSignature \
+  Instrumentation \
   Interfaces \
   Main \
   MultiDex \
@@ -64,6 +65,7 @@ ART_GTEST_class_linker_test_DEX_DEPS := Interfaces MultiDex MyClass Nested Stati
 ART_GTEST_compiler_driver_test_DEX_DEPS := AbstractMethod StaticLeafMethods
 ART_GTEST_dex_file_test_DEX_DEPS := GetMethodSignature Main Nested
 ART_GTEST_exception_test_DEX_DEPS := ExceptionHandle
+ART_GTEST_instrumentation_test_DEX_DEPS := Instrumentation
 ART_GTEST_jni_compiler_test_DEX_DEPS := MyClassNatives
 ART_GTEST_jni_internal_test_DEX_DEPS := AllFields StaticLeafMethods
 ART_GTEST_oat_file_assistant_test_DEX_DEPS := Main MainStripped MultiDex Nested
@@ -157,6 +159,7 @@ RUNTIME_GTEST_COMMON_SRC_FILES := \
   runtime/handle_scope_test.cc \
   runtime/indenter_test.cc \
   runtime/indirect_reference_table_test.cc \
+  runtime/instrumentation_test.cc \
   runtime/intern_table_test.cc \
   runtime/interpreter/safe_math_test.cc \
   runtime/java_vm_ext_test.cc \
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 0acdd422df..b78b3d7d75 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -172,7 +172,6 @@ enum ExtendedMIROpcode {
   kMirOpRangeCheck,
   kMirOpDivZeroCheck,
   kMirOpCheck,
-  kMirOpCheckPart2,
   kMirOpSelect,
 
   // Vector opcodes:
diff --git a/compiler/dex/gvn_dead_code_elimination.cc b/compiler/dex/gvn_dead_code_elimination.cc
index 4f0e9d1b67..915fbcda04 100644
--- a/compiler/dex/gvn_dead_code_elimination.cc
+++ b/compiler/dex/gvn_dead_code_elimination.cc
@@ -20,6 +20,7 @@
 #include "base/bit_vector-inl.h"
 #include "base/macros.h"
+#include "base/allocator.h"
 #include "compiler_enums.h"
 #include "dataflow_iterator-inl.h"
 #include "dex_instruction.h"
@@ -75,6 +76,9 @@ inline void GvnDeadCodeElimination::MIRData::RemovePrevChange(int v_reg, MIRData
 GvnDeadCodeElimination::VRegChains::VRegChains(uint32_t num_vregs, ScopedArenaAllocator* alloc)
     : num_vregs_(num_vregs),
       vreg_data_(alloc->AllocArray<VRegValue>(num_vregs, kArenaAllocMisc)),
+      vreg_high_words_(num_vregs, false, Allocator::GetNoopAllocator(),
+                       BitVector::BitsToWords(num_vregs),
+                       alloc->AllocArray<uint32_t>(BitVector::BitsToWords(num_vregs))),
       mir_data_(alloc->Adapter()) {
   mir_data_.reserve(100);
 }
@@ -82,6 +86,7 @@ GvnDeadCodeElimination::VRegChains::VRegChains(uint32_t num_vregs, ScopedArenaAl
 inline void GvnDeadCodeElimination::VRegChains::Reset() {
   DCHECK(mir_data_.empty());
   std::fill_n(vreg_data_, num_vregs_, VRegValue());
+  vreg_high_words_.ClearAllBits();
 }
 
 void GvnDeadCodeElimination::VRegChains::AddMIRWithDef(MIR* mir, int v_reg, bool wide,
@@ -93,24 +98,26 @@ void GvnDeadCodeElimination::VRegChains::AddMIRWithDef(MIR* mir, int v_reg, bool
   data->wide_def = wide;
   data->vreg_def = v_reg;
 
-  if (vreg_data_[v_reg].change != kNPos &&
-      mir_data_[vreg_data_[v_reg].change].vreg_def + 1 == v_reg) {
-    data->low_def_over_high_word = true;
-  }
-  data->prev_value = vreg_data_[v_reg];
   DCHECK_LT(static_cast<size_t>(v_reg), num_vregs_);
+  data->prev_value = vreg_data_[v_reg];
+  data->low_def_over_high_word =
+      (vreg_data_[v_reg].change != kNPos)
+      ? GetMIRData(vreg_data_[v_reg].change)->vreg_def + 1 == v_reg
+      : vreg_high_words_.IsBitSet(v_reg);
   vreg_data_[v_reg].value = new_value;
   vreg_data_[v_reg].change = pos;
+  vreg_high_words_.ClearBit(v_reg);
 
   if (wide) {
-    if (vreg_data_[v_reg + 1].change != kNPos &&
-        mir_data_[vreg_data_[v_reg + 1].change].vreg_def == v_reg + 1) {
-      data->high_def_over_low_word = true;
-    }
-    data->prev_value_high = vreg_data_[v_reg + 1];
     DCHECK_LT(static_cast<size_t>(v_reg + 1), num_vregs_);
+    data->prev_value_high = vreg_data_[v_reg + 1];
+    data->high_def_over_low_word =
+        (vreg_data_[v_reg + 1].change != kNPos)
+        ? GetMIRData(vreg_data_[v_reg + 1].change)->vreg_def == v_reg + 1
+        : !vreg_high_words_.IsBitSet(v_reg + 1);
     vreg_data_[v_reg + 1].value = new_value;
     vreg_data_[v_reg + 1].change = pos;
+    vreg_high_words_.SetBit(v_reg + 1);
   }
 }
@@ -123,9 +130,17 @@ void GvnDeadCodeElimination::VRegChains::RemoveLastMIRData() {
   if (data->has_def) {
     DCHECK_EQ(vreg_data_[data->vreg_def].change, NumMIRs() - 1u);
     vreg_data_[data->vreg_def] = data->prev_value;
+    DCHECK(!vreg_high_words_.IsBitSet(data->vreg_def));
+    if (data->low_def_over_high_word) {
+      vreg_high_words_.SetBit(data->vreg_def);
+    }
     if (data->wide_def) {
       DCHECK_EQ(vreg_data_[data->vreg_def + 1].change, NumMIRs() - 1u);
       vreg_data_[data->vreg_def + 1] = data->prev_value_high;
+      DCHECK(vreg_high_words_.IsBitSet(data->vreg_def + 1));
+      if (data->high_def_over_low_word) {
+        vreg_high_words_.ClearBit(data->vreg_def + 1);
+      }
     }
   }
   mir_data_.pop_back();
@@ -169,6 +184,7 @@ void GvnDeadCodeElimination::VRegChains::InsertInitialValueHigh(int v_reg, uint1
   uint16_t change = vreg_data_[v_reg].change;
   if (change == kNPos) {
     vreg_data_[v_reg].value = value;
+    vreg_high_words_.SetBit(v_reg);
   } else {
     while (true) {
       MIRData* data = &mir_data_[change];
@@ -208,6 +224,7 @@ void GvnDeadCodeElimination::VRegChains::UpdateInitialVRegValue(int v_reg, bool
       }
     }
     vreg_data_[v_reg].value = old_value;
+    DCHECK(!vreg_high_words_.IsBitSet(v_reg));  // Keep marked as low word.
   }
 } else {
   DCHECK_LT(static_cast<size_t>(v_reg + 1), num_vregs_);
@@ -223,6 +240,7 @@ void GvnDeadCodeElimination::VRegChains::UpdateInitialVRegValue(int v_reg, bool
       old_value = lvn->GetStartingVregValueNumber(v_reg);
     }
     vreg_data_[v_reg].value = old_value;
+    DCHECK(!vreg_high_words_.IsBitSet(v_reg));  // Keep marked as low word.
   }
   if (check_high && vreg_data_[v_reg + 1].value == kNoValue) {
     uint16_t old_value = lvn->GetStartingVregValueNumber(v_reg + 1);
@@ -234,6 +252,7 @@ void GvnDeadCodeElimination::VRegChains::UpdateInitialVRegValue(int v_reg, bool
       }
     }
     vreg_data_[v_reg + 1].value = old_value;
+    DCHECK(!vreg_high_words_.IsBitSet(v_reg + 1));  // Keep marked as low word.
   }
 }
}
@@ -300,6 +319,8 @@ void GvnDeadCodeElimination::VRegChains::ReplaceChange(uint16_t old_change, uint
     if (next_change == kNPos) {
       DCHECK_EQ(vreg_data_[v_reg].change, old_change);
       vreg_data_[v_reg].change = new_change;
+      DCHECK_EQ(vreg_high_words_.IsBitSet(v_reg), v_reg == old_data->vreg_def + 1);
+      // No change in vreg_high_words_.
     } else {
       DCHECK_EQ(mir_data_[next_change].PrevChange(v_reg), old_change);
       mir_data_[next_change].SetPrevChange(v_reg, new_change);
@@ -316,6 +337,13 @@ void GvnDeadCodeElimination::VRegChains::RemoveChange(uint16_t change) {
     if (next_change == kNPos) {
       DCHECK_EQ(vreg_data_[v_reg].change, change);
       vreg_data_[v_reg] = (data->vreg_def == v_reg) ? data->prev_value : data->prev_value_high;
+      DCHECK_EQ(vreg_high_words_.IsBitSet(v_reg), v_reg == data->vreg_def + 1);
+      if (data->vreg_def == v_reg && data->low_def_over_high_word) {
+        vreg_high_words_.SetBit(v_reg);
+      } else if (data->vreg_def != v_reg &&
+                 (data->high_def_over_low_word || data->prev_value_high.value == kNoValue)) {
+        vreg_high_words_.ClearBit(v_reg);
+      }
     } else {
       DCHECK_EQ(mir_data_[next_change].PrevChange(v_reg), change);
       mir_data_[next_change].RemovePrevChange(v_reg, data);
@@ -533,7 +561,7 @@ MIR* GvnDeadCodeElimination::RenameSRegDefOrCreatePhi(uint16_t def_change, uint1
   // Just before we kill mir_to_kill, we need to replace the previous SSA reg assigned to the
   // same dalvik reg to keep consistency with subsequent instructions. However, if there's no
-  // defining MIR for that dalvik reg, the preserved valus must come from its predecessors
+  // defining MIR for that dalvik reg, the preserved values must come from its predecessors
   // and we need to create a new Phi (a degenerate Phi if there's only a single predecessor).
   if (def_change == kNPos) {
@@ -541,7 +569,21 @@ MIR* GvnDeadCodeElimination::RenameSRegDefOrCreatePhi(uint16_t def_change, uint1
       DCHECK_EQ(mir_graph_->SRegToVReg(new_s_reg) + 1, mir_graph_->SRegToVReg(new_s_reg + 1));
       CreatePhi(new_s_reg + 1);  // High word Phi.
     }
-    return CreatePhi(new_s_reg);
+    MIR* phi = CreatePhi(new_s_reg);
+    // If this is a degenerate Phi with all inputs being the same SSA reg, we need to rename its uses.
+    DCHECK_NE(phi->ssa_rep->num_uses, 0u);
+    int old_s_reg = phi->ssa_rep->uses[0];
+    bool all_same = true;
+    for (size_t i = 1u, num = phi->ssa_rep->num_uses; i != num; ++i) {
+      if (phi->ssa_rep->uses[i] != old_s_reg) {
+        all_same = false;
+        break;
+      }
+    }
+    if (all_same) {
+      vreg_chains_.RenameSRegUses(0u, last_change, old_s_reg, new_s_reg, wide);
+    }
+    return phi;
   } else {
     DCHECK_LT(def_change, last_change);
     DCHECK_LE(last_change, vreg_chains_.NumMIRs());
diff --git a/compiler/dex/gvn_dead_code_elimination.h b/compiler/dex/gvn_dead_code_elimination.h
index bc75a01778..06022db501 100644
--- a/compiler/dex/gvn_dead_code_elimination.h
+++ b/compiler/dex/gvn_dead_code_elimination.h
@@ -121,6 +121,7 @@ class GvnDeadCodeElimination : public DeletableArenaObject<kArenaAllocMisc> {
    private:
     const uint32_t num_vregs_;
     VRegValue* const vreg_data_;
+    BitVector vreg_high_words_;
     ScopedArenaVector<MIRData> mir_data_;
   };
diff --git a/compiler/dex/gvn_dead_code_elimination_test.cc b/compiler/dex/gvn_dead_code_elimination_test.cc
index f9f0882f08..4f8127338c 100644
--- a/compiler/dex/gvn_dead_code_elimination_test.cc
+++ b/compiler/dex/gvn_dead_code_elimination_test.cc
@@ -1629,6 +1629,52 @@ TEST_F(GvnDeadCodeEliminationTestDiamond, CreatePhi1) {
 }
 
 TEST_F(GvnDeadCodeEliminationTestDiamond, CreatePhi2) {
+  static const MIRDef mirs[] = {
+      DEF_CONST(3, Instruction::CONST, 0u, 1000),
+      DEF_MOVE(4, Instruction::MOVE, 1u, 0u),
+      DEF_CONST(4, Instruction::CONST, 2u, 1000),
+  };
+
+  static const int32_t sreg_to_vreg_map[] = { 0, 1, 0 };
+  PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+  PrepareMIRs(mirs);
+  PerformGVN_DCE();
+
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_EQ(value_names_[0], value_names_[1]);
+  EXPECT_EQ(value_names_[0], value_names_[2]);
+
+  static const bool eliminated[] = {
+      false, false, true,
+  };
+  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+  for (size_t i = 0; i != arraysize(eliminated); ++i) {
+    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+  }
+  // Check that we've created a single-input Phi to replace the CONST 3u.
+  BasicBlock* bb4 = cu_.mir_graph->GetBasicBlock(4);
+  MIR* phi = bb4->first_mir_insn;
+  ASSERT_TRUE(phi != nullptr);
+  ASSERT_EQ(kMirOpPhi, static_cast<int>(phi->dalvikInsn.opcode));
+  ASSERT_EQ(1, phi->ssa_rep->num_uses);
+  EXPECT_EQ(0, phi->ssa_rep->uses[0]);
+  ASSERT_EQ(1, phi->ssa_rep->num_defs);
+  EXPECT_EQ(2, phi->ssa_rep->defs[0]);
+  EXPECT_EQ(0u, phi->dalvikInsn.vA);
+  MIR* move = phi->next;
+  ASSERT_TRUE(move != nullptr);
+  ASSERT_EQ(Instruction::MOVE, move->dalvikInsn.opcode);
+  ASSERT_EQ(1, move->ssa_rep->num_uses);
+  EXPECT_EQ(2, move->ssa_rep->uses[0]);
+  ASSERT_EQ(1, move->ssa_rep->num_defs);
+  EXPECT_EQ(1, move->ssa_rep->defs[0]);
+  EXPECT_EQ(1u, move->dalvikInsn.vA);
+  EXPECT_EQ(0u, move->dalvikInsn.vB);
+}
+
+TEST_F(GvnDeadCodeEliminationTestDiamond, CreatePhi3) {
   static const IFieldDef ifields[] = {
       { 0u, 1u, 0u, false, kDexMemAccessWord },
   };
@@ -1850,4 +1896,39 @@ TEST_F(GvnDeadCodeEliminationTestLoop, IFieldLoopVariable) {
   EXPECT_EQ(2u, phi->dalvikInsn.vA);
 }
 
+TEST_F(GvnDeadCodeEliminationTestDiamond, LongOverlaps1) {
+  static const MIRDef mirs[] = {
+      DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 0u, 1000u),
+      DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 2u, 1000u),
+      DEF_MOVE_WIDE(4, Instruction::MOVE_WIDE, 4u, 0u),
+      DEF_MOVE_WIDE(4, Instruction::MOVE_WIDE, 6u, 2u),
+      DEF_MOVE_WIDE(4, Instruction::MOVE_WIDE, 8u, 4u),
+      DEF_MOVE_WIDE(4, Instruction::MOVE_WIDE, 10u, 6u),
+  };
+
+  // The last insn should overlap the first and second.
+  static const int32_t sreg_to_vreg_map[] = { 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3 };
+  PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+  PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 0, 2, 4, 6, 8, 10 };
+  MarkAsWideSRegs(wide_sregs);
+  PerformGVN_DCE();
+
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  EXPECT_EQ(value_names_[0], value_names_[1]);
+  EXPECT_EQ(value_names_[0], value_names_[2]);
+  EXPECT_EQ(value_names_[0], value_names_[3]);
+  EXPECT_EQ(value_names_[0], value_names_[4]);
+
+  static const bool eliminated[] = {
+      false, false, false, false, false, false,
+  };
+  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+  for (size_t i = 0; i != arraysize(eliminated); ++i) {
+    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+  }
+}
+
 }  // namespace art
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index b4aec98e01..a7ba061984 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -834,9 +834,6 @@ const uint64_t MIRGraph::oat_data_flow_attributes_[kMirOpLast] = {
   // 10B MIR_CHECK
   0,
 
-  // 10C MIR_CHECKPART2
-  0,
-
   // 10D MIR_SELECT
   DF_DA | DF_UB,
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 9e3fbbc967..1871f07106 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -52,8 +52,7 @@ const char* MIRGraph::extended_mir_op_names_[kMirOpLast - kMirOpFirst] = {
   "OpNullCheck",
   "OpRangeCheck",
   "OpDivZeroCheck",
-  "Check1",
-  "Check2",
+  "Check",
   "Select",
   "ConstVector",
   "MoveVector",
@@ -1508,7 +1507,7 @@ char* MIRGraph::GetDalvikDisassembly(const MIR* mir) {
   Instruction::Format dalvik_format = Instruction::k10x;  // Default to no-operand format.
 
   // Handle special cases that recover the original dalvik instruction.
-  if ((opcode == kMirOpCheck) || (opcode == kMirOpCheckPart2)) {
+  if (opcode == kMirOpCheck) {
     str.append(extended_mir_op_names_[opcode - kMirOpFirst]);
     str.append(": ");
     // Recover the original Dex instruction.
@@ -2517,8 +2516,6 @@ int MIR::DecodedInstruction::FlagsOf() const {
       return Instruction::kContinue | Instruction::kThrow;
     case kMirOpCheck:
       return Instruction::kContinue | Instruction::kThrow;
-    case kMirOpCheckPart2:
-      return Instruction::kContinue;
    case kMirOpSelect:
       return Instruction::kContinue;
     case kMirOpConstVector:
diff --git a/compiler/dex/mir_method_info.cc b/compiler/dex/mir_method_info.cc
index 5654604797..94be1fd4a5 100644
--- a/compiler/dex/mir_method_info.cc
+++ b/compiler/dex/mir_method_info.cc
@@ -169,7 +169,8 @@ void MirMethodLoweringInfo::Resolve(CompilerDriver* compiler_driver,
       ~(kFlagFastPath | kFlagIsIntrinsic | kFlagIsSpecial | kFlagClassIsInitialized |
         (kInvokeTypeMask << kBitSharpTypeBegin));
   it->flags_ = other_flags |
-      (fast_path_flags != 0 ? kFlagFastPath : 0u) |
+      // String init path is a special always-fast path.
+      (fast_path_flags != 0 || string_init ? kFlagFastPath : 0u) |
       ((is_intrinsic_or_special & kInlineIntrinsic) != 0 ? kFlagIsIntrinsic : 0u) |
       ((is_intrinsic_or_special & kInlineSpecial) != 0 ? kFlagIsSpecial : 0u) |
       (static_cast<uint16_t>(invoke_type) << kBitSharpTypeBegin) |
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index fb68335e6e..86bb69d01e 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1391,22 +1391,6 @@ void Mir2Lir::InitReferenceVRegs(BasicBlock* bb, BitVector* references) {
       }
     }
   }
-  if (bb->block_type != kEntryBlock && bb->first_mir_insn != nullptr &&
-      static_cast<int>(bb->first_mir_insn->dalvikInsn.opcode) == kMirOpCheckPart2) {
-    // In Mir2Lir::MethodBlockCodeGen() we have artificially moved the throwing
-    // instruction to the previous block. However, the MIRGraph data used above
-    // doesn't reflect that, so we still need to process that MIR insn here.
-    MIR* mir = nullptr;
-    BasicBlock* pred_bb = bb;
-    // Traverse empty blocks.
-    while (mir == nullptr && pred_bb->predecessors.size() == 1u) {
-      pred_bb = mir_graph_->GetBasicBlock(bb->predecessors[0]);
-      DCHECK(pred_bb != nullptr);
-      mir = pred_bb->last_mir_insn;
-    }
-    DCHECK(mir != nullptr);
-    UpdateReferenceVRegsLocal(nullptr, mir, references);
-  }
 }
 
 bool Mir2Lir::UpdateReferenceVRegsLocal(MIR* mir, MIR* prev_mir, BitVector* references) {
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index e9e9161a1c..e3e87ecb13 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -1187,7 +1187,6 @@ void Mir2Lir::HandleExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
     case kMirOpRangeCheck:
     case kMirOpDivZeroCheck:
     case kMirOpCheck:
-    case kMirOpCheckPart2:
       // Ignore these known opcodes
       break;
     default:
@@ -1276,20 +1275,6 @@ bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) {
       head_lir->u.m.def_mask = &kEncodeAll;
     }
 
-    if (opcode == kMirOpCheck) {
-      // Combine check and work halves of throwing instruction.
-      MIR* work_half = mir->meta.throw_insn;
-      mir->dalvikInsn = work_half->dalvikInsn;
-      mir->optimization_flags = work_half->optimization_flags;
-      mir->meta = work_half->meta;  // Whatever the work_half had, we need to copy it.
-      opcode = work_half->dalvikInsn.opcode;
-      SSARepresentation* ssa_rep = work_half->ssa_rep;
-      work_half->ssa_rep = mir->ssa_rep;
-      mir->ssa_rep = ssa_rep;
-      work_half->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpCheckPart2);
-      work_half->meta.throw_insn = mir;
-    }
-
     if (MIR::DecodedInstruction::IsPseudoMirOp(opcode)) {
       HandleExtendedMethodMIR(bb, mir);
       continue;
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 73cfe92c45..7ca438225f 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -403,7 +403,6 @@ static int kAllOpcodes[] = {
     kMirOpRangeCheck,
     kMirOpDivZeroCheck,
     kMirOpCheck,
-    kMirOpCheckPart2,
     kMirOpSelect,
 };
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 6f2cb25911..a06303d23e 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -138,7 +138,8 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver,
     FrameOffset handle_scope_offset = main_jni_conv->CurrentParamHandleScopeEntryOffset();
     // Check handle scope offset is within frame
     CHECK_LT(handle_scope_offset.Uint32Value(), frame_size);
-    // TODO: Insert the read barrier for this load.
+    // Note this LoadRef() already includes the heap poisoning negation.
+    // Note this LoadRef() does not include read barrier. It will be handled below.
     __ LoadRef(main_jni_conv->InterproceduralScratchRegister(),
                mr_conv->MethodRegister(), mirror::ArtMethod::DeclaringClassOffset());
     __ VerifyObject(main_jni_conv->InterproceduralScratchRegister(), false);
@@ -189,6 +190,49 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver,
   size_t current_out_arg_size = main_out_arg_size;
   __ IncreaseFrameSize(main_out_arg_size);
 
+  // Call the read barrier for the declaring class loaded from the method for a static call.
+  // Note that we always have outgoing param space available for at least two params.
+  if (kUseReadBarrier && is_static) {
+    ThreadOffset<4> read_barrier32 = QUICK_ENTRYPOINT_OFFSET(4, pReadBarrierJni);
+    ThreadOffset<8> read_barrier64 = QUICK_ENTRYPOINT_OFFSET(8, pReadBarrierJni);
+    main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
+    main_jni_conv->Next();  // Skip JNIEnv.
+    FrameOffset class_handle_scope_offset = main_jni_conv->CurrentParamHandleScopeEntryOffset();
+    main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
+    // Pass the handle for the class as the first argument.
+    if (main_jni_conv->IsCurrentParamOnStack()) {
+      FrameOffset out_off = main_jni_conv->CurrentParamStackOffset();
+      __ CreateHandleScopeEntry(out_off, class_handle_scope_offset,
+                                mr_conv->InterproceduralScratchRegister(),
+                                false);
+    } else {
+      ManagedRegister out_reg = main_jni_conv->CurrentParamRegister();
+      __ CreateHandleScopeEntry(out_reg, class_handle_scope_offset,
+                                ManagedRegister::NoRegister(), false);
+    }
+    main_jni_conv->Next();
+    // Pass the current thread as the second argument and call.
+    if (main_jni_conv->IsCurrentParamInRegister()) {
+      __ GetCurrentThread(main_jni_conv->CurrentParamRegister());
+      if (is_64_bit_target) {
+        __ Call(main_jni_conv->CurrentParamRegister(), Offset(read_barrier64),
+                main_jni_conv->InterproceduralScratchRegister());
+      } else {
+        __ Call(main_jni_conv->CurrentParamRegister(), Offset(read_barrier32),
+                main_jni_conv->InterproceduralScratchRegister());
+      }
+    } else {
+      __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset(),
+                          main_jni_conv->InterproceduralScratchRegister());
+      if (is_64_bit_target) {
+        __ CallFromThread64(read_barrier64, main_jni_conv->InterproceduralScratchRegister());
+      } else {
+        __ CallFromThread32(read_barrier32, main_jni_conv->InterproceduralScratchRegister());
+      }
+    }
+    main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));  // Reset.
+  }
+
   // 6. Call into appropriate JniMethodStart passing Thread* so that transition out of Runnable
   //    can occur. The result is the saved JNI local state that is restored by the exit call. We
   //    abuse the JNI calling convention here, that is guaranteed to support passing 2 pointer
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index dbdcc96fc1..a871a82d95 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -176,7 +176,7 @@ TEST_F(OatTest, OatHeaderSizeCheck) {
   EXPECT_EQ(72U, sizeof(OatHeader));
   EXPECT_EQ(4U, sizeof(OatMethodOffsets));
   EXPECT_EQ(28U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(111 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
+  EXPECT_EQ(112 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
 }
 
 TEST_F(OatTest, OatHeaderIsValid) {
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 92fa6db507..b2b54965b5 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -281,15 +281,22 @@ class ArrayAccessInsideLoopFinder : public ValueObject {
     return false;
   }
 
+  static bool DominatesAllBackEdges(HBasicBlock* block, HLoopInformation* loop_info) {
+    for (size_t i = 0, e = loop_info->GetBackEdges().Size(); i < e; ++i) {
+      HBasicBlock* back_edge = loop_info->GetBackEdges().Get(i);
+      if (!block->Dominates(back_edge)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
   void Run() {
     HLoopInformation* loop_info = induction_variable_->GetBlock()->GetLoopInformation();
-    // Must be simplified loop.
-    DCHECK_EQ(loop_info->GetBackEdges().Size(), 1U);
     for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) {
       HBasicBlock* block = it_loop.Current();
       DCHECK(block->IsInLoop());
-      HBasicBlock* back_edge = loop_info->GetBackEdges().Get(0);
-      if (!block->Dominates(back_edge)) {
+      if (!DominatesAllBackEdges(block, loop_info)) {
         // In order not to trigger deoptimization unnecessarily, make sure
         // that all array accesses collected are really executed in the loop.
         // For array accesses in a branch inside the loop, don't collect the
@@ -1151,9 +1158,26 @@ class BCEVisitor : public HGraphVisitor {
     bounds_check->GetBlock()->RemoveInstruction(bounds_check);
   }
 
+  static bool HasSameInputAtBackEdges(HPhi* phi) {
+    DCHECK(phi->IsLoopHeaderPhi());
+    // Start with input 1. Input 0 is from the incoming block.
+ HInstruction* input1 = phi->InputAt(1); + DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge( + *phi->GetBlock()->GetPredecessors().Get(1))); + for (size_t i = 2, e = phi->InputCount(); i < e; ++i) { + DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge( + *phi->GetBlock()->GetPredecessors().Get(i))); + if (input1 != phi->InputAt(i)) { + return false; + } + } + return true; + } + void VisitPhi(HPhi* phi) { - if (phi->IsLoopHeaderPhi() && phi->GetType() == Primitive::kPrimInt) { - DCHECK_EQ(phi->InputCount(), 2U); + if (phi->IsLoopHeaderPhi() + && (phi->GetType() == Primitive::kPrimInt) + && HasSameInputAtBackEdges(phi)) { HInstruction* instruction = phi->InputAt(1); HInstruction *left; int32_t increment; diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index 97be778dbd..163458f75c 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -42,7 +42,7 @@ TEST(BoundsCheckEliminationTest, NarrowingRangeArrayBoundsElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); @@ -147,7 +147,7 @@ TEST(BoundsCheckEliminationTest, OverflowArrayBoundsElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); @@ -219,7 +219,7 @@ TEST(BoundsCheckEliminationTest, UnderflowArrayBoundsElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); @@ -291,7 +291,7 @@ TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); @@ -364,7 +364,7 @@ static HGraph* BuildSSAGraph1(ArenaAllocator* allocator, int initial, int increment, IfCondition cond = kCondGE) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (allocator) HBasicBlock(graph); @@ -501,7 +501,7 @@ static HGraph* BuildSSAGraph2(ArenaAllocator* allocator, int initial, int increment = -1, IfCondition cond = kCondLE) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (allocator) HBasicBlock(graph); @@ -632,7 +632,7 @@ static HGraph* BuildSSAGraph3(ArenaAllocator* allocator, int initial, int increment, IfCondition cond) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (allocator) HBasicBlock(graph); @@ -743,7 +743,7 @@ static HGraph* BuildSSAGraph4(ArenaAllocator* allocator, HInstruction** bounds_check, int initial, IfCondition cond = kCondGE) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); graph->SetHasBoundsChecks(true); HBasicBlock* 
entry = new (allocator) HBasicBlock(graph); @@ -868,7 +868,7 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 0f44af07b8..a5c6f23343 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -282,7 +282,10 @@ bool HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item) { // To avoid splitting blocks, we compute ahead of time the instructions that // start a new block, and create these blocks. - ComputeBranchTargets(code_ptr, code_end, &number_of_branches); + if (!ComputeBranchTargets(code_ptr, code_end, &number_of_branches)) { + MaybeRecordStat(MethodCompilationStat::kNotCompiledBranchOutsideMethodCode); + return false; + } // Note that the compiler driver is null when unit testing. if ((compiler_driver_ != nullptr) && SkipCompilation(code_item, number_of_branches)) { @@ -349,7 +352,7 @@ void HGraphBuilder::MaybeUpdateCurrentBlock(size_t index) { current_block_ = block; } -void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, +bool HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, const uint16_t* code_end, size_t* number_of_branches) { branch_targets_.SetSize(code_end - code_ptr); @@ -374,7 +377,14 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, } dex_pc += instruction.SizeInCodeUnits(); code_ptr += instruction.SizeInCodeUnits(); - if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) { + + if (code_ptr >= code_end) { + if (instruction.CanFlowThrough()) { + // In the normal case we should never hit this but someone can artificially forge a dex + // file to fall-through out the method code. In this case we bail out compilation. + return false; + } + } else if (FindBlockStartingAt(dex_pc) == nullptr) { block = new (arena_) HBasicBlock(graph_, dex_pc); branch_targets_.Put(dex_pc, block); } @@ -406,7 +416,12 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, // Fall-through. Add a block if there is more code afterwards. dex_pc += instruction.SizeInCodeUnits(); code_ptr += instruction.SizeInCodeUnits(); - if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) { + if (code_ptr >= code_end) { + // In the normal case we should never hit this but someone can artificially forge a dex + // file to fall-through out the method code. In this case we bail out compilation. + // (A switch can fall-through so we don't need to check CanFlowThrough().) + return false; + } else if (FindBlockStartingAt(dex_pc) == nullptr) { block = new (arena_) HBasicBlock(graph_, dex_pc); branch_targets_.Put(dex_pc, block); } @@ -415,6 +430,7 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, dex_pc += instruction.SizeInCodeUnits(); } } + return true; } HBasicBlock* HGraphBuilder::FindBlockStartingAt(int32_t index) const { diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index dc6d97eb0c..36503ce43a 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -88,7 +88,10 @@ class HGraphBuilder : public ValueObject { // the newly created blocks. // As a side effect, also compute the number of dex instructions, blocks, and // branches. 
- void ComputeBranchTargets(const uint16_t* start, + // Returns true if all the branches fall inside the method code, false otherwise. + // (In normal cases this should always return true but someone can artificially + // create a code unit in which branches fall-through out of it). + bool ComputeBranchTargets(const uint16_t* start, const uint16_t* end, size_t* number_of_branches); void MaybeUpdateCurrentBlock(size_t index); diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index cfe121e0ec..0e776b31f7 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -100,11 +100,11 @@ static bool CheckTypeConsistency(HInstruction* instruction) { for (size_t i = 0; i < instruction->EnvironmentSize(); ++i) { if (environment->GetInstructionAt(i) != nullptr) { Primitive::Type type = environment->GetInstructionAt(i)->GetType(); - DCHECK(CheckType(type, locations->GetEnvironmentAt(i))) - << type << " " << locations->GetEnvironmentAt(i); + DCHECK(CheckType(type, environment->GetLocationAt(i))) + << type << " " << environment->GetLocationAt(i); } else { - DCHECK(locations->GetEnvironmentAt(i).IsInvalid()) - << locations->GetEnvironmentAt(i); + DCHECK(environment->GetLocationAt(i).IsInvalid()) + << environment->GetLocationAt(i); } } return true; @@ -680,6 +680,11 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, locations->GetStackMask(), environment_size, inlining_depth); + if (environment != nullptr) { + // TODO: Handle parent environment. + DCHECK(environment->GetParent() == nullptr); + DCHECK_EQ(environment->GetDexPc(), dex_pc); + } // Walk over the environment, and record the location of dex registers. for (size_t i = 0; i < environment_size; ++i) { @@ -689,7 +694,7 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, continue; } - Location location = locations->GetEnvironmentAt(i); + Location location = environment->GetLocationAt(i); switch (location.GetKind()) { case Location::kConstant: { DCHECK_EQ(current, location.GetConstant()); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index e4c37deb8b..f56e446605 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -112,6 +112,10 @@ class SuspendCheckSlowPathARM : public SlowPathCodeARM { return &return_label_; } + HBasicBlock* GetSuccessor() const { + return successor_; + } + private: HSuspendCheck* const instruction_; // If not null, the block to branch to after the suspend check. 
@@ -3539,8 +3543,18 @@ void InstructionCodeGeneratorARM::VisitSuspendCheck(HSuspendCheck* instruction)
 void InstructionCodeGeneratorARM::GenerateSuspendCheck(HSuspendCheck* instruction,
                                                        HBasicBlock* successor) {
   SuspendCheckSlowPathARM* slow_path =
-      new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(instruction, successor);
-  codegen_->AddSlowPath(slow_path);
+      down_cast<SuspendCheckSlowPathARM*>(instruction->GetSlowPath());
+  if (slow_path == nullptr) {
+    slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(instruction, successor);
+    instruction->SetSlowPath(slow_path);
+    codegen_->AddSlowPath(slow_path);
+    if (successor != nullptr) {
+      DCHECK(successor->IsLoopHeader());
+      codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
+    }
+  } else {
+    DCHECK_EQ(slow_path->GetSuccessor(), successor);
+  }
 
   __ LoadFromOffset(
       kLoadUnsignedHalfword, IP, TR, Thread::ThreadFlagsOffset<kArmWordSize>().Int32Value());
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 9e02a1d850..b1cb8802b3 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -285,6 +285,10 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
     return &return_label_;
   }
 
+  HBasicBlock* GetSuccessor() const {
+    return successor_;
+  }
+
  private:
   HSuspendCheck* const instruction_;
   // If not null, the block to branch to after the suspend check.
@@ -975,14 +979,12 @@ void CodeGeneratorARM64::InvokeRuntime(int32_t entry_point_offset,
   BlockPoolsScope block_pools(GetVIXLAssembler());
   __ Ldr(lr, MemOperand(tr, entry_point_offset));
   __ Blr(lr);
-  if (instruction != nullptr) {
-    RecordPcInfo(instruction, dex_pc, slow_path);
-    DCHECK(instruction->IsSuspendCheck()
-        || instruction->IsBoundsCheck()
-        || instruction->IsNullCheck()
-        || instruction->IsDivZeroCheck()
-        || !IsLeafMethod());
-  }
+  RecordPcInfo(instruction, dex_pc, slow_path);
+  DCHECK(instruction->IsSuspendCheck()
+      || instruction->IsBoundsCheck()
+      || instruction->IsNullCheck()
+      || instruction->IsDivZeroCheck()
+      || !IsLeafMethod());
 }
 
 void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
@@ -1034,8 +1036,19 @@ void InstructionCodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
 void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
                                                          HBasicBlock* successor) {
   SuspendCheckSlowPathARM64* slow_path =
-      new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor);
-  codegen_->AddSlowPath(slow_path);
+      down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath());
+  if (slow_path == nullptr) {
+    slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor);
+    instruction->SetSlowPath(slow_path);
+    codegen_->AddSlowPath(slow_path);
+    if (successor != nullptr) {
+      DCHECK(successor->IsLoopHeader());
+      codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
+    }
+  } else {
+    DCHECK_EQ(slow_path->GetSuccessor(), successor);
+  }
+
   UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
   Register temp = temps.AcquireW();
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 8aa77969fc..2848a48a64 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -153,6 +153,10 @@ class SuspendCheckSlowPathX86 : public SlowPathCodeX86 {
     return &return_label_;
   }
 
+  HBasicBlock* GetSuccessor() const {
+    return successor_;
+  }
+
  private:
   HSuspendCheck* const instruction_;
   HBasicBlock* const successor_;
@@ -809,7 +813,6 @@ void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) {
     HLoopInformation* info = block->GetLoopInformation();
     if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
-      codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
       GenerateSuspendCheck(info->GetSuspendCheck(), successor);
       return;
     }
@@ -2827,7 +2830,11 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) {
   Register low = loc.AsRegisterPairLow<Register>();
   Register high = loc.AsRegisterPairHigh<Register>();
-  if (shift == 32) {
+  if (shift == 1) {
+    // This is just an addition.
+    __ addl(low, low);
+    __ adcl(high, high);
+  } else if (shift == 32) {
     // Shift by 32 is easy. High gets low, and low gets 0.
     codegen_->EmitParallelMoves(
         loc.ToLow(),
@@ -3993,8 +4000,19 @@ void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction)
 void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction,
                                                        HBasicBlock* successor) {
   SuspendCheckSlowPathX86* slow_path =
-      new (GetGraph()->GetArena()) SuspendCheckSlowPathX86(instruction, successor);
-  codegen_->AddSlowPath(slow_path);
+      down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath());
+  if (slow_path == nullptr) {
+    slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86(instruction, successor);
+    instruction->SetSlowPath(slow_path);
+    codegen_->AddSlowPath(slow_path);
+    if (successor != nullptr) {
+      DCHECK(successor->IsLoopHeader());
+      codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
+    }
+  } else {
+    DCHECK_EQ(slow_path->GetSuccessor(), successor);
+  }
+
   __ fs()->cmpw(Address::Absolute(
       Thread::ThreadFlagsOffset<kX86WordSize>().Int32Value()), Immediate(0));
   if (successor == nullptr) {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 5ac68668ba..e633970279 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -99,7 +99,7 @@ class DivRemMinusOneSlowPathX86_64 : public SlowPathCodeX86_64 {
       if (is_div_) {
         __ negq(cpu_reg_);
       } else {
-        __ movq(cpu_reg_, Immediate(0));
+        __ xorl(cpu_reg_, cpu_reg_);
       }
     }
     __ jmp(GetExitLabel());
@@ -136,6 +136,10 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
     return &return_label_;
   }
 
+  HBasicBlock* GetSuccessor() const {
+    return successor_;
+  }
+
  private:
   HSuspendCheck* const instruction_;
   HBasicBlock* const successor_;
@@ -671,7 +675,7 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) {
       DCHECK(constant->IsLongConstant());
       value = constant->AsLongConstant()->GetValue();
     }
-    __ movq(CpuRegister(TMP), Immediate(value));
+    Load64BitValue(CpuRegister(TMP), value);
     __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
   } else {
     DCHECK(source.IsDoubleStackSlot());
@@ -704,9 +708,9 @@ void CodeGeneratorX86_64::Move(HInstruction* instruction,
     } else if (const_to_move->IsLongConstant()) {
       int64_t value = const_to_move->AsLongConstant()->GetValue();
       if (location.IsRegister()) {
-        __ movq(location.AsRegister<CpuRegister>(), Immediate(value));
+        Load64BitValue(location.AsRegister<CpuRegister>(), value);
       } else if (location.IsDoubleStackSlot()) {
-        __ movq(CpuRegister(TMP), Immediate(value));
+        Load64BitValue(CpuRegister(TMP), value);
         __ movq(Address(CpuRegister(RSP), location.GetStackIndex()), CpuRegister(TMP));
       } else {
         DCHECK(location.IsConstant());
@@ -771,7 +775,6 @@ void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
     HLoopInformation* info = block->GetLoopInformation();
     if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
-      codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
       GenerateSuspendCheck(info->GetSuspendCheck(), successor);
       return;
     }
@@ -956,7 +959,7 @@ void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* comp) {
     LocationSummary* locations = comp->GetLocations();
     CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
     // Clear register: setcc only sets the low byte.
-    __ xorq(reg, reg);
+    __ xorl(reg, reg);
     Location lhs = locations->InAt(0);
     Location rhs = locations->InAt(1);
     if (rhs.IsRegister()) {
@@ -1419,8 +1422,8 @@ void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invo
   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
 
   // Set the hidden argument.
-  __ movq(invoke->GetLocations()->GetTemp(1).AsRegister<CpuRegister>(),
-          Immediate(invoke->GetDexMethodIndex()));
+  CpuRegister hidden_reg = invoke->GetLocations()->GetTemp(1).AsRegister<CpuRegister>();
+  codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
 
   // temp = object->GetClass();
   if (receiver.IsStackSlot()) {
@@ -1856,7 +1859,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
           Label done, nan;
 
-          __ movq(output, Immediate(kPrimLongMax));
+          codegen_->Load64BitValue(output, kPrimLongMax);
           // temp = long-to-float(output)
           __ cvtsi2ss(temp, output, true);
           // if input >= temp goto done
@@ -1869,7 +1872,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
           __ jmp(&done);
           __ Bind(&nan);
           // output = 0
-          __ xorq(output, output);
+          __ xorl(output, output);
           __ Bind(&done);
           break;
         }
@@ -1881,7 +1884,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
           Label done, nan;
 
-          __ movq(output, Immediate(kPrimLongMax));
+          codegen_->Load64BitValue(output, kPrimLongMax);
           // temp = long-to-double(output)
           __ cvtsi2sd(temp, output, true);
           // if input >= temp goto done
@@ -1894,7 +1897,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
           __ jmp(&done);
           __ Bind(&nan);
           // output = 0
-          __ xorq(output, output);
+          __ xorl(output, output);
           __ Bind(&done);
           break;
         }
@@ -2483,7 +2486,7 @@ void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instr
 
     case Primitive::kPrimLong: {
       if (instruction->IsRem()) {
-        __ xorq(output_register, output_register);
+        __ xorl(output_register, output_register);
       } else {
         __ movq(output_register, input_register);
         if (imm == -1) {
@@ -2527,7 +2530,7 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
     DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
     CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
 
-    __ movq(rdx, Immediate(std::abs(imm) - 1));
+    codegen_->Load64BitValue(rdx, std::abs(imm) - 1);
     __ addq(rdx, numerator);
     __ testq(numerator, numerator);
     __ cmov(kGreaterEqual, rdx, numerator);
@@ -2624,7 +2627,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperat
     __ movq(numerator, rax);
 
     // RAX = magic
-    __ movq(rax, Immediate(magic));
+    codegen_->Load64BitValue(rax, magic);
 
     // RDX:RAX = magic * numerator
     __ imulq(numerator);
@@ -2653,8 +2656,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperat
       if (IsInt<32>(imm)) {
         __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
       } else {
-        __ movq(numerator, Immediate(imm));
-        __ imulq(rdx, numerator);
+        __ imulq(rdx, codegen_->LiteralInt64Address(imm));
       }
 
       __ subq(rax, rdx);
@@ -3020,8 +3022,8 @@ void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
   InvokeRuntimeCallingConvention calling_convention;
   codegen_->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1)));
-  __ movq(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction->GetTypeIndex()));
-
+  codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
+                           instruction->GetTypeIndex());
   __ gs()->call(
       Address::Absolute(GetThreadOffset<kX86_64WordSize>(instruction->GetEntrypoint()), true));
 
@@ -3042,7 +3044,8 @@ void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
   InvokeRuntimeCallingConvention calling_convention;
   codegen_->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(2)));
-  __ movq(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction->GetTypeIndex()));
+  codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
+                           instruction->GetTypeIndex());
 
   __ gs()->call(
       Address::Absolute(GetThreadOffset<kX86_64WordSize>(instruction->GetEntrypoint()), true));
 
@@ -3864,8 +3867,19 @@ void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instructio
 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
                                                           HBasicBlock* successor) {
   SuspendCheckSlowPathX86_64* slow_path =
-      new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor);
-  codegen_->AddSlowPath(slow_path);
+      down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
+  if (slow_path == nullptr) {
+    slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor);
+    instruction->SetSlowPath(slow_path);
+    codegen_->AddSlowPath(slow_path);
+    if (successor != nullptr) {
+      DCHECK(successor->IsLoopHeader());
+      codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
+    }
+  } else {
+    DCHECK_EQ(slow_path->GetSuccessor(), successor);
+  }
+
   __ gs()->cmpw(Address::Absolute(
       Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(), true), Immediate(0));
   if (successor == nullptr) {
@@ -3938,45 +3952,42 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) {
     } else if (constant->IsLongConstant()) {
       int64_t value = constant->AsLongConstant()->GetValue();
       if (destination.IsRegister()) {
-        __ movq(destination.AsRegister<CpuRegister>(), Immediate(value));
+        codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
       } else {
         DCHECK(destination.IsDoubleStackSlot()) << destination;
-        __ movq(CpuRegister(TMP), Immediate(value));
+        codegen_->Load64BitValue(CpuRegister(TMP), value);
         __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
       }
     } else if (constant->IsFloatConstant()) {
       float fp_value = constant->AsFloatConstant()->GetValue();
       int32_t value = bit_cast<int32_t, float>(fp_value);
-      Immediate imm(value);
       if (destination.IsFpuRegister()) {
         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
         if (value == 0) {
           // easy FP 0.0.
           __ xorps(dest, dest);
         } else {
-          __ movl(CpuRegister(TMP), imm);
-          __ movd(dest, CpuRegister(TMP));
+          __ movss(dest, codegen_->LiteralFloatAddress(fp_value));
         }
       } else {
         DCHECK(destination.IsStackSlot()) << destination;
+        Immediate imm(value);
         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
       }
     } else {
       DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
       double fp_value = constant->AsDoubleConstant()->GetValue();
       int64_t value = bit_cast<int64_t, double>(fp_value);
-      Immediate imm(value);
       if (destination.IsFpuRegister()) {
         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
         if (value == 0) {
           __ xorpd(dest, dest);
         } else {
-          __ movq(CpuRegister(TMP), imm);
-          __ movd(dest, CpuRegister(TMP));
+          __ movsd(dest, codegen_->LiteralDoubleAddress(fp_value));
         }
       } else {
         DCHECK(destination.IsDoubleStackSlot()) << destination;
-        __ movq(CpuRegister(TMP), imm);
+        codegen_->Load64BitValue(CpuRegister(TMP), value);
         __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
       }
     }
@@ -4435,6 +4446,17 @@ void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction) {
   LOG(FATAL) << "Unreachable";
 }
 
+void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
+  if (value == 0) {
+    __ xorl(dest, dest);
+  } else if (value > 0 && IsInt<32>(value)) {
+    // We can use a 32 bit move, as it will zero-extend and is one byte shorter.
+    __ movl(dest, Immediate(static_cast<int32_t>(value)));
+  } else {
+    __ movq(dest, Immediate(value));
+  }
+}
+
 void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
   // Generate the constant area if needed.
   X86_64Assembler* assembler = GetAssembler();
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 13f9c46b5e..480ea6b9c9 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -282,6 +282,9 @@ class CodeGeneratorX86_64 : public CodeGenerator {
   Address LiteralInt32Address(int32_t v);
   Address LiteralInt64Address(int64_t v);
 
+  // Load a 64 bit value into a register in the most efficient manner.
+  void Load64BitValue(CpuRegister dest, int64_t value);
+
  private:
   // Labels for each block that will be compiled.
   GrowableArray<Label> block_labels_;
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 94f56e5d3e..bfed1a89de 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -225,7 +225,7 @@ static void RunCodeOptimized(HGraph* graph,
 static void TestCode(const uint16_t* data, bool has_result = false, int32_t expected = 0) {
   ArenaPool pool;
   ArenaAllocator arena(&pool);
-  HGraph* graph = new (&arena) HGraph(&arena);
+  HGraph* graph = CreateGraph(&arena);
   HGraphBuilder builder(graph);
   const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
   bool graph_built = builder.BuildGraph(*item);
@@ -238,7 +238,7 @@ static void TestCode(const uint16_t* data, bool has_result = false, int32_t expe
 static void TestCodeLong(const uint16_t* data, bool has_result, int64_t expected) {
   ArenaPool pool;
   ArenaAllocator arena(&pool);
-  HGraph* graph = new (&arena) HGraph(&arena);
+  HGraph* graph = CreateGraph(&arena);
   HGraphBuilder builder(graph, Primitive::kPrimLong);
   const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
   bool graph_built = builder.BuildGraph(*item);
@@ -504,7 +504,7 @@ TEST(CodegenTest, NonMaterializedCondition) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HGraph* graph = CreateGraph(&allocator);
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
@@ -623,7 +623,7 @@ TEST(CodegenTest, MaterializedCondition1) {
   for (size_t i = 0; i < arraysize(lhs); i++) {
     ArenaPool pool;
     ArenaAllocator allocator(&pool);
-    HGraph* graph = new (&allocator) HGraph(&allocator);
+    HGraph* graph = CreateGraph(&allocator);
 
     HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
     graph->AddBlock(entry_block);
@@ -669,7 +669,7 @@ TEST(CodegenTest, MaterializedCondition2) {
   for (size_t i = 0; i < arraysize(lhs); i++) {
     ArenaPool pool;
     ArenaAllocator allocator(&pool);
-    HGraph* graph = new (&allocator) HGraph(&allocator);
+    HGraph* graph = CreateGraph(&allocator);
 
     HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
     graph->AddBlock(entry_block);
diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc
index b7a92b5ae5..5a1d9b488f 100644
--- a/compiler/optimizing/constant_folding.cc
+++ b/compiler/optimizing/constant_folding.cc
@@ -28,6 +28,7 @@ class InstructionWithAbsorbingInputSimplifier : public HGraphVisitor {
   void VisitShift(HBinaryOperation* shift);
 
   void VisitAnd(HAnd* instruction) OVERRIDE;
+  void VisitCompare(HCompare* instruction) OVERRIDE;
   void VisitMul(HMul* instruction) OVERRIDE;
   void VisitOr(HOr* instruction) OVERRIDE;
   void VisitRem(HRem* instruction) OVERRIDE;
@@ -108,6 +109,26 @@ void InstructionWithAbsorbingInputSimplifier::VisitAnd(HAnd* instruction) {
   }
 }
 
+void InstructionWithAbsorbingInputSimplifier::VisitCompare(HCompare* instruction) {
+  HConstant* input_cst = instruction->GetConstantRight();
+  if (input_cst != nullptr) {
+    HInstruction* input_value = instruction->GetLeastConstantLeft();
+    if (Primitive::IsFloatingPointType(input_value->GetType()) &&
+        ((input_cst->IsFloatConstant() && input_cst->AsFloatConstant()->IsNaN()) ||
+         (input_cst->IsDoubleConstant() && input_cst->AsDoubleConstant()->IsNaN()))) {
+      // Replace code looking like
+      //    CMP{G,L} dst, src, NaN
+      // with
+      //    CONSTANT +1 (gt bias)
+      // or
+      //    CONSTANT -1 (lt bias)
+      instruction->ReplaceWith(GetGraph()->GetConstant(Primitive::kPrimInt,
                                                        (instruction->IsGtBias() ? 1 : -1)));
+      instruction->GetBlock()->RemoveInstruction(instruction);
+    }
+  }
+}
+
 void InstructionWithAbsorbingInputSimplifier::VisitMul(HMul* instruction) {
   HConstant* input_cst = instruction->GetConstantRight();
   Primitive::Type type = instruction->GetType();
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index cd427c5ed8..6fbe75e802 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -47,6 +47,12 @@ static void MarkReachableBlocks(HBasicBlock* block, ArenaBitVector* visited) {
   }
 }
 
+static void MarkLoopHeadersContaining(const HBasicBlock& block, ArenaBitVector* set) {
+  for (HLoopInformationOutwardIterator it(block); !it.Done(); it.Advance()) {
+    set->SetBit(it.Current()->GetHeader()->GetBlockId());
+  }
+}
+
 void HDeadCodeElimination::MaybeRecordDeadBlock(HBasicBlock* block) {
   if (stats_ != nullptr) {
     stats_->RecordStat(MethodCompilationStat::kRemovedDeadInstruction,
@@ -58,18 +64,24 @@ void HDeadCodeElimination::RemoveDeadBlocks() {
   // Classify blocks as reachable/unreachable.
   ArenaAllocator* allocator = graph_->GetArena();
   ArenaBitVector live_blocks(allocator, graph_->GetBlocks().Size(), false);
+  ArenaBitVector affected_loops(allocator, graph_->GetBlocks().Size(), false);
+
   MarkReachableBlocks(graph_->GetEntryBlock(), &live_blocks);
 
-  // Remove all dead blocks. Process blocks in post-order, because removal needs
-  // the block's chain of dominators.
+  // Remove all dead blocks. Iterate in post order because removal needs the
+  // block's chain of dominators and nested loops need to be updated from the
+  // inside out.
   for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
-    if (live_blocks.IsBitSet(block->GetBlockId())) {
-      // If this block is part of a loop that is being dismantled, we need to
-      // update its loop information.
- block->UpdateLoopInformation(); + int id = block->GetBlockId(); + if (live_blocks.IsBitSet(id)) { + if (affected_loops.IsBitSet(id)) { + DCHECK(block->IsLoopHeader()); + block->GetLoopInformation()->Update(); + } } else { MaybeRecordDeadBlock(block); + MarkLoopHeadersContaining(*block, &affected_loops); block->DisconnectAndDelete(); } } diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h index 0bea0fc1c2..59a57c4345 100644 --- a/compiler/optimizing/dead_code_elimination.h +++ b/compiler/optimizing/dead_code_elimination.h @@ -31,13 +31,13 @@ class HDeadCodeElimination : public HOptimization { public: HDeadCodeElimination(HGraph* graph, OptimizingCompilerStats* stats = nullptr, - const char* name = kDeadCodeEliminationPassName) + const char* name = kInitialDeadCodeEliminationPassName) : HOptimization(graph, true, name, stats) {} void Run() OVERRIDE; - static constexpr const char* kDeadCodeEliminationPassName = - "dead_code_elimination"; + static constexpr const char* kInitialDeadCodeEliminationPassName = "dead_code_elimination"; + static constexpr const char* kFinalDeadCodeEliminationPassName = "dead_code_elimination_final"; private: void MaybeRecordDeadBlock(HBasicBlock* block); diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc index 61a7697301..78ae1dd960 100644 --- a/compiler/optimizing/dominator_test.cc +++ b/compiler/optimizing/dominator_test.cc @@ -27,7 +27,7 @@ namespace art { static void TestCode(const uint16_t* data, const int* blocks, size_t blocks_length) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc index 2bfecc696a..29aa97a83a 100644 --- a/compiler/optimizing/find_loops_test.cc +++ b/compiler/optimizing/find_loops_test.cc @@ -28,7 +28,7 @@ namespace art { static HGraph* TestCode(const uint16_t* data, ArenaAllocator* allocator) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); builder.BuildGraph(*item); @@ -235,14 +235,13 @@ TEST(FindLoopsTest, Loop4) { TestBlock(graph, 0, false, -1); // entry block TestBlock(graph, 1, false, -1); // pre header - const int blocks2[] = {2, 3, 4, 5, 8}; - TestBlock(graph, 2, true, 2, blocks2, 5); // loop header + const int blocks2[] = {2, 3, 4, 5}; + TestBlock(graph, 2, true, 2, blocks2, arraysize(blocks2)); // loop header TestBlock(graph, 3, false, 2); // block in loop - TestBlock(graph, 4, false, 2); // original back edge - TestBlock(graph, 5, false, 2); // original back edge + TestBlock(graph, 4, false, 2); // back edge + TestBlock(graph, 5, false, 2); // back edge TestBlock(graph, 6, false, -1); // return block TestBlock(graph, 7, false, -1); // exit block - TestBlock(graph, 8, false, 2); // synthesized back edge } diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 8ea8f3cd79..fd28f0b83f 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -288,6 +288,7 @@ void SSAChecker::VisitBasicBlock(HBasicBlock* block) { void SSAChecker::CheckLoop(HBasicBlock* loop_header) { 
int id = loop_header->GetBlockId(); + HLoopInformation* loop_information = loop_header->GetLoopInformation(); // Ensure the pre-header block is first in the list of // predecessors of a loop header. @@ -297,57 +298,48 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) { id)); } - // Ensure the loop header has only two predecessors and that only the - // second one is a back edge. + // Ensure the loop header has only one incoming branch and that the remaining + // predecessors are back edges. size_t num_preds = loop_header->GetPredecessors().Size(); if (num_preds < 2) { AddError(StringPrintf( "Loop header %d has fewer than two predecessors: %zu.", id, num_preds)); - } else if (num_preds > 2) { - AddError(StringPrintf( - "Loop header %d has more than two predecessors: %zu.", - id, - num_preds)); } else { - HLoopInformation* loop_information = loop_header->GetLoopInformation(); HBasicBlock* first_predecessor = loop_header->GetPredecessors().Get(0); if (loop_information->IsBackEdge(*first_predecessor)) { AddError(StringPrintf( "First predecessor of loop header %d is a back edge.", id)); } - HBasicBlock* second_predecessor = loop_header->GetPredecessors().Get(1); - if (!loop_information->IsBackEdge(*second_predecessor)) { - AddError(StringPrintf( - "Second predecessor of loop header %d is not a back edge.", - id)); + for (size_t i = 1, e = loop_header->GetPredecessors().Size(); i < e; ++i) { + HBasicBlock* predecessor = loop_header->GetPredecessors().Get(i); + if (!loop_information->IsBackEdge(*predecessor)) { + AddError(StringPrintf( + "Loop header %d has multiple incoming (non-back-edge) blocks.", + id)); + } } } - const ArenaBitVector& loop_blocks = loop_header->GetLoopInformation()->GetBlocks(); + const ArenaBitVector& loop_blocks = loop_information->GetBlocks(); - // Ensure there is only one back edge per loop. - size_t num_back_edges = - loop_header->GetLoopInformation()->GetBackEdges().Size(); + // Ensure back edges belong to the loop. + size_t num_back_edges = loop_information->GetBackEdges().Size(); if (num_back_edges == 0) { AddError(StringPrintf( "Loop defined by header %d has no back edge.", id)); - } else if (num_back_edges > 1) { - AddError(StringPrintf( - "Loop defined by header %d has several back edges: %zu.", - id, - num_back_edges)); } else { - DCHECK_EQ(num_back_edges, 1u); - int back_edge_id = loop_header->GetLoopInformation()->GetBackEdges().Get(0)->GetBlockId(); - if (!loop_blocks.IsBitSet(back_edge_id)) { - AddError(StringPrintf( - "Loop defined by header %d has an invalid back edge %d.", - id, - back_edge_id)); + for (size_t i = 0; i < num_back_edges; ++i) { + int back_edge_id = loop_information->GetBackEdges().Get(i)->GetBlockId(); + if (!loop_blocks.IsBitSet(back_edge_id)) { + AddError(StringPrintf( + "Loop defined by header %d has an invalid back edge %d.", + id, + back_edge_id)); + } } } @@ -394,8 +386,9 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) { // Ensure an instruction having an environment is dominated by the // instructions contained in the environment.
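// Note: with inlining, GetEnvironment() may now return the head of a chain of
// environments (one per inlined frame); each environment in the chain gets the
// same dominance check.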
- HEnvironment* environment = instruction->GetEnvironment(); - if (environment != nullptr) { + for (HEnvironment* environment = instruction->GetEnvironment(); + environment != nullptr; + environment = environment->GetParent()) { for (size_t i = 0, e = environment->Size(); i < e; ++i) { HInstruction* env_instruction = environment->GetInstructionAt(i); if (env_instruction != nullptr diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc index 923468ff16..eca0d9344f 100644 --- a/compiler/optimizing/graph_checker_test.cc +++ b/compiler/optimizing/graph_checker_test.cc @@ -30,7 +30,7 @@ namespace art { * 1: Exit */ HGraph* CreateSimpleCFG(ArenaAllocator* allocator) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HBasicBlock* entry_block = new (allocator) HBasicBlock(graph); entry_block->AddInstruction(new (allocator) HGoto()); graph->AddBlock(entry_block); diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc index 50398b4790..59d50926ad 100644 --- a/compiler/optimizing/graph_test.cc +++ b/compiler/optimizing/graph_test.cc @@ -73,7 +73,7 @@ TEST(GraphTest, IfSuccessorSimpleJoinBlock1) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, &allocator); HBasicBlock* if_true = createGotoBlock(graph, &allocator); @@ -108,7 +108,7 @@ TEST(GraphTest, IfSuccessorSimpleJoinBlock2) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, &allocator); HBasicBlock* if_false = createGotoBlock(graph, &allocator); @@ -143,7 +143,7 @@ TEST(GraphTest, IfSuccessorMultipleBackEdges1) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, &allocator); HBasicBlock* return_block = createReturnBlock(graph, &allocator); @@ -178,7 +178,7 @@ TEST(GraphTest, IfSuccessorMultipleBackEdges2) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, &allocator); HBasicBlock* return_block = createReturnBlock(graph, &allocator); @@ -213,7 +213,7 @@ TEST(GraphTest, IfSuccessorMultiplePreHeaders1) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* first_if_block = createIfBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, &allocator); @@ -252,7 +252,7 @@ TEST(GraphTest, IfSuccessorMultiplePreHeaders2) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* first_if_block = createIfBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, 
&allocator); @@ -288,7 +288,7 @@ TEST(GraphTest, InsertInstructionBefore) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* block = createGotoBlock(graph, &allocator); HInstruction* got = block->GetLastInstruction(); ASSERT_TRUE(got->IsControlFlow()); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index ca9cbc3d01..f5c630bf97 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -17,6 +17,7 @@ #include "graph_visualizer.h" #include "code_generator.h" +#include "dead_code_elimination.h" #include "licm.h" #include "nodes.h" #include "optimization.h" @@ -211,17 +212,22 @@ class HGraphVisualizerPrinter : public HGraphVisitor { output_ << "]"; } if (instruction->HasEnvironment()) { - HEnvironment* env = instruction->GetEnvironment(); - output_ << " (env: [ "; - for (size_t i = 0, e = env->Size(); i < e; ++i) { - HInstruction* insn = env->GetInstructionAt(i); - if (insn != nullptr) { - output_ << GetTypeId(insn->GetType()) << insn->GetId() << " "; - } else { - output_ << " _ "; + output_ << " (env:"; + for (HEnvironment* environment = instruction->GetEnvironment(); + environment != nullptr; + environment = environment->GetParent()) { + output_ << " [ "; + for (size_t i = 0, e = environment->Size(); i < e; ++i) { + HInstruction* insn = environment->GetInstructionAt(i); + if (insn != nullptr) { + output_ << GetTypeId(insn->GetType()) << insn->GetId() << " "; + } else { + output_ << " _ "; + } } + output_ << "]"; } - output_ << "])"; + output_ << ")"; } if (IsPass(SsaLivenessAnalysis::kLivenessPassName) && is_after_pass_ @@ -248,7 +254,8 @@ class HGraphVisualizerPrinter : public HGraphVisitor { } } output_ << " (liveness: " << instruction->GetLifetimePosition() << ")"; - } else if (IsPass(LICM::kLoopInvariantCodeMotionPassName)) { + } else if (IsPass(LICM::kLoopInvariantCodeMotionPassName) + || IsPass(HDeadCodeElimination::kFinalDeadCodeEliminationPassName)) { output_ << " ( loop_header:"; HLoopInformation* info = instruction->GetBlock()->GetLoopInformation(); if (info == nullptr) { diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc index a81d49aa0c..c3ce7e142a 100644 --- a/compiler/optimizing/gvn_test.cc +++ b/compiler/optimizing/gvn_test.cc @@ -29,7 +29,7 @@ TEST(GVNTest, LocalFieldElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -78,7 +78,7 @@ TEST(GVNTest, GlobalFieldElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -133,7 +133,7 @@ TEST(GVNTest, LoopFieldElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -220,7 +220,7 @@ TEST(GVNTest, LoopSideEffects) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); 
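+ // CreateGraph is a test helper; it presumably supplies the dex file and method
+ // index that HGraph's constructor now requires (see the nodes.h hunk below).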
HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index ada32db047..afffc7ab4f 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -170,7 +170,11 @@ bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method, nullptr); HGraph* callee_graph = new (graph_->GetArena()) HGraph( - graph_->GetArena(), graph_->IsDebuggable(), graph_->GetCurrentInstructionId()); + graph_->GetArena(), + caller_dex_file, + method_index, + graph_->IsDebuggable(), + graph_->GetCurrentInstructionId()); OptimizingCompilerStats inline_stats; HGraphBuilder builder(callee_graph, diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index e79d4f4bdc..46fad17b8f 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -137,13 +137,25 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { HConstant* input_cst = instruction->GetConstantRight(); HInstruction* input_other = instruction->GetLeastConstantLeft(); - if ((input_cst != nullptr) && input_cst->IsZero()) { - // Replace code looking like - // SHL dst, src, 0 - // with - // src - instruction->ReplaceWith(input_other); - instruction->GetBlock()->RemoveInstruction(instruction); + if (input_cst != nullptr) { + if (input_cst->IsZero()) { + // Replace code looking like + // SHL dst, src, 0 + // with + // src + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + } else if (instruction->IsShl() && input_cst->IsOne()) { + // Replace Shl looking like + // SHL dst, src, 1 + // with + // ADD dst, src, src + HAdd* add = new (GetGraph()->GetArena()) HAdd(instruction->GetType(), + input_other, + input_other); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, add); + RecordSimplification(); + } } } diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 7f7b450003..dccfe9a0ca 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -850,6 +850,94 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +static void GenerateVisitStringIndexOf(HInvoke* invoke, + ArmAssembler* assembler, + CodeGeneratorARM* codegen, + ArenaAllocator* allocator, + bool start_at_zero) { + LocationSummary* locations = invoke->GetLocations(); + Register tmp_reg = locations->GetTemp(0).AsRegister<Register>(); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically, + // or directly dispatch if we have a constant. + SlowPathCodeARM* slow_path = nullptr; + if (invoke->InputAt(1)->IsIntConstant()) { + if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) > + std::numeric_limits<uint16_t>::max()) { + // Always needs the slow-path. We could directly dispatch to it, but this case should be + // rare, so for simplicity just put the full slow-path down and branch unconditionally.
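+ // (A code point above 0xFFFF cannot equal any single 16-bit char unit; the
+ // runtime's indexOf, which understands supplementary code points, handles it.)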
+ slow_path = new (allocator) IntrinsicSlowPathARM(invoke); + codegen->AddSlowPath(slow_path); + __ b(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } + } else { + Register char_reg = locations->InAt(1).AsRegister<Register>(); + __ LoadImmediate(tmp_reg, std::numeric_limits<uint16_t>::max()); + __ cmp(char_reg, ShifterOperand(tmp_reg)); + slow_path = new (allocator) IntrinsicSlowPathARM(invoke); + codegen->AddSlowPath(slow_path); + __ b(slow_path->GetEntryLabel(), HI); + } + + if (start_at_zero) { + DCHECK_EQ(tmp_reg, R2); + // Start-index = 0. + __ LoadImmediate(tmp_reg, 0); + } + + __ LoadFromOffset(kLoadWord, LR, TR, + QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pIndexOf).Int32Value()); + __ blx(LR); + + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); + } +} + +void IntrinsicLocationsBuilderARM::VisitStringIndexOf(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's + // best to align the inputs accordingly. + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetOut(Location::RegisterLocation(R0)); + + // Need a temp for slow-path codepoint compare, and need to send start-index=0. + locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); +} + +void IntrinsicCodeGeneratorARM::VisitStringIndexOf(HInvoke* invoke) { + GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true); +} + +void IntrinsicLocationsBuilderARM::VisitStringIndexOfAfter(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's + // best to align the inputs accordingly. + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + locations->SetOut(Location::RegisterLocation(R0)); + + // Need a temp for slow-path codepoint compare. + locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorARM::VisitStringIndexOfAfter(HInvoke* invoke) { + GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false); +} + void IntrinsicLocationsBuilderARM::VisitStringNewStringFromBytes(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, LocationSummary::kCall, @@ -951,8 +1039,6 @@ UNIMPLEMENTED_INTRINSIC(MathRoundDouble) // Could be done by changing rounding UNIMPLEMENTED_INTRINSIC(MathRoundFloat) // Could be done by changing rounding mode, maybe? UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) // High register pressure. 
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) -UNIMPLEMENTED_INTRINSIC(StringIndexOf) -UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index ca3de99092..2c4fab0465 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -993,6 +993,91 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +static void GenerateVisitStringIndexOf(HInvoke* invoke, + vixl::MacroAssembler* masm, + CodeGeneratorARM64* codegen, + ArenaAllocator* allocator, + bool start_at_zero) { + LocationSummary* locations = invoke->GetLocations(); + Register tmp_reg = WRegisterFrom(locations->GetTemp(0)); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically, + // or directly dispatch if we have a constant. + SlowPathCodeARM64* slow_path = nullptr; + if (invoke->InputAt(1)->IsIntConstant()) { + if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) > 0xFFFFU) { + // Always needs the slow-path. We could directly dispatch to it, but this case should be + // rare, so for simplicity just put the full slow-path down and branch unconditionally. + slow_path = new (allocator) IntrinsicSlowPathARM64(invoke); + codegen->AddSlowPath(slow_path); + __ B(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } + } else { + Register char_reg = WRegisterFrom(locations->InAt(1)); + __ Mov(tmp_reg, 0xFFFF); + __ Cmp(char_reg, Operand(tmp_reg)); + slow_path = new (allocator) IntrinsicSlowPathARM64(invoke); + codegen->AddSlowPath(slow_path); + __ B(hi, slow_path->GetEntryLabel()); + } + + if (start_at_zero) { + // Start-index = 0. + __ Mov(tmp_reg, 0); + } + + __ Ldr(lr, MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pIndexOf).Int32Value())); + __ Blr(lr); + + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); + } +} + +void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's + // best to align the inputs accordingly. + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt)); + + // Need a temp for slow-path codepoint compare, and need to send start_index=0. + locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2))); +} + +void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) { + GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, GetAllocator(), true); +} + +void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's + // best to align the inputs accordingly. 
+ InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); + locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt)); + + // Need a temp for slow-path codepoint compare. + locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) { + GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, GetAllocator(), false); +} + void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, LocationSummary::kCall, @@ -1080,8 +1165,6 @@ void IntrinsicCodeGeneratorARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED } UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) -UNIMPLEMENTED_INTRINSIC(StringIndexOf) -UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 1eef1eff0b..28b7a07cf9 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -16,6 +16,8 @@ #include "intrinsics_x86.h" +#include <limits> + #include "arch/x86/instruction_set_features_x86.h" #include "code_generator_x86.h" #include "entrypoints/quick/quick_entrypoints.h" @@ -124,11 +126,8 @@ static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) { // restored! class IntrinsicSlowPathX86 : public SlowPathCodeX86 { public: - explicit IntrinsicSlowPathX86(HInvoke* invoke, Register temp) - : invoke_(invoke) { - // The temporary register has to be EAX for x86 invokes. - DCHECK_EQ(temp, EAX); - } + explicit IntrinsicSlowPathX86(HInvoke* invoke) + : invoke_(invoke) { } void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { CodeGeneratorX86* codegen = down_cast<CodeGeneratorX86*>(codegen_in); @@ -880,8 +879,6 @@ void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->SetOut(Location::SameAsFirstInput()); - // Needs to be EAX for the invoke. - locations->AddTemp(Location::RegisterLocation(EAX)); } void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) { @@ -901,8 +898,7 @@ void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) { // TODO: For simplicity, the index parameter is requested in a register, so different from Quick // we will not optimize the code for constants (which would save a register). - SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86( - invoke, locations->GetTemp(0).AsRegister<Register>()); + SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); codegen_->AddSlowPath(slow_path); X86Assembler* assembler = GetAssembler(); @@ -926,8 +922,6 @@ void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) { locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(Location::RegisterLocation(EAX)); - // Needs to be EAX for the invoke. 
- locations->AddTemp(Location::RegisterLocation(EAX)); } void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) { @@ -939,8 +933,7 @@ void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) { Register argument = locations->InAt(1).AsRegister<Register>(); __ testl(argument, argument); - SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86( - invoke, locations->GetTemp(0).AsRegister<Register>()); + SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); codegen_->AddSlowPath(slow_path); __ j(kEqual, slow_path->GetEntryLabel()); @@ -948,6 +941,158 @@ void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +static void CreateStringIndexOfLocations(HInvoke* invoke, + ArenaAllocator* allocator, + bool start_at_zero) { + LocationSummary* locations = new (allocator) LocationSummary(invoke, + LocationSummary::kCallOnSlowPath, + kIntrinsified); + // The data needs to be in EDI for scasw. So request that the string is there anyway. + locations->SetInAt(0, Location::RegisterLocation(EDI)); + // If we look for a constant char, we'll still have to copy it into EAX. So just request the + // allocator to do that anyway. We can still do the constant check by checking the parameter + // of the instruction explicitly. + // Note: This works as we don't clobber EAX anywhere. + locations->SetInAt(1, Location::RegisterLocation(EAX)); + if (!start_at_zero) { + locations->SetInAt(2, Location::RequiresRegister()); // The starting index. + } + // As we clobber EDI during execution anyway, also use it as the output. + locations->SetOut(Location::SameAsFirstInput()); + + // repne scasw uses ECX as the counter. + locations->AddTemp(Location::RegisterLocation(ECX)); + // Need another temporary to be able to compute the result. + locations->AddTemp(Location::RequiresRegister()); +} + +static void GenerateStringIndexOf(HInvoke* invoke, + X86Assembler* assembler, + CodeGeneratorX86* codegen, + ArenaAllocator* allocator, + bool start_at_zero) { + LocationSummary* locations = invoke->GetLocations(); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + Register string_obj = locations->InAt(0).AsRegister<Register>(); + Register search_value = locations->InAt(1).AsRegister<Register>(); + Register counter = locations->GetTemp(0).AsRegister<Register>(); + Register string_length = locations->GetTemp(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + // Check our assumptions for registers. + DCHECK_EQ(string_obj, EDI); + DCHECK_EQ(search_value, EAX); + DCHECK_EQ(counter, ECX); + DCHECK_EQ(out, EDI); + + // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically, + // or directly dispatch if we have a constant. + SlowPathCodeX86* slow_path = nullptr; + if (invoke->InputAt(1)->IsIntConstant()) { + if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) > + std::numeric_limits<uint16_t>::max()) { + // Always needs the slow-path. We could directly dispatch to it, but this case should be + // rare, so for simplicity just put the full slow-path down and branch unconditionally.
+ slow_path = new (allocator) IntrinsicSlowPathX86(invoke); + codegen->AddSlowPath(slow_path); + __ jmp(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } + } else { + __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max())); + slow_path = new (allocator) IntrinsicSlowPathX86(invoke); + codegen->AddSlowPath(slow_path); + __ j(kAbove, slow_path->GetEntryLabel()); + } + + // From here down, we know that we are looking for a char that fits in 16 bits. + // Location of reference to data array within the String object. + int32_t value_offset = mirror::String::ValueOffset().Int32Value(); + // Location of count within the String object. + int32_t count_offset = mirror::String::CountOffset().Int32Value(); + + // Load string length, i.e., the count field of the string. + __ movl(string_length, Address(string_obj, count_offset)); + + // Do a zero-length check. + // TODO: Support jecxz. + Label not_found_label; + __ testl(string_length, string_length); + __ j(kEqual, &not_found_label); + + if (start_at_zero) { + // Number of chars to scan is the same as the string length. + __ movl(counter, string_length); + + // Move to the start of the string. + __ addl(string_obj, Immediate(value_offset)); + } else { + Register start_index = locations->InAt(2).AsRegister<Register>(); + + // Do a start_index check. + __ cmpl(start_index, string_length); + __ j(kGreaterEqual, &not_found_label); + + // Ensure we have a start index >= 0. + __ xorl(counter, counter); + __ cmpl(start_index, Immediate(0)); + __ cmovl(kGreater, counter, start_index); + + // Move to the start of the string: string_obj + value_offset + 2 * start_index. + __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset)); + + // Now update ecx (the repne scasw work counter). We have string.length - start_index left to + // compare. + __ negl(counter); + __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0)); + } + + // Everything is set up for repne scasw: + // * Comparison address in EDI. + // * Counter in ECX. + __ repne_scasw(); + + // Did we find a match? + __ j(kNotEqual, &not_found_label); + + // Yes, we matched. Compute the index of the result. + __ subl(string_length, counter); + __ leal(out, Address(string_length, -1)); + + Label done; + __ jmp(&done); + + // Failed to match; return -1. + __ Bind(&not_found_label); + __ movl(out, Immediate(-1)); + + // And join up at the end.
+ __ Bind(&done); + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); + } +} + +void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) { + CreateStringIndexOfLocations(invoke, arena_, true); +} + +void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) { + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true); +} + +void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) { + CreateStringIndexOfLocations(invoke, arena_, false); +} + +void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) { + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false); +} + void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, LocationSummary::kCall, @@ -958,8 +1103,6 @@ void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3))); locations->SetOut(Location::RegisterLocation(EAX)); - // Needs to be EAX for the invoke. - locations->AddTemp(Location::RegisterLocation(EAX)); } void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) { @@ -968,8 +1111,7 @@ void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) { Register byte_array = locations->InAt(0).AsRegister<Register>(); __ testl(byte_array, byte_array); - SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86( - invoke, locations->GetTemp(0).AsRegister<Register>()); + SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); codegen_->AddSlowPath(slow_path); __ j(kEqual, slow_path->GetEntryLabel()); @@ -1003,8 +1145,6 @@ void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invok InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetOut(Location::RegisterLocation(EAX)); - // Needs to be EAX for the invoke. 
- locations->AddTemp(Location::RegisterLocation(EAX)); } void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) { @@ -1013,8 +1153,7 @@ void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) Register string_to_copy = locations->InAt(0).AsRegister<Register>(); __ testl(string_to_copy, string_to_copy); - SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86( - invoke, locations->GetTemp(0).AsRegister<Register>()); + SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); codegen_->AddSlowPath(slow_path); __ j(kEqual, slow_path->GetEntryLabel()); @@ -1584,8 +1723,6 @@ void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) UNIMPLEMENTED_INTRINSIC(MathRoundDouble) UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) -UNIMPLEMENTED_INTRINSIC(StringIndexOf) -UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 1fc5432a89..0efa714a23 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -16,6 +16,8 @@ #include "intrinsics_x86_64.h" +#include <limits> + #include "arch/x86_64/instruction_set_features_x86_64.h" #include "code_generator_x86_64.h" #include "entrypoints/quick/quick_entrypoints.h" @@ -783,7 +785,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) { __ Bind(&nan); // output = 0 - __ xorq(out, out); + __ xorl(out, out); __ Bind(&done); } @@ -858,6 +860,157 @@ void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +static void CreateStringIndexOfLocations(HInvoke* invoke, + ArenaAllocator* allocator, + bool start_at_zero) { + LocationSummary* locations = new (allocator) LocationSummary(invoke, + LocationSummary::kCallOnSlowPath, + kIntrinsified); + // The data needs to be in RDI for scasw. So request that the string is there anyway. + locations->SetInAt(0, Location::RegisterLocation(RDI)); + // If we look for a constant char, we'll still have to copy it into RAX. So just request the + // allocator to do that anyway. We can still do the constant check by checking the parameter + // of the instruction explicitly. + // Note: This works as we don't clobber RAX anywhere. + locations->SetInAt(1, Location::RegisterLocation(RAX)); + if (!start_at_zero) { + locations->SetInAt(2, Location::RequiresRegister()); // The starting index. + } + // As we clobber RDI during execution anyway, also use it as the output. + locations->SetOut(Location::SameAsFirstInput()); + + // repne scasw uses RCX as the counter. + locations->AddTemp(Location::RegisterLocation(RCX)); + // Need another temporary to be able to compute the result. + locations->AddTemp(Location::RequiresRegister()); +} + +static void GenerateStringIndexOf(HInvoke* invoke, + X86_64Assembler* assembler, + CodeGeneratorX86_64* codegen, + ArenaAllocator* allocator, + bool start_at_zero) { + LocationSummary* locations = invoke->GetLocations(); + + // Note that the null check must have been done earlier.
+ DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>(); + CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + + // Check our assumptions for registers. + DCHECK_EQ(string_obj.AsRegister(), RDI); + DCHECK_EQ(search_value.AsRegister(), RAX); + DCHECK_EQ(counter.AsRegister(), RCX); + DCHECK_EQ(out.AsRegister(), RDI); + + // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically, + // or directly dispatch if we have a constant. + SlowPathCodeX86_64* slow_path = nullptr; + if (invoke->InputAt(1)->IsIntConstant()) { + if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) > + std::numeric_limits<uint16_t>::max()) { + // Always needs the slow-path. We could directly dispatch to it, but this case should be + // rare, so for simplicity just put the full slow-path down and branch unconditionally. + slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke); + codegen->AddSlowPath(slow_path); + __ jmp(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } + } else { + __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max())); + slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke); + codegen->AddSlowPath(slow_path); + __ j(kAbove, slow_path->GetEntryLabel()); + } + + // From here down, we know that we are looking for a char that fits in 16 bits. + // Location of reference to data array within the String object. + int32_t value_offset = mirror::String::ValueOffset().Int32Value(); + // Location of count within the String object. + int32_t count_offset = mirror::String::CountOffset().Int32Value(); + + // Load string length, i.e., the count field of the string. + __ movl(string_length, Address(string_obj, count_offset)); + + // Do a zero-length check. + // TODO: Support jecxz. + Label not_found_label; + __ testl(string_length, string_length); + __ j(kEqual, &not_found_label); + + if (start_at_zero) { + // Number of chars to scan is the same as the string length. + __ movl(counter, string_length); + + // Move to the start of the string. + __ addq(string_obj, Immediate(value_offset)); + } else { + CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>(); + + // Do a start_index check. + __ cmpl(start_index, string_length); + __ j(kGreaterEqual, &not_found_label); + + // Ensure we have a start index >= 0. + __ xorl(counter, counter); + __ cmpl(start_index, Immediate(0)); + __ cmov(kGreater, counter, start_index, false); // 32-bit copy is enough. + + // Move to the start of the string: string_obj + value_offset + 2 * start_index. + __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset)); + + // Now update ecx, the work counter: it will be string.length - start_index. + __ negq(counter); // Needs to be 64-bit negation, as the address computation is 64-bit. + __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0)); + } + + // Everything is set up for repne scasw: + // * Comparison address in RDI. + // * Counter in ECX. + __ repne_scasw(); + + // Did we find a match? + __ j(kNotEqual, &not_found_label); + + // Yes, we matched. Compute the index of the result.
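+ // repne scasw decremented the counter once per 16-bit unit scanned, including
+ // the matching one. Worked example: length 10, first match at index 3: four
+ // iterations leave counter = 6, so string_length - counter = 4, and the final
+ // leal yields the index 4 - 1 = 3.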
+ __ subl(string_length, counter); + __ leal(out, Address(string_length, -1)); + + Label done; + __ jmp(&done); + + // Failed to match; return -1. + __ Bind(&not_found_label); + __ movl(out, Immediate(-1)); + + // And join up at the end. + __ Bind(&done); + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) { + CreateStringIndexOfLocations(invoke, arena_, true); +} + +void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) { + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true); +} + +void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) { + CreateStringIndexOfLocations(invoke, arena_, false); +} + +void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) { + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false); +} + void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, LocationSummary::kCall, @@ -1434,8 +1587,6 @@ void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSE } UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) -UNIMPLEMENTED_INTRINSIC(StringIndexOf) -UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc index bf9b8e59c5..2535ea274a 100644 --- a/compiler/optimizing/licm.cc +++ b/compiler/optimizing/licm.cc @@ -39,8 +39,9 @@ static bool InputsAreDefinedBeforeLoop(HInstruction* instruction) { } } - if (instruction->HasEnvironment()) { - HEnvironment* environment = instruction->GetEnvironment(); + for (HEnvironment* environment = instruction->GetEnvironment(); + environment != nullptr; + environment = environment->GetParent()) { for (size_t i = 0, e = environment->Size(); i < e; ++i) { HInstruction* input = environment->GetInstructionAt(i); if (input != nullptr) { @@ -63,13 +64,15 @@ static bool InputsAreDefinedBeforeLoop(HInstruction* instruction) { * If `environment` has a loop header phi, we replace it with its first input.
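 * Since environments now form a chain (one per inlined frame), each environment
 * in the chain is processed.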
*/ static void UpdateLoopPhisIn(HEnvironment* environment, HLoopInformation* info) { - for (size_t i = 0, e = environment->Size(); i < e; ++i) { - HInstruction* input = environment->GetInstructionAt(i); - if (input != nullptr && IsPhiOf(input, info->GetHeader())) { - environment->RemoveAsUserOfInput(i); - HInstruction* incoming = input->InputAt(0); - environment->SetRawEnvAt(i, incoming); - incoming->AddEnvUseAt(environment, i); + for (; environment != nullptr; environment = environment->GetParent()) { + for (size_t i = 0, e = environment->Size(); i < e; ++i) { + HInstruction* input = environment->GetInstructionAt(i); + if (input != nullptr && IsPhiOf(input, info->GetHeader())) { + environment->RemoveAsUserOfInput(i); + HInstruction* incoming = input->InputAt(0); + environment->SetRawEnvAt(i, incoming); + incoming->AddEnvUseAt(environment, i); + } } } } diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index 7818c606db..4f259b5095 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -39,7 +39,7 @@ namespace art { static void TestCode(const uint16_t* data, const int* expected_order, size_t number_of_blocks) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index 52367730ed..7cb00a1923 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -32,7 +32,7 @@ namespace art { static HGraph* BuildGraph(const uint16_t* data, ArenaAllocator* allocator) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); builder.BuildGraph(*item); diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 8a96ee9ace..9d7d0b6c67 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -46,7 +46,7 @@ static void DumpBitVector(BitVector* vector, static void TestCode(const uint16_t* data, const char* expected) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); @@ -445,44 +445,40 @@ TEST(LivenessTest, Loop5) { TEST(LivenessTest, Loop6) { // Bitsets are made of: - // (constant0, constant4, constant5, phi in block 2, phi in block 8) + // (constant0, constant4, constant5, phi in block 2) const char* expected = "Block 0\n" - " live in: (00000)\n" - " live out: (11100)\n" - " kill: (11100)\n" + " live in: (0000)\n" + " live out: (1110)\n" + " kill: (1110)\n" "Block 1\n" - " live in: (11100)\n" - " live out: (01100)\n" - " kill: (00000)\n" + " live in: (1110)\n" + " live out: (0110)\n" + " kill: (0000)\n" "Block 2\n" // loop header - " live in: (01100)\n" - " live out: (01110)\n" - " kill: (00010)\n" + " live in: (0110)\n" + " live out: (0111)\n" + " kill: (0001)\n" "Block 3\n" - " live in: (01100)\n" - " live out: (01100)\n" - " kill: (00000)\n" - "Block 4\n" // original 
back edge - " live in: (01100)\n" - " live out: (01100)\n" - " kill: (00000)\n" - "Block 5\n" // original back edge - " live in: (01100)\n" - " live out: (01100)\n" - " kill: (00000)\n" + " live in: (0110)\n" + " live out: (0110)\n" + " kill: (0000)\n" + "Block 4\n" // back edge + " live in: (0110)\n" + " live out: (0110)\n" + " kill: (0000)\n" + "Block 5\n" // back edge + " live in: (0110)\n" + " live out: (0110)\n" + " kill: (0000)\n" "Block 6\n" // return block - " live in: (00010)\n" - " live out: (00000)\n" - " kill: (00000)\n" + " live in: (0001)\n" + " live out: (0000)\n" + " kill: (0000)\n" "Block 7\n" // exit block - " live in: (00000)\n" - " live out: (00000)\n" - " kill: (00000)\n" - "Block 8\n" // synthesized back edge - " live in: (01100)\n" - " live out: (01100)\n" - " kill: (00001)\n"; + " live in: (0000)\n" + " live out: (0000)\n" + " kill: (0000)\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index a1ae67009e..42aba04828 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -25,8 +25,6 @@ LocationSummary::LocationSummary(HInstruction* instruction, bool intrinsified) : inputs_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()), temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0), - environment_(instruction->GetBlock()->GetGraph()->GetArena(), - instruction->EnvironmentSize()), output_overlaps_(Location::kOutputOverlap), call_kind_(call_kind), stack_mask_(nullptr), @@ -37,10 +35,6 @@ LocationSummary::LocationSummary(HInstruction* instruction, for (size_t i = 0; i < instruction->InputCount(); ++i) { inputs_.Put(i, Location()); } - environment_.SetSize(instruction->EnvironmentSize()); - for (size_t i = 0; i < instruction->EnvironmentSize(); ++i) { - environment_.Put(i, Location()); - } instruction->SetLocations(this); if (NeedsSafepoint()) { diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index c3a99150c4..09bbb33042 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -525,14 +525,6 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { return temps_.Size(); } - void SetEnvironmentAt(uint32_t at, Location location) { - environment_.Put(at, location); - } - - Location GetEnvironmentAt(uint32_t at) const { - return environment_.Get(at); - } - Location Out() const { return output_; } bool CanCall() const { return call_kind_ != kNoCall; } @@ -602,7 +594,6 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { private: GrowableArray<Location> inputs_; GrowableArray<Location> temps_; - GrowableArray<Location> environment_; // Whether the output overlaps with any of the inputs. If it overlaps, then it cannot // share the same register as the inputs. 
Location::OutputOverlap output_overlaps_; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index d3ee770941..41adc7223e 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -17,6 +17,7 @@ #include "nodes.h" #include "ssa_builder.h" +#include "base/bit_vector-inl.h" #include "utils/growable_array.h" #include "scoped_thread_state_change.h" @@ -37,8 +38,9 @@ static void RemoveAsUser(HInstruction* instruction) { instruction->RemoveAsUserOfInput(i); } - HEnvironment* environment = instruction->GetEnvironment(); - if (environment != nullptr) { + for (HEnvironment* environment = instruction->GetEnvironment(); + environment != nullptr; + environment = environment->GetParent()) { for (size_t i = 0, e = environment->Size(); i < e; ++i) { if (environment->GetInstructionAt(i) != nullptr) { environment->RemoveAsUserOfInput(i); @@ -191,24 +193,6 @@ void HGraph::SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor) { void HGraph::SimplifyLoop(HBasicBlock* header) { HLoopInformation* info = header->GetLoopInformation(); - // If there are more than one back edge, make them branch to the same block that - // will become the only back edge. This simplifies finding natural loops in the - // graph. - // Also, if the loop is a do/while (that is the back edge is an if), change the - // back edge to be a goto. This simplifies code generation of suspend cheks. - if (info->NumberOfBackEdges() > 1 || info->GetBackEdges().Get(0)->GetLastInstruction()->IsIf()) { - HBasicBlock* new_back_edge = new (arena_) HBasicBlock(this, header->GetDexPc()); - AddBlock(new_back_edge); - new_back_edge->AddInstruction(new (arena_) HGoto()); - for (size_t pred = 0, e = info->GetBackEdges().Size(); pred < e; ++pred) { - HBasicBlock* back_edge = info->GetBackEdges().Get(pred); - back_edge->ReplaceSuccessor(header, new_back_edge); - } - info->ClearBackEdges(); - info->AddBackEdge(new_back_edge); - new_back_edge->AddSuccessor(header); - } - // Make sure the loop has only one pre header. This simplifies SSA building by having // to just look at the pre header to know which locals are initialized at entry of the // loop. @@ -218,11 +202,9 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { AddBlock(pre_header); pre_header->AddInstruction(new (arena_) HGoto()); - ArenaBitVector back_edges(arena_, GetBlocks().Size(), false); - HBasicBlock* back_edge = info->GetBackEdges().Get(0); for (size_t pred = 0; pred < header->GetPredecessors().Size(); ++pred) { HBasicBlock* predecessor = header->GetPredecessors().Get(pred); - if (predecessor != back_edge) { + if (!info->IsBackEdge(*predecessor)) { predecessor->ReplaceSuccessor(header, pre_header); pred--; } @@ -230,9 +212,17 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { pre_header->AddSuccessor(header); } - // Make sure the second predecessor of a loop header is the back edge. - if (header->GetPredecessors().Get(1) != info->GetBackEdges().Get(0)) { - header->SwapPredecessors(); + // Make sure the first predecessor of a loop header is the incoming block. 
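+ // If a back edge happens to come first, swap it with the first non-back-edge
+ // predecessor found among the remaining predecessors.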
+ if (info->IsBackEdge(*header->GetPredecessors().Get(0))) { + HBasicBlock* to_swap = header->GetPredecessors().Get(0); + for (size_t pred = 1, e = header->GetPredecessors().Size(); pred < e; ++pred) { + HBasicBlock* predecessor = header->GetPredecessors().Get(pred); + if (!info->IsBackEdge(*predecessor)) { + header->predecessors_.Put(pred, to_swap); + header->predecessors_.Put(0, predecessor); + break; + } + } } // Place the suspend check at the beginning of the header, so that live registers @@ -357,24 +347,59 @@ void HLoopInformation::PopulateRecursive(HBasicBlock* block) { } bool HLoopInformation::Populate() { - DCHECK_EQ(GetBackEdges().Size(), 1u); - HBasicBlock* back_edge = GetBackEdges().Get(0); - DCHECK(back_edge->GetDominator() != nullptr); - if (!header_->Dominates(back_edge)) { - // This loop is not natural. Do not bother going further. - return false; - } + DCHECK_EQ(blocks_.NumSetBits(), 0u) << "Loop information has already been populated"; + for (size_t i = 0, e = GetBackEdges().Size(); i < e; ++i) { + HBasicBlock* back_edge = GetBackEdges().Get(i); + DCHECK(back_edge->GetDominator() != nullptr); + if (!header_->Dominates(back_edge)) { + // This loop is not natural. Do not bother going further. + return false; + } - // Populate this loop: starting with the back edge, recursively add predecessors - // that are not already part of that loop. Set the header as part of the loop - // to end the recursion. - // This is a recursive implementation of the algorithm described in - // "Advanced Compiler Design & Implementation" (Muchnick) p192. - blocks_.SetBit(header_->GetBlockId()); - PopulateRecursive(back_edge); + // Populate this loop: starting with the back edge, recursively add predecessors + // that are not already part of that loop. Set the header as part of the loop + // to end the recursion. + // This is a recursive implementation of the algorithm described in + // "Advanced Compiler Design & Implementation" (Muchnick) p192. + blocks_.SetBit(header_->GetBlockId()); + PopulateRecursive(back_edge); + } return true; } +void HLoopInformation::Update() { + HGraph* graph = header_->GetGraph(); + for (uint32_t id : blocks_.Indexes()) { + HBasicBlock* block = graph->GetBlocks().Get(id); + // Reset loop information of non-header blocks inside the loop, except + // members of inner nested loops because those should already have been + // updated by their own LoopInformation. + if (block->GetLoopInformation() == this && block != header_) { + block->SetLoopInformation(nullptr); + } + } + blocks_.ClearAllBits(); + + if (back_edges_.IsEmpty()) { + // The loop has been dismantled, delete its suspend check and remove info + // from the header. + DCHECK(HasSuspendCheck()); + header_->RemoveInstruction(suspend_check_); + header_->SetLoopInformation(nullptr); + header_ = nullptr; + suspend_check_ = nullptr; + } else { + if (kIsDebugBuild) { + for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) { + DCHECK(header_->Dominates(back_edges_.Get(i))); + } + } + // This loop still has reachable back edges. Repopulate the list of blocks. 
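+ // Populate() DCHECKs that the block set is empty; that holds here because
+ // blocks_ was cleared above.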
+ bool populate_successful = Populate(); + DCHECK(populate_successful); + } +} + HBasicBlock* HLoopInformation::GetPreHeader() const { return header_->GetDominator(); } @@ -387,6 +412,14 @@ bool HLoopInformation::IsIn(const HLoopInformation& other) const { return other.blocks_.IsBitSet(header_->GetBlockId()); } +size_t HLoopInformation::GetLifetimeEnd() const { + size_t last_position = 0; + for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) { + last_position = std::max(back_edges_.Get(i)->GetLifetimeEnd(), last_position); + } + return last_position; +} + bool HBasicBlock::Dominates(HBasicBlock* other) const { // Walk up the dominator tree from `other`, to find out if `this` // is an ancestor. @@ -503,6 +536,16 @@ void HBasicBlock::RemoveInstructionOrPhi(HInstruction* instruction, bool ensure_ } } +void HEnvironment::CopyFrom(const GrowableArray<HInstruction*>& locals) { + for (size_t i = 0; i < locals.Size(); i++) { + HInstruction* instruction = locals.Get(i); + SetRawEnvAt(i, instruction); + if (instruction != nullptr) { + instruction->AddEnvUseAt(this, i); + } + } +} + void HEnvironment::CopyFrom(HEnvironment* env) { for (size_t i = 0; i < env->Size(); i++) { HInstruction* instruction = env->GetInstructionAt(i); @@ -963,8 +1006,9 @@ void HBasicBlock::DisconnectAndDelete() { HLoopInformation* loop_info = it.Current(); loop_info->Remove(this); if (loop_info->IsBackEdge(*this)) { - // This deliberately leaves the loop in an inconsistent state and will - // fail SSAChecker unless the entire loop is removed during the pass. + // If this was the last back edge of the loop, we deliberately leave the + // loop in an inconsistent state and will fail SSAChecker unless the + // entire loop is removed during the pass. loop_info->RemoveBackEdge(this); } } @@ -1040,20 +1084,6 @@ void HBasicBlock::DisconnectAndDelete() { SetGraph(nullptr); } -void HBasicBlock::UpdateLoopInformation() { - // Check if loop information points to a dismantled loop. If so, replace with - // the loop information of a larger loop which contains this block, or nullptr - // otherwise. We iterate in case the larger loop has been destroyed too. - while (IsInLoop() && loop_information_->GetBackEdges().IsEmpty()) { - if (IsLoopHeader()) { - HSuspendCheck* suspend_check = loop_information_->GetSuspendCheck(); - DCHECK_EQ(suspend_check->GetBlock(), this); - RemoveInstruction(suspend_check); - } - loop_information_ = loop_information_->GetPreHeader()->GetLoopInformation(); - } -} - void HBasicBlock::MergeWith(HBasicBlock* other) { DCHECK_EQ(GetGraph(), other->GetGraph()); DCHECK(GetDominatedBlocks().Contains(other)); @@ -1075,8 +1105,7 @@ void HBasicBlock::MergeWith(HBasicBlock* other) { HLoopInformation* loop_info = it.Current(); loop_info->Remove(other); if (loop_info->IsBackEdge(*other)) { - loop_info->ClearBackEdges(); - loop_info->AddBackEdge(this); + loop_info->ReplaceBackEdge(other, this); } } @@ -1307,11 +1336,9 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { loop_it.Current()->Add(to); } if (info->IsBackEdge(*at)) { - // Only `at` can become a back edge, as the inlined blocks - // are predecessors of `at`. - DCHECK_EQ(1u, info->NumberOfBackEdges()); - info->ClearBackEdges(); - info->AddBackEdge(to); + // Only `to` can become a back edge, as the inlined blocks + // are predecessors of `to`. 
+ info->ReplaceBackEdge(at, to); } } } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 63f3c95c7d..0089f22169 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -48,6 +48,7 @@ class HPhi; class HSuspendCheck; class LiveInterval; class LocationSummary; +class SlowPathCode; class SsaBuilder; static const int kDefaultNumberOfBlocks = 8; @@ -116,7 +117,11 @@ class HInstructionList { // Control-flow graph of a method. Contains a list of basic blocks. class HGraph : public ArenaObject<kArenaAllocMisc> { public: - HGraph(ArenaAllocator* arena, bool debuggable = false, int start_instruction_id = 0) + HGraph(ArenaAllocator* arena, + const DexFile& dex_file, + uint32_t method_idx, + bool debuggable = false, + int start_instruction_id = 0) : arena_(arena), blocks_(arena, kDefaultNumberOfBlocks), reverse_post_order_(arena, kDefaultNumberOfBlocks), @@ -130,6 +135,8 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { has_bounds_checks_(false), debuggable_(debuggable), current_instruction_id_(start_instruction_id), + dex_file_(dex_file), + method_idx_(method_idx), cached_null_constant_(nullptr), cached_int_constants_(std::less<int32_t>(), arena->Adapter()), cached_float_constants_(std::less<int32_t>(), arena->Adapter()), @@ -262,6 +269,14 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const; + const DexFile& GetDexFile() const { + return dex_file_; + } + + uint32_t GetMethodIdx() const { + return method_idx_; + } + private: void VisitBlockForDominatorTree(HBasicBlock* block, HBasicBlock* predecessor, @@ -338,6 +353,12 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { // The current id to assign to a newly added instruction. See HInstruction.id_. int32_t current_instruction_id_; + // The dex file the method comes from. + const DexFile& dex_file_; + + // The method index in the dex file. + const uint32_t method_idx_; + // Cached constants. HNullConstant* cached_null_constant_; ArenaSafeMap<int32_t, HIntConstant*> cached_int_constants_; @@ -397,19 +418,30 @@ class HLoopInformation : public ArenaObject<kArenaAllocMisc> { return back_edges_; } - HBasicBlock* GetSingleBackEdge() const { - DCHECK_EQ(back_edges_.Size(), 1u); - return back_edges_.Get(0); - } + // Returns the lifetime position of the back edge that has the + // greatest lifetime position. + size_t GetLifetimeEnd() const; - void ClearBackEdges() { - back_edges_.Reset(); + void ReplaceBackEdge(HBasicBlock* existing, HBasicBlock* new_back_edge) { + for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) { + if (back_edges_.Get(i) == existing) { + back_edges_.Put(i, new_back_edge); + return; + } + } + UNREACHABLE(); } - // Find blocks that are part of this loop. Returns whether the loop is a natural loop, + // Finds blocks that are part of this loop. Returns whether the loop is a natural loop, // that is, the header dominates the back edges. bool Populate(); + // Reanalyzes the loop by removing loop info from its blocks and re-running + // Populate(). If there are no back edges left, the loop info is completely + // removed as well as its SuspendCheck instruction. It must be run on nested + // inner loops first. + void Update(); + // Returns whether this loop information contains `block`. // Note that this loop information *must* be populated before entering this function.
bool Contains(const HBasicBlock& block) const; @@ -679,14 +711,9 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { loop_information_ = info; } - // Checks if the loop information points to a valid loop. If the loop has been - // dismantled (does not have a back edge any more), loop information is - // removed or replaced with the information of the first valid outer loop. - void UpdateLoopInformation(); - bool IsInLoop() const { return loop_information_ != nullptr; } - // Returns wheter this block dominates the blocked passed as parameter. + // Returns whether this block dominates the block passed as parameter. bool Dominates(HBasicBlock* block) const; size_t GetLifetimeStart() const { return lifetime_start_; } @@ -928,6 +955,14 @@ class HUseList : public ValueObject { return first_ != nullptr && first_->next_ == nullptr; } + size_t SizeSlow() const { + size_t count = 0; + for (HUseListNode<T>* current = first_; current != nullptr; current = current->GetNext()) { + ++count; + } + return count; + } + private: HUseListNode<T>* first_; }; @@ -1054,15 +1089,43 @@ class SideEffects : public ValueObject { // A HEnvironment object contains the values of virtual registers at a given location. class HEnvironment : public ArenaObject<kArenaAllocMisc> { public: - HEnvironment(ArenaAllocator* arena, size_t number_of_vregs) - : vregs_(arena, number_of_vregs) { + HEnvironment(ArenaAllocator* arena, + size_t number_of_vregs, + const DexFile& dex_file, + uint32_t method_idx, + uint32_t dex_pc) + : vregs_(arena, number_of_vregs), + locations_(arena, number_of_vregs), + parent_(nullptr), + dex_file_(dex_file), + method_idx_(method_idx), + dex_pc_(dex_pc) { vregs_.SetSize(number_of_vregs); for (size_t i = 0; i < number_of_vregs; i++) { vregs_.Put(i, HUserRecord<HEnvironment*>()); } + + locations_.SetSize(number_of_vregs); + for (size_t i = 0; i < number_of_vregs; ++i) { + locations_.Put(i, Location()); + } } - void CopyFrom(HEnvironment* env); + void SetAndCopyParentChain(ArenaAllocator* allocator, HEnvironment* parent) { + parent_ = new (allocator) HEnvironment(allocator, + parent->Size(), + parent->GetDexFile(), + parent->GetMethodIdx(), + parent->GetDexPc()); + if (parent->GetParent() != nullptr) { + parent_->SetAndCopyParentChain(allocator, parent->GetParent()); + } + parent_->CopyFrom(parent); + } + + void CopyFrom(const GrowableArray<HInstruction*>& locals); + void CopyFrom(HEnvironment* environment); + // Copy from `env`. If it's a loop phi for `loop_header`, copy the first // input to the loop phi instead. This is for inserting instructions that // require an environment (like HDeoptimization) in the loop pre-header. @@ -1080,6 +1143,28 @@ class HEnvironment : public ArenaObject<kArenaAllocMisc> { size_t Size() const { return vregs_.Size(); } + HEnvironment* GetParent() const { return parent_; } + + void SetLocationAt(size_t index, Location location) { + locations_.Put(index, location); + } + + Location GetLocationAt(size_t index) const { + return locations_.Get(index); + } + + uint32_t GetDexPc() const { + return dex_pc_; + } + + uint32_t GetMethodIdx() const { + return method_idx_; + } + + const DexFile& GetDexFile() const { + return dex_file_; + } + private: // Record instructions' use entries of this environment for constant-time removal. // It should only be called by HInstruction when a new environment use is added.
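The parent chain introduced above mirrors the inlining stack: each HEnvironment now records the dex file, method index, and dex pc it belongs to, and GetParent() walks outward toward the caller's environment. A minimal sketch of consuming that chain, assuming only the accessors added in this hunk (the helper name DumpEnvironmentChain is illustrative, not part of the patch):

// Walks an environment chain from the innermost inlined method outward.
// Hypothetical helper for illustration; uses only accessors defined above.
static void DumpEnvironmentChain(const HEnvironment* environment) {
  for (const HEnvironment* e = environment; e != nullptr; e = e->GetParent()) {
    LOG(INFO) << "method #" << e->GetMethodIdx()
              << " at dex pc " << e->GetDexPc()
              << " (" << e->Size() << " vregs)";
  }
}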
@@ -1090,6 +1175,11 @@ class HEnvironment : public ArenaObject<kArenaAllocMisc> { } GrowableArray<HUserRecord<HEnvironment*> > vregs_; + GrowableArray<Location> locations_; + HEnvironment* parent_; + const DexFile& dex_file_; + const uint32_t method_idx_; + const uint32_t dex_pc_; friend class HInstruction; @@ -1221,6 +1311,11 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { } virtual bool NeedsEnvironment() const { return false; } + virtual uint32_t GetDexPc() const { + LOG(FATAL) << "GetDexPc() cannot be called on an instruction that" + " does not need an environment"; + UNREACHABLE(); + } virtual bool IsControlFlow() const { return false; } virtual bool CanThrow() const { return false; } bool HasSideEffects() const { return side_effects_.HasSideEffects(); } @@ -1298,14 +1393,30 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { // copying, the uses lists are being updated. void CopyEnvironmentFrom(HEnvironment* environment) { ArenaAllocator* allocator = GetBlock()->GetGraph()->GetArena(); - environment_ = new (allocator) HEnvironment(allocator, environment->Size()); + environment_ = new (allocator) HEnvironment( + allocator, + environment->Size(), + environment->GetDexFile(), + environment->GetMethodIdx(), + environment->GetDexPc()); environment_->CopyFrom(environment); + if (environment->GetParent() != nullptr) { + environment_->SetAndCopyParentChain(allocator, environment->GetParent()); + } } void CopyEnvironmentFromWithLoopPhiAdjustment(HEnvironment* environment, HBasicBlock* block) { ArenaAllocator* allocator = GetBlock()->GetGraph()->GetArena(); - environment_ = new (allocator) HEnvironment(allocator, environment->Size()); + environment_ = new (allocator) HEnvironment( + allocator, + environment->Size(), + environment->GetDexFile(), + environment->GetMethodIdx(), + environment->GetDexPc()); + if (environment->GetParent() != nullptr) { + environment_->SetAndCopyParentChain(allocator, environment->GetParent()); + } environment_->CopyFromWithLoopPhiAdjustment(environment, block); } @@ -1682,7 +1793,7 @@ class HDeoptimize : public HTemplateInstruction<1> { bool NeedsEnvironment() const OVERRIDE { return true; } bool CanThrow() const OVERRIDE { return true; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } DECLARE_INSTRUCTION(Deoptimize); @@ -2086,15 +2197,16 @@ class HFloatConstant : public HConstant { size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); } bool IsMinusOne() const OVERRIDE { - return bit_cast<uint32_t, float>(AsFloatConstant()->GetValue()) == - bit_cast<uint32_t, float>((-1.0f)); + return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>((-1.0f)); } bool IsZero() const OVERRIDE { - return AsFloatConstant()->GetValue() == 0.0f; + return value_ == 0.0f; } bool IsOne() const OVERRIDE { - return bit_cast<uint32_t, float>(AsFloatConstant()->GetValue()) == - bit_cast<uint32_t, float>(1.0f); + return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>(1.0f); + } + bool IsNaN() const { + return std::isnan(value_); } DECLARE_INSTRUCTION(FloatConstant); @@ -2124,15 +2236,16 @@ class HDoubleConstant : public HConstant { size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); } bool IsMinusOne() const OVERRIDE { - return bit_cast<uint64_t, double>(AsDoubleConstant()->GetValue()) == - bit_cast<uint64_t, double>((-1.0)); + return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>((-1.0)); } bool IsZero() const 
OVERRIDE { - return AsDoubleConstant()->GetValue() == 0.0; + return value_ == 0.0; } bool IsOne() const OVERRIDE { - return bit_cast<uint64_t, double>(AsDoubleConstant()->GetValue()) == - bit_cast<uint64_t, double>(1.0); + return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>(1.0); + } + bool IsNaN() const { + return std::isnan(value_); } DECLARE_INSTRUCTION(DoubleConstant); @@ -2251,7 +2364,7 @@ class HInvoke : public HInstruction { Primitive::Type GetType() const OVERRIDE { return return_type_; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } uint32_t GetDexMethodIndex() const { return dex_method_index_; } @@ -2468,7 +2581,7 @@ class HNewInstance : public HExpression<0> { type_index_(type_index), entrypoint_(entrypoint) {} - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } uint16_t GetTypeIndex() const { return type_index_; } // Calls runtime so needs an environment. @@ -2520,7 +2633,7 @@ class HNewArray : public HExpression<1> { SetRawInputAt(0, length); } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } uint16_t GetTypeIndex() const { return type_index_; } // Calls runtime so needs an environment. @@ -2615,7 +2728,7 @@ class HDiv : public HBinaryOperation { return (y == -1) ? -x : x / y; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } DECLARE_INSTRUCTION(Div); @@ -2642,7 +2755,7 @@ class HRem : public HBinaryOperation { return (y == -1) ? 0 : x % y; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } DECLARE_INSTRUCTION(Rem); @@ -2669,7 +2782,7 @@ class HDivZeroCheck : public HExpression<1> { bool NeedsEnvironment() const OVERRIDE { return true; } bool CanThrow() const OVERRIDE { return true; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } DECLARE_INSTRUCTION(DivZeroCheck); @@ -2864,7 +2977,7 @@ class HTypeConversion : public HExpression<1> { // Required by the x86 and ARM code generators when producing calls // to the runtime. 
- uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; } @@ -2974,7 +3087,7 @@ class HNullCheck : public HExpression<1> { bool CanBeNull() const OVERRIDE { return false; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } DECLARE_INSTRUCTION(NullCheck); @@ -3137,7 +3250,7 @@ class HArraySet : public HTemplateInstruction<3> { bool NeedsTypeCheck() const { return needs_type_check_; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } HInstruction* GetArray() const { return InputAt(0); } HInstruction* GetIndex() const { return InputAt(1); } @@ -3207,7 +3320,7 @@ class HBoundsCheck : public HExpression<2> { bool CanThrow() const OVERRIDE { return true; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } DECLARE_INSTRUCTION(BoundsCheck); @@ -3247,19 +3360,25 @@ class HTemporary : public HTemplateInstruction<0> { class HSuspendCheck : public HTemplateInstruction<0> { public: explicit HSuspendCheck(uint32_t dex_pc) - : HTemplateInstruction(SideEffects::None()), dex_pc_(dex_pc) {} + : HTemplateInstruction(SideEffects::None()), dex_pc_(dex_pc), slow_path_(nullptr) {} bool NeedsEnvironment() const OVERRIDE { return true; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } + void SetSlowPath(SlowPathCode* slow_path) { slow_path_ = slow_path; } + SlowPathCode* GetSlowPath() const { return slow_path_; } DECLARE_INSTRUCTION(SuspendCheck); private: const uint32_t dex_pc_; + // Only used for code generation, in order to share the same slow path between back edges + // of the same loop. + SlowPathCode* slow_path_; + DISALLOW_COPY_AND_ASSIGN(HSuspendCheck); }; @@ -3286,7 +3405,7 @@ class HLoadClass : public HExpression<0> { size_t ComputeHashCode() const OVERRIDE { return type_index_; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } uint16_t GetTypeIndex() const { return type_index_; } bool IsReferrersClass() const { return is_referrers_class_; } @@ -3360,7 +3479,7 @@ class HLoadString : public HExpression<0> { size_t ComputeHashCode() const OVERRIDE { return string_index_; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } uint32_t GetStringIndex() const { return string_index_; } // TODO: Can we deopt or debug when we resolve a string?
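Every instruction that records a dex pc now marks its accessor OVERRIDE against the virtual HInstruction::GetDexPc() declared earlier, which LOG(FATAL)s for instructions that do not need an environment. A hypothetical new instruction following the same convention could look like the sketch below (HMyRuntimeCall is invented for illustration; the DECLARE_INSTRUCTION visitor boilerplate is elided):

class HMyRuntimeCall : public HTemplateInstruction<0> {
 public:
  explicit HMyRuntimeCall(uint32_t dex_pc)
      : HTemplateInstruction(SideEffects::None()), dex_pc_(dex_pc) {}

  // Calls into the runtime, so it needs an environment and a recorded dex pc.
  bool NeedsEnvironment() const OVERRIDE { return true; }
  uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }

 private:
  const uint32_t dex_pc_;

  DISALLOW_COPY_AND_ASSIGN(HMyRuntimeCall);
};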
@@ -3398,7 +3517,7 @@ class HClinitCheck : public HExpression<1> { return true; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } HLoadClass* GetLoadClass() const { return InputAt(0)->AsLoadClass(); } @@ -3498,7 +3617,7 @@ class HThrow : public HTemplateInstruction<1> { bool CanThrow() const OVERRIDE { return true; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } DECLARE_INSTRUCTION(Throw); @@ -3532,7 +3651,7 @@ class HInstanceOf : public HExpression<2> { return false; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } bool IsClassFinal() const { return class_is_final_; } @@ -3607,7 +3726,7 @@ class HCheckCast : public HTemplateInstruction<2> { bool MustDoNullCheck() const { return must_do_null_check_; } void ClearMustDoNullCheck() { must_do_null_check_ = false; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } bool IsClassFinal() const { return class_is_final_; } @@ -3653,7 +3772,7 @@ class HMonitorOperation : public HTemplateInstruction<1> { bool NeedsEnvironment() const OVERRIDE { return true; } bool CanThrow() const OVERRIDE { return true; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } bool IsEnter() const { return kind_ == kEnter; } diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc index 4e83ce576c..2736453ccc 100644 --- a/compiler/optimizing/nodes_test.cc +++ b/compiler/optimizing/nodes_test.cc @@ -16,6 +16,7 @@ #include "base/arena_allocator.h" #include "nodes.h" +#include "optimizing_unit_test.h" #include "gtest/gtest.h" @@ -29,7 +30,7 @@ TEST(Node, RemoveInstruction) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -49,7 +50,8 @@ TEST(Node, RemoveInstruction) { first_block->AddSuccessor(exit_block); exit_block->AddInstruction(new (&allocator) HExit()); - HEnvironment* environment = new (&allocator) HEnvironment(&allocator, 1); + HEnvironment* environment = new (&allocator) HEnvironment( + &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0); null_check->SetRawEnvironment(environment); environment->SetRawEnvAt(0, parameter); parameter->AddEnvUseAt(null_check->GetEnvironment(), 0); @@ -70,7 +72,7 @@ TEST(Node, InsertInstruction) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -96,7 +98,7 @@ TEST(Node, AddInstruction) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -112,4 +114,51 @@ TEST(Node, AddInstruction) { ASSERT_TRUE(parameter->GetUses().HasOnlyOneUse()); } +TEST(Node, ParentEnvironment) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + HGraph* graph = CreateGraph(&allocator); + HBasicBlock* entry = new (&allocator) HBasicBlock(graph); + graph->AddBlock(entry); + graph->SetEntryBlock(entry); + HInstruction* 
parameter1 = new (&allocator) HParameterValue(0, Primitive::kPrimNot); + HInstruction* with_environment = new (&allocator) HNullCheck(parameter1, 0); + entry->AddInstruction(parameter1); + entry->AddInstruction(with_environment); + entry->AddInstruction(new (&allocator) HExit()); + + ASSERT_TRUE(parameter1->HasUses()); + ASSERT_TRUE(parameter1->GetUses().HasOnlyOneUse()); + + HEnvironment* environment = new (&allocator) HEnvironment( + &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0); + GrowableArray<HInstruction*> array(&allocator, 1); + array.Add(parameter1); + + environment->CopyFrom(array); + with_environment->SetRawEnvironment(environment); + + ASSERT_TRUE(parameter1->HasEnvironmentUses()); + ASSERT_TRUE(parameter1->GetEnvUses().HasOnlyOneUse()); + + HEnvironment* parent1 = new (&allocator) HEnvironment( + &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0); + parent1->CopyFrom(array); + + ASSERT_EQ(parameter1->GetEnvUses().SizeSlow(), 2u); + + HEnvironment* parent2 = new (&allocator) HEnvironment( + &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0); + parent2->CopyFrom(array); + parent1->SetAndCopyParentChain(&allocator, parent2); + + // One use for parent2, and one other use for the new parent of parent1. + ASSERT_EQ(parameter1->GetEnvUses().SizeSlow(), 4u); + + // We have copied the parent chain. So we now have two more uses. + environment->SetAndCopyParentChain(&allocator, parent1); + ASSERT_EQ(parameter1->GetEnvUses().SizeSlow(), 6u); +} + } // namespace art diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index b2c13adf35..7aea249c42 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -21,6 +21,7 @@ #include "cfi_test.h" #include "gtest/gtest.h" #include "optimizing/code_generator.h" +#include "optimizing/optimizing_unit_test.h" #include "utils/assembler.h" #include "optimizing/optimizing_cfi_test_expected.inc" @@ -45,10 +46,10 @@ class OptimizingCFITest : public CFITest { std::unique_ptr<const InstructionSetFeatures> isa_features; std::string error; isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error)); - HGraph graph(&allocator); + HGraph* graph = CreateGraph(&allocator); // Generate simple frame with some spills. 
std::unique_ptr<CodeGenerator> code_gen( - CodeGenerator::Create(&graph, isa, *isa_features.get(), opts)); + CodeGenerator::Create(graph, isa, *isa_features.get(), opts)); const int frame_size = 64; int core_reg = 0; int fp_reg = 0; diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 05451bcaa6..8bb5d8ebae 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -320,8 +320,10 @@ static void RunOptimizations(HGraph* graph, const DexCompilationUnit& dex_compilation_unit, PassInfoPrinter* pass_info_printer, StackHandleScopeCollection* handles) { - HDeadCodeElimination dce1(graph, stats); - HDeadCodeElimination dce2(graph, stats, "dead_code_elimination_final"); + HDeadCodeElimination dce1(graph, stats, + HDeadCodeElimination::kInitialDeadCodeEliminationPassName); + HDeadCodeElimination dce2(graph, stats, + HDeadCodeElimination::kFinalDeadCodeEliminationPassName); HConstantFolding fold1(graph); InstructionSimplifier simplify1(graph, stats); HBooleanSimplifier boolean_simplify(graph); @@ -512,7 +514,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite ArenaAllocator arena(Runtime::Current()->GetArenaPool()); HGraph* graph = new (&arena) HGraph( - &arena, compiler_driver->GetCompilerOptions().GetDebuggable()); + &arena, dex_file, method_idx, compiler_driver->GetCompilerOptions().GetDebuggable()); // For testing purposes, we put a special marker on method names that should be compiled // with this compiler. This makes sure we're not regressing. diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index 65c84e6942..b6b1bb1cad 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -29,25 +29,26 @@ enum MethodCompilationStat { kCompiledBaseline, kCompiledOptimized, kCompiledQuick, - kInstructionSimplifications, kInlinedInvoke, - kNotCompiledUnsupportedIsa, - kNotCompiledPathological, + kInstructionSimplifications, + kNotCompiledBranchOutsideMethodCode, + kNotCompiledCannotBuildSSA, + kNotCompiledCantAccesType, + kNotCompiledClassNotVerified, kNotCompiledHugeMethod, kNotCompiledLargeMethodNoBranches, - kNotCompiledCannotBuildSSA, kNotCompiledNoCodegen, - kNotCompiledUnresolvedMethod, - kNotCompiledUnresolvedField, kNotCompiledNonSequentialRegPair, + kNotCompiledPathological, kNotCompiledSpaceFilter, - kNotOptimizedTryCatch, - kNotOptimizedDisabled, - kNotCompiledCantAccesType, - kNotOptimizedRegisterAllocator, kNotCompiledUnhandledInstruction, + kNotCompiledUnresolvedField, + kNotCompiledUnresolvedMethod, + kNotCompiledUnsupportedIsa, kNotCompiledVerifyAtRuntime, - kNotCompiledClassNotVerified, + kNotOptimizedDisabled, + kNotOptimizedRegisterAllocator, + kNotOptimizedTryCatch, kRemovedCheckedCast, kRemovedDeadInstruction, kRemovedNullCheck, @@ -98,23 +99,24 @@ class OptimizingCompilerStats { case kCompiledQuick : return "kCompiledQuick"; case kInlinedInvoke : return "kInlinedInvoke"; case kInstructionSimplifications: return "kInstructionSimplifications"; - case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa"; - case kNotCompiledPathological : return "kNotCompiledPathological"; + case kNotCompiledBranchOutsideMethodCode: return "kNotCompiledBranchOutsideMethodCode"; + case kNotCompiledCannotBuildSSA : return "kNotCompiledCannotBuildSSA"; + case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType"; + case 
kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified"; case kNotCompiledHugeMethod : return "kNotCompiledHugeMethod"; case kNotCompiledLargeMethodNoBranches : return "kNotCompiledLargeMethodNoBranches"; - case kNotCompiledCannotBuildSSA : return "kNotCompiledCannotBuildSSA"; case kNotCompiledNoCodegen : return "kNotCompiledNoCodegen"; - case kNotCompiledUnresolvedMethod : return "kNotCompiledUnresolvedMethod"; - case kNotCompiledUnresolvedField : return "kNotCompiledUnresolvedField"; case kNotCompiledNonSequentialRegPair : return "kNotCompiledNonSequentialRegPair"; - case kNotOptimizedDisabled : return "kNotOptimizedDisabled"; - case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch"; - case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType"; + case kNotCompiledPathological : return "kNotCompiledPathological"; case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter"; - case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator"; case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction"; + case kNotCompiledUnresolvedField : return "kNotCompiledUnresolvedField"; + case kNotCompiledUnresolvedMethod : return "kNotCompiledUnresolvedMethod"; + case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa"; case kNotCompiledVerifyAtRuntime : return "kNotCompiledVerifyAtRuntime"; - case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified"; + case kNotOptimizedDisabled : return "kNotOptimizedDisabled"; + case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator"; + case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch"; case kRemovedCheckedCast: return "kRemovedCheckedCast"; case kRemovedDeadInstruction: return "kRemovedDeadInstruction"; case kRemovedNullCheck: return "kRemovedNullCheck"; diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index 6b236927da..4f8ec65e43 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -72,11 +72,16 @@ void RemoveSuspendChecks(HGraph* graph) { } } +inline HGraph* CreateGraph(ArenaAllocator* allocator) { + return new (allocator) HGraph( + allocator, *reinterpret_cast<DexFile*>(allocator->Alloc(sizeof(DexFile))), -1); +} + // Create a control-flow graph from Dex instructions. 
inline HGraph* CreateCFG(ArenaAllocator* allocator, const uint16_t* data, Primitive::Type return_type = Primitive::kPrimInt) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HGraphBuilder builder(graph, return_type); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc index 293fde978e..c56100dfa1 100644 --- a/compiler/optimizing/pretty_printer_test.cc +++ b/compiler/optimizing/pretty_printer_test.cc @@ -30,7 +30,7 @@ namespace art { static void TestCode(const uint16_t* data, const char* expected) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 2375595978..f53f846326 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -1534,9 +1534,10 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { } while (env_use != nullptr && env_use->GetPosition() <= range->GetEnd()) { - DCHECK(current->CoversSlow(env_use->GetPosition()) || (env_use->GetPosition() == range->GetEnd())); - LocationSummary* locations = env_use->GetUser()->GetLocations(); - locations->SetEnvironmentAt(env_use->GetInputIndex(), source); + DCHECK(current->CoversSlow(env_use->GetPosition()) + || (env_use->GetPosition() == range->GetEnd())); + HEnvironment* environment = env_use->GetUser()->GetEnvironment(); + environment->SetLocationAt(env_use->GetInputIndex(), source); env_use = env_use->GetNext(); } diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 8c6d904a4c..b72ffb8bf7 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -38,7 +38,7 @@ namespace art { static bool Check(const uint16_t* data) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); builder.BuildGraph(*item); @@ -60,7 +60,7 @@ static bool Check(const uint16_t* data) { TEST(RegisterAllocatorTest, ValidateIntervals) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); @@ -255,7 +255,7 @@ TEST(RegisterAllocatorTest, Loop2) { } static HGraph* BuildSSAGraph(const uint16_t* data, ArenaAllocator* allocator) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); builder.BuildGraph(*item); @@ -463,7 +463,7 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator, HPhi** phi, HInstruction** input1, HInstruction** input2) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); 
HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -593,7 +593,7 @@ TEST(RegisterAllocatorTest, PhiHint) { static HGraph* BuildFieldReturn(ArenaAllocator* allocator, HInstruction** field, HInstruction** ret) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -661,7 +661,7 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) { static HGraph* BuildTwoSubs(ArenaAllocator* allocator, HInstruction** first_sub, HInstruction** second_sub) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -731,7 +731,7 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) { static HGraph* BuildDiv(ArenaAllocator* allocator, HInstruction** div) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -783,7 +783,7 @@ TEST(RegisterAllocatorTest, SpillInactive) { // Create a synthesized graph to please the register_allocator and // ssa_liveness_analysis code. ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index b66e655d2b..59a2852735 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -332,7 +332,7 @@ void SsaBuilder::BuildSsa() { } HInstruction* SsaBuilder::ValueOfLocal(HBasicBlock* block, size_t local) { - return GetLocalsFor(block)->GetInstructionAt(local); + return GetLocalsFor(block)->Get(local); } void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { @@ -349,7 +349,7 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { HPhi* phi = new (GetGraph()->GetArena()) HPhi( GetGraph()->GetArena(), local, 0, Primitive::kPrimVoid); block->AddPhi(phi); - current_locals_->SetRawEnvAt(local, phi); + current_locals_->Put(local, phi); } } // Save the loop header so that the last phase of the analysis knows which @@ -389,7 +389,7 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { block->AddPhi(phi); value = phi; } - current_locals_->SetRawEnvAt(local, value); + current_locals_->Put(local, value); } } @@ -520,7 +520,7 @@ HInstruction* SsaBuilder::GetReferenceTypeEquivalent(HInstruction* value) { } void SsaBuilder::VisitLoadLocal(HLoadLocal* load) { - HInstruction* value = current_locals_->GetInstructionAt(load->GetLocal()->GetRegNumber()); + HInstruction* value = current_locals_->Get(load->GetLocal()->GetRegNumber()); // If the operation requests a specific type, we make sure its input is of that type. 
if (load->GetType() != value->GetType()) { if (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble) { @@ -534,7 +534,7 @@ void SsaBuilder::VisitLoadLocal(HLoadLocal* load) { } void SsaBuilder::VisitStoreLocal(HStoreLocal* store) { - current_locals_->SetRawEnvAt(store->GetLocal()->GetRegNumber(), store->InputAt(1)); + current_locals_->Put(store->GetLocal()->GetRegNumber(), store->InputAt(1)); store->GetBlock()->RemoveInstruction(store); } @@ -543,8 +543,12 @@ void SsaBuilder::VisitInstruction(HInstruction* instruction) { return; } HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment( - GetGraph()->GetArena(), current_locals_->Size()); - environment->CopyFrom(current_locals_); + GetGraph()->GetArena(), + current_locals_->Size(), + GetGraph()->GetDexFile(), + GetGraph()->GetMethodIdx(), + instruction->GetDexPc()); + environment->CopyFrom(*current_locals_); instruction->SetRawEnvironment(environment); } diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 265e95b4ac..1c83c4ba48 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -58,14 +58,15 @@ class SsaBuilder : public HGraphVisitor { void BuildSsa(); - HEnvironment* GetLocalsFor(HBasicBlock* block) { - HEnvironment* env = locals_for_.Get(block->GetBlockId()); - if (env == nullptr) { - env = new (GetGraph()->GetArena()) HEnvironment( + GrowableArray<HInstruction*>* GetLocalsFor(HBasicBlock* block) { + GrowableArray<HInstruction*>* locals = locals_for_.Get(block->GetBlockId()); + if (locals == nullptr) { + locals = new (GetGraph()->GetArena()) GrowableArray<HInstruction*>( GetGraph()->GetArena(), GetGraph()->GetNumberOfVRegs()); - locals_for_.Put(block->GetBlockId(), env); + locals->SetSize(GetGraph()->GetNumberOfVRegs()); + locals_for_.Put(block->GetBlockId(), locals); } - return env; + return locals; } HInstruction* ValueOfLocal(HBasicBlock* block, size_t local); @@ -93,14 +94,14 @@ class SsaBuilder : public HGraphVisitor { static HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type); // Locals for the current block being visited. - HEnvironment* current_locals_; + GrowableArray<HInstruction*>* current_locals_; // Keep track of loop headers found. The last phase of the analysis iterates // over these blocks to set the inputs of their phis. GrowableArray<HBasicBlock*> loop_headers_; // HEnvironment for each block. - GrowableArray<HEnvironment*> locals_for_; + GrowableArray<GrowableArray<HInstruction*>*> locals_for_; DISALLOW_COPY_AND_ASSIGN(SsaBuilder); }; diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 17841685b1..250eb04a1c 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -75,9 +75,7 @@ void SsaLivenessAnalysis::LinearizeGraph() { HBasicBlock* block = it.Current(); size_t number_of_forward_predecessors = block->GetPredecessors().Size(); if (block->IsLoopHeader()) { - // We rely on having simplified the CFG. - DCHECK_EQ(1u, block->GetLoopInformation()->NumberOfBackEdges()); - number_of_forward_predecessors--; + number_of_forward_predecessors -= block->GetLoopInformation()->NumberOfBackEdges(); } forward_predecessors.Put(block->GetBlockId(), number_of_forward_predecessors); } @@ -220,10 +218,11 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { // Process the environment first, because we know their uses come after // or at the same liveness position of inputs. 
- if (current->HasEnvironment()) { + for (HEnvironment* environment = current->GetEnvironment(); + environment != nullptr; + environment = environment->GetParent()) { // Handle environment uses. See statements (b) and (c) of the // SsaLivenessAnalysis. - HEnvironment* environment = current->GetEnvironment(); for (size_t i = 0, e = environment->Size(); i < e; ++i) { HInstruction* instruction = environment->GetInstructionAt(i); bool should_be_live = ShouldBeLiveForEnvironment(instruction); @@ -233,7 +232,7 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { } if (instruction != nullptr) { instruction->GetLiveInterval()->AddUse( - current, i, /* is_environment */ true, should_be_live); + current, environment, i, should_be_live); } } } @@ -245,7 +244,7 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { // to be materialized. if (input->HasSsaIndex()) { live_in->SetBit(input->GetSsaIndex()); - input->GetLiveInterval()->AddUse(current, i, /* is_environment */ false); + input->GetLiveInterval()->AddUse(current, /* environment */ nullptr, i); } } } @@ -264,13 +263,12 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { } if (block->IsLoopHeader()) { - HBasicBlock* back_edge = block->GetLoopInformation()->GetBackEdges().Get(0); + size_t last_position = block->GetLoopInformation()->GetLifetimeEnd(); // For all live_in instructions at the loop header, we need to create a range // that covers the full loop. for (uint32_t idx : live_in->Indexes()) { HInstruction* current = instructions_from_ssa_index_.Get(idx); - current->GetLiveInterval()->AddLoopRange(block->GetLifetimeStart(), - back_edge->GetLifetimeEnd()); + current->GetLiveInterval()->AddLoopRange(block->GetLifetimeStart(), last_position); } } } diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index 7b98c4eab5..82c5454bb0 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -104,13 +104,13 @@ class LiveRange FINAL : public ArenaObject<kArenaAllocMisc> { class UsePosition : public ArenaObject<kArenaAllocMisc> { public: UsePosition(HInstruction* user, + HEnvironment* environment, size_t input_index, - bool is_environment, size_t position, UsePosition* next) : user_(user), + environment_(environment), input_index_(input_index), - is_environment_(is_environment), position_(position), next_(next) { DCHECK((user == nullptr) @@ -129,7 +129,7 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> { HInstruction* GetUser() const { return user_; } - bool GetIsEnvironment() const { return is_environment_; } + bool GetIsEnvironment() const { return environment_ != nullptr; } bool IsSynthesized() const { return user_ == nullptr; } size_t GetInputIndex() const { return input_index_; } @@ -144,7 +144,7 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> { UsePosition* Dup(ArenaAllocator* allocator) const { return new (allocator) UsePosition( - user_, input_index_, is_environment_, position_, + user_, environment_, input_index_, position_, next_ == nullptr ? 
nullptr : next_->Dup(allocator)); } @@ -159,8 +159,8 @@ private: HInstruction* const user_; + HEnvironment* const environment_; const size_t input_index_; - const bool is_environment_; const size_t position_; UsePosition* next_; @@ -237,15 +237,16 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { DCHECK(first_env_use_ == nullptr) << "A temporary cannot have environment user"; size_t position = instruction->GetLifetimePosition(); first_use_ = new (allocator_) UsePosition( - instruction, temp_index, /* is_environment */ false, position, first_use_); + instruction, /* environment */ nullptr, temp_index, position, first_use_); AddRange(position, position + 1); } void AddUse(HInstruction* instruction, + HEnvironment* environment, size_t input_index, - bool is_environment, bool keep_alive = false) { // Set the use within the instruction. + bool is_environment = (environment != nullptr); size_t position = instruction->GetLifetimePosition() + 1; LocationSummary* locations = instruction->GetLocations(); if (!is_environment) { @@ -279,7 +280,7 @@ } DCHECK(first_use_->GetPosition() + 1 == position); UsePosition* new_use = new (allocator_) UsePosition( - instruction, input_index, is_environment, position, cursor->GetNext()); + instruction, environment, input_index, position, cursor->GetNext()); cursor->SetNext(new_use); if (first_range_->GetEnd() == first_use_->GetPosition()) { first_range_->end_ = position; @@ -289,10 +290,10 @@ if (is_environment) { first_env_use_ = new (allocator_) UsePosition( - instruction, input_index, is_environment, position, first_env_use_); + instruction, environment, input_index, position, first_env_use_); } else { first_use_ = new (allocator_) UsePosition( - instruction, input_index, is_environment, position, first_use_); + instruction, environment, input_index, position, first_use_); } if (is_environment && !keep_alive) { @@ -331,7 +332,7 @@ AddBackEdgeUses(*block); } first_use_ = new (allocator_) UsePosition( - instruction, input_index, false, block->GetLifetimeEnd(), first_use_); + instruction, /* environment */ nullptr, input_index, block->GetLifetimeEnd(), first_use_); } void AddRange(size_t start, size_t end) { @@ -973,7 +974,11 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { break; } - size_t back_edge_use_position = current->GetSingleBackEdge()->GetLifetimeEnd(); + // We're only adding a synthesized use at the last back edge. Adding synthesized uses on + // all back edges is not necessary: anything used in the loop will have its use at the + // last back edge. If we wanted one branch in a loop to get better register allocation + // than another, it is the linear order we would need to change. + size_t back_edge_use_position = current->GetLifetimeEnd(); if ((first_use_ != nullptr) && (first_use_->GetPosition() <= back_edge_use_position)) { // There was a use already seen in this loop. Therefore the previous call to `AddUse` // already inserted the backedge use. We can stop going outward.
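Because each environment use now carries its HEnvironment* directly, the register allocator can record the allocated location on the environment itself instead of on the user's LocationSummary (see the register_allocator.cc hunk earlier in this patch). A condensed sketch of that consumer side, under the assumption that the use was recorded by AddUse above (the helper name AssignEnvironmentLocation is illustrative):

// Records where the allocator placed the value for one environment use.
static void AssignEnvironmentLocation(UsePosition* use, Location source) {
  DCHECK(use->GetIsEnvironment());
  HEnvironment* environment = use->GetUser()->GetEnvironment();
  environment->SetLocationAt(use->GetInputIndex(), source);
}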
@@ -985,8 +990,11 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { || back_edge_use_position > last_in_new_list->GetPosition()); UsePosition* new_use = new (allocator_) UsePosition( - nullptr, UsePosition::kNoInput, /* is_environment */ false, - back_edge_use_position, nullptr); + /* user */ nullptr, + /* environment */ nullptr, + UsePosition::kNoInput, + back_edge_use_position, + /* next */ nullptr); if (last_in_new_list != nullptr) { // Going outward. The latest created use needs to point to the new use. diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc index 00c241b85a..fb3e7d798c 100644 --- a/compiler/optimizing/ssa_test.cc +++ b/compiler/optimizing/ssa_test.cc @@ -78,7 +78,7 @@ static void ReNumberInstructions(HGraph* graph) { static void TestCode(const uint16_t* data, const char* expected) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); @@ -373,30 +373,26 @@ TEST(SsaTest, Loop6) { const char* expected = "BasicBlock 0, succ: 1\n" " 0: IntConstant 0 [5]\n" - " 1: IntConstant 4 [14, 8, 8]\n" - " 2: IntConstant 5 [14]\n" + " 1: IntConstant 4 [5, 8, 8]\n" + " 2: IntConstant 5 [5]\n" " 3: Goto\n" "BasicBlock 1, pred: 0, succ: 2\n" " 4: Goto\n" - "BasicBlock 2, pred: 1, 8, succ: 6, 3\n" - " 5: Phi(0, 14) [12, 6, 6]\n" + "BasicBlock 2, pred: 1, 4, 5, succ: 6, 3\n" + " 5: Phi(0, 2, 1) [12, 6, 6]\n" " 6: Equal(5, 5) [7]\n" " 7: If(6)\n" "BasicBlock 3, pred: 2, succ: 5, 4\n" " 8: Equal(1, 1) [9]\n" " 9: If(8)\n" - "BasicBlock 4, pred: 3, succ: 8\n" + "BasicBlock 4, pred: 3, succ: 2\n" " 10: Goto\n" - "BasicBlock 5, pred: 3, succ: 8\n" + "BasicBlock 5, pred: 3, succ: 2\n" " 11: Goto\n" "BasicBlock 6, pred: 2, succ: 7\n" " 12: Return(5)\n" "BasicBlock 7, pred: 6\n" - " 13: Exit\n" - // Synthesized single back edge of loop. 
- "BasicBlock 8, pred: 5, 4, succ: 2\n" - " 14: Phi(1, 2) [5]\n" - " 15: Goto\n"; + " 13: Exit\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, diff --git a/compiler/optimizing/suspend_check_test.cc b/compiler/optimizing/suspend_check_test.cc index a5a0eb2114..5ca66a1de6 100644 --- a/compiler/optimizing/suspend_check_test.cc +++ b/compiler/optimizing/suspend_check_test.cc @@ -30,7 +30,7 @@ namespace art { static void TestCode(const uint16_t* data) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index 282ab96ce4..5e9653df33 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -272,6 +272,10 @@ void Mips64Assembler::Lhu(GpuRegister rt, GpuRegister rs, uint16_t imm16) { EmitI(0x25, rs, rt, imm16); } +void Mips64Assembler::Lwu(GpuRegister rt, GpuRegister rs, uint16_t imm16) { + EmitI(0x27, rs, rt, imm16); +} + void Mips64Assembler::Lui(GpuRegister rt, uint16_t imm16) { EmitI(0xf, static_cast<GpuRegister>(0), rt, imm16); } @@ -480,6 +484,9 @@ void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuR case kLoadWord: Lw(reg, base, offset); break; + case kLoadUnsignedWord: + Lwu(reg, base, offset); + break; case kLoadDoubleword: // TODO: alignment issues ??? Ld(reg, base, offset); @@ -512,7 +519,6 @@ void Mips64Assembler::EmitLoad(ManagedRegister m_dst, GpuRegister src_register, CHECK_EQ(0u, size) << dst; } else if (dst.IsGpuRegister()) { if (size == 4) { - CHECK_EQ(4u, size) << dst; LoadFromOffset(kLoadWord, dst.AsGpuRegister(), src_register, src_offset); } else if (size == 8) { CHECK_EQ(8u, size) << dst; @@ -740,14 +746,13 @@ void Mips64Assembler::LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> sr void Mips64Assembler::LoadRef(ManagedRegister mdest, FrameOffset src) { Mips64ManagedRegister dest = mdest.AsMips64(); CHECK(dest.IsGpuRegister()); - LoadFromOffset(kLoadWord, dest.AsGpuRegister(), SP, src.Int32Value()); + LoadFromOffset(kLoadUnsignedWord, dest.AsGpuRegister(), SP, src.Int32Value()); } -void Mips64Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base, - MemberOffset offs) { +void Mips64Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs) { Mips64ManagedRegister dest = mdest.AsMips64(); - CHECK(dest.IsGpuRegister() && dest.IsGpuRegister()); - LoadFromOffset(kLoadWord, dest.AsGpuRegister(), + CHECK(dest.IsGpuRegister() && base.AsMips64().IsGpuRegister()); + LoadFromOffset(kLoadUnsignedWord, dest.AsGpuRegister(), base.AsMips64().AsGpuRegister(), offs.Int32Value()); if (kPoisonHeapReferences) { Subu(dest.AsGpuRegister(), ZERO, dest.AsGpuRegister()); @@ -921,7 +926,7 @@ void Mips64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg, // the address in the handle scope holding the reference. // e.g. out_reg = (handle == 0) ? 
0 : (SP+handle_offset) if (in_reg.IsNoRegister()) { - LoadFromOffset(kLoadWord, out_reg.AsGpuRegister(), + LoadFromOffset(kLoadUnsignedWord, out_reg.AsGpuRegister(), SP, handle_scope_offset.Int32Value()); in_reg = out_reg; } @@ -944,7 +949,7 @@ void Mips64Assembler::CreateHandleScopeEntry(FrameOffset out_off, CHECK(scratch.IsGpuRegister()) << scratch; if (null_allowed) { Label null_arg; - LoadFromOffset(kLoadWord, scratch.AsGpuRegister(), SP, + LoadFromOffset(kLoadUnsignedWord, scratch.AsGpuRegister(), SP, handle_scope_offset.Int32Value()); // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is // the address in the handle scope holding the reference. @@ -998,7 +1003,7 @@ void Mips64Assembler::Call(FrameOffset base, Offset offset, ManagedRegister mscr Mips64ManagedRegister scratch = mscratch.AsMips64(); CHECK(scratch.IsGpuRegister()) << scratch; // Call *(*(SP + base) + offset) - LoadFromOffset(kLoadWord, scratch.AsGpuRegister(), + LoadFromOffset(kLoadUnsignedWord, scratch.AsGpuRegister(), SP, base.Int32Value()); LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), scratch.AsGpuRegister(), offset.Int32Value()); diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index b7f6a9e83a..2d7c661eac 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -36,6 +36,7 @@ enum LoadOperandType { kLoadSignedHalfword, kLoadUnsignedHalfword, kLoadWord, + kLoadUnsignedWord, kLoadDoubleword }; @@ -85,6 +86,7 @@ class Mips64Assembler FINAL : public Assembler { void Ld(GpuRegister rt, GpuRegister rs, uint16_t imm16); void Lbu(GpuRegister rt, GpuRegister rs, uint16_t imm16); void Lhu(GpuRegister rt, GpuRegister rs, uint16_t imm16); + void Lwu(GpuRegister rt, GpuRegister rs, uint16_t imm16); void Lui(GpuRegister rt, uint16_t imm16); void Mfhi(GpuRegister rd); void Mflo(GpuRegister rd); diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index f2541a2113..7e7520066d 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -1507,6 +1507,14 @@ void X86Assembler::jmp(Label* label) { } +void X86Assembler::repne_scasw() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0xF2); + EmitUint8(0xAF); +} + + X86Assembler* X86Assembler::lock() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF0); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 946c96de71..136b0cbfdb 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -464,6 +464,8 @@ class X86Assembler FINAL : public Assembler { void jmp(const Address& address); void jmp(Label* label); + void repne_scasw(); + X86Assembler* lock(); void cmpxchgl(const Address& address, Register reg); void cmpxchg8b(const Address& address); diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index f326e496d4..aacc57bb0c 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -190,4 +190,10 @@ TEST_F(AssemblerX86Test, FPUIntegerStore) { DriverStr(expected, "FPUIntegerStore"); } +TEST_F(AssemblerX86Test, Repnescasw) { + GetAssembler()->repne_scasw(); + const char* expected = "repne scasw\n"; + DriverStr(expected, "Repnescasw"); +} + } // namespace art diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 
c0ca7ef437..feceecac68 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -2065,6 +2065,14 @@ void X86_64Assembler::bswapq(CpuRegister dst) { } +void X86_64Assembler::repne_scasw() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0xF2); + EmitUint8(0xAF); +} + + void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) { // TODO: Need to have a code constants table. int64_t constant = bit_cast<int64_t, double>(value); diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index f5327a8d02..162714af68 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -601,6 +601,8 @@ class X86_64Assembler FINAL : public Assembler { void bswapl(CpuRegister dst); void bswapq(CpuRegister dst); + void repne_scasw(); + // // Macros for High-level operations. // diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index 9e4144ac26..0be4d632fb 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -1215,4 +1215,10 @@ TEST_F(AssemblerX86_64Test, MovsxbRegs) { DriverStr(Repeatrb(&x86_64::X86_64Assembler::movsxb, "movsbl %{reg2}, %{reg1}"), "movsxb"); } +TEST_F(AssemblerX86_64Test, Repnescasw) { + GetAssembler()->repne_scasw(); + const char* expected = "repne scasw\n"; + DriverStr(expected, "Repnescasw"); +} + } // namespace art diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc index f14dfc27ae..cafc868789 100644 --- a/runtime/arch/arm/entrypoints_init_arm.cc +++ b/runtime/arch/arm/entrypoints_init_arm.cc @@ -166,6 +166,9 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints, qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow; qpoints->pDeoptimize = art_quick_deoptimize; + + // Read barrier + qpoints->pReadBarrierJni = ReadBarrierJni; } } // namespace art diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc index 4b12f00d0d..8c8f8d51a8 100644 --- a/runtime/arch/arm64/entrypoints_init_arm64.cc +++ b/runtime/arch/arm64/entrypoints_init_arm64.cc @@ -159,6 +159,9 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints, // Deoptimize qpoints->pDeoptimize = art_quick_deoptimize; + + // Read barrier + qpoints->pReadBarrierJni = ReadBarrierJni; }; } // namespace art diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc index a980a86135..ff04106f15 100644 --- a/runtime/arch/mips/entrypoints_init_mips.cc +++ b/runtime/arch/mips/entrypoints_init_mips.cc @@ -272,6 +272,9 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints, static_assert(IsDirectEntrypoint(kQuickA64Load), "Non-direct C stub marked direct."); qpoints->pA64Store = QuasiAtomic::Write64; static_assert(IsDirectEntrypoint(kQuickA64Store), "Non-direct C stub marked direct."); + + qpoints->pReadBarrierJni = ReadBarrierJni; + static_assert(!IsDirectEntrypoint(kQuickReadBarrierJni), "Non-direct C stub marked direct."); }; } // namespace art diff --git a/runtime/arch/mips64/asm_support_mips64.S b/runtime/arch/mips64/asm_support_mips64.S index 10976bb70d..26137777e3 100644 --- a/runtime/arch/mips64/asm_support_mips64.S +++ b/runtime/arch/mips64/asm_support_mips64.S @@ -27,7 +27,8 @@ #define rSELF $s1 - // Declare a function 
called name, sets up $gp. + // Declare a function called name, sets up $gp. + // This macro modifies t8. .macro ENTRY name .type \name, %function .global \name @@ -35,10 +36,11 @@ .balign 16 \name: .cfi_startproc + // Set up $gp and store the previous $gp value to $t8. It will be pushed to the + // stack after the frame has been constructed. + .cpsetup $t9, $t8, \name // Ensure we get a sane starting CFA. .cfi_def_cfa $sp,0 - // Load $gp. We expect that ".set noreorder" is in effect. - .cpload $t9 // Declare a local convenience label to be branched to when $gp is already set up. .L\name\()_gp_set: .endm diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc index b328708409..321c27bf50 100644 --- a/runtime/arch/mips64/entrypoints_init_mips64.cc +++ b/runtime/arch/mips64/entrypoints_init_mips64.cc @@ -180,6 +180,9 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints, // Atomic 64-bit load/store qpoints->pA64Load = QuasiAtomic::Read64; qpoints->pA64Store = QuasiAtomic::Write64; + + // Read barrier + qpoints->pReadBarrierJni = ReadBarrierJni; }; } // namespace art diff --git a/runtime/arch/mips64/jni_entrypoints_mips64.S b/runtime/arch/mips64/jni_entrypoints_mips64.S index 10856668a2..70d7d9742c 100644 --- a/runtime/arch/mips64/jni_entrypoints_mips64.S +++ b/runtime/arch/mips64/jni_entrypoints_mips64.S @@ -44,8 +44,11 @@ ENTRY art_jni_dlsym_lookup_stub .cfi_rel_offset 5, 8 sd $a0, 0($sp) .cfi_rel_offset 4, 0 - jal artFindNativeMethod # (Thread*) move $a0, $s1 # pass Thread::Current() + jal artFindNativeMethod # (Thread*) + .cpreturn # Restore gp from t8 in branch delay slot. gp is not used + # anymore, and t8 may be clobbered in artFindNativeMethod. + ld $a0, 0($sp) # restore registers from stack .cfi_restore 4 ld $a1, 8($sp) diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S index d781e7662c..ff79b5d77c 100644 --- a/runtime/arch/mips64/quick_entrypoints_mips64.S +++ b/runtime/arch/mips64/quick_entrypoints_mips64.S @@ -27,6 +27,19 @@ .extern artDeliverPendingExceptionFromCode /* + * Macro that sets up $gp and stores the previous $gp value to $t8. + * This macro modifies v1 and t8. + */ +.macro SETUP_GP + move $v1, $ra + bal 1f + nop +1: + .cpsetup $ra, $t8, 1b + move $ra, $v1 +.endm + + /* * Macro that sets up the callee save frame to conform with * Runtime::CreateCalleeSaveMethod(kSaveAll) * callee-save: padding + $f24-$f31 + $s0-$s7 + $gp + $ra + $s8 = 19 total + 1x8 bytes padding @@ -44,8 +57,8 @@ .cfi_rel_offset 31, 152 sd $s8, 144($sp) .cfi_rel_offset 30, 144 - sd $gp, 136($sp) - .cfi_rel_offset 28, 136 + sd $t8, 136($sp) # t8 holds caller's gp, now save it to the stack. + .cfi_rel_offset 28, 136 # Value from gp is pushed, so set the cfi offset accordingly. sd $s7, 128($sp) .cfi_rel_offset 23, 128 sd $s6, 120($sp) @@ -102,8 +115,8 @@ .cfi_rel_offset 31, 72 sd $s8, 64($sp) .cfi_rel_offset 30, 64 - sd $gp, 56($sp) - .cfi_rel_offset 28, 56 + sd $t8, 56($sp) # t8 holds caller's gp, now save it to the stack. + .cfi_rel_offset 28, 56 # Value from gp is pushed, so set the cfi offset accordingly. sd $s7, 48($sp) .cfi_rel_offset 23, 48 sd $s6, 40($sp) @@ -130,7 +143,7 @@ .cfi_restore 31 ld $s8, 64($sp) .cfi_restore 30 - ld $gp, 56($sp) + ld $t8, 56($sp) # Restore gp back to its temp storage.
.cfi_restore 28 ld $s7, 48($sp) .cfi_restore 23 @@ -146,6 +159,7 @@ .cfi_restore 18 daddiu $sp, $sp, 80 .cfi_adjust_cfa_offset -80 + .cpreturn .endm .macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN ld $ra, 72($sp) .cfi_restore 31 ld $s8, 64($sp) .cfi_restore 30 - ld $gp, 56($sp) + ld $t8, 56($sp) # Restore gp back to its temp storage. .cfi_restore 28 ld $s7, 48($sp) .cfi_restore 23 @@ -167,6 +181,7 @@ .cfi_restore 19 ld $s2, 8($sp) .cfi_restore 18 + .cpreturn jalr $zero, $ra daddiu $sp, $sp, 80 .cfi_adjust_cfa_offset -80 @@ -175,12 +190,6 @@ // This assumes the top part of these stack frame types are identical. #define REFS_AND_ARGS_MINUS_REFS_SIZE (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE) - /* - * Macro that sets up the callee save frame to conform with - * Runtime::CreateCalleeSaveMethod(kRefsAndArgs). Restoration assumes - * non-moving GC. - * callee-save: padding + $f12-$f19 + $a1-$a7 + $s2-$s7 + $gp + $ra + $s8 = 24 total + 1 words padding + Method* - */ .macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL daddiu $sp, $sp, -208 .cfi_adjust_cfa_offset 208 @@ -194,8 +203,8 @@ .cfi_rel_offset 31, 200 sd $s8, 192($sp) .cfi_rel_offset 30, 192 - sd $gp, 184($sp) - .cfi_rel_offset 28, 184 + sd $t8, 184($sp) # t8 holds caller's gp, now save it to the stack. + .cfi_rel_offset 28, 184 # Value from gp is pushed, so set the cfi offset accordingly. sd $s7, 176($sp) .cfi_rel_offset 23, 176 sd $s6, 168($sp) @@ -232,16 +241,15 @@ s.d $f14, 32($sp) s.d $f13, 24($sp) # = kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset s.d $f12, 16($sp) # This isn't necessary to store. - - # 1x8 bytes paddig + Method* - ld $v0, %got(_ZN3art7Runtime9instance_E)($gp) - ld $v0, 0($v0) - THIS_LOAD_REQUIRES_READ_BARRIER - lwu $v0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($v0) - sw $v0, 0($sp) # Place Method* at bottom of stack. - sd $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF) # Place sp in Thread::Current()->top_quick_frame. + # 1x8 bytes padding + Method* .endm + /* + * Macro that sets up the callee save frame to conform with + * Runtime::CreateCalleeSaveMethod(kRefsAndArgs). Restoration assumes + * non-moving GC. + * callee-save: padding + $f12-$f19 + $a1-$a7 + $s2-$s7 + $gp + $ra + $s8 = 24 total + 1 word padding + Method* + */ .macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL # load appropriate callee-save-method @@ -253,12 +261,18 @@ sd $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF) # Place sp in Thread::Current()->top_quick_frame. .endm +.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0 + SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL + sw $a0, 0($sp) # Place Method* at bottom of stack. + sd $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF) # Place sp in Thread::Current()->top_quick_frame. +.endm + .macro RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME ld $ra, 200($sp) .cfi_restore 31 ld $s8, 192($sp) .cfi_restore 30 - ld $gp, 184($sp) + ld $t8, 184($sp) # Restore gp back to its temp storage.
.cfi_restore 28 ld $s7, 176($sp) .cfi_restore 23 @@ -297,6 +311,7 @@ l.d $f13, 24($sp) l.d $f12, 16($sp) + .cpreturn daddiu $sp, $sp, 208 .cfi_adjust_cfa_offset -208 .endm @@ -307,6 +322,7 @@ * exception is Thread::Current()->exception_ */ .macro DELIVER_PENDING_EXCEPTION + SETUP_GP SETUP_SAVE_ALL_CALLEE_SAVE_FRAME # save callee saves for throw dla $t9, artDeliverPendingExceptionFromCode jalr $zero, $t9 # artDeliverPendingExceptionFromCode(Thread*) @@ -348,7 +364,7 @@ * On entry $a0 is uint32_t* gprs_ and $a1 is uint32_t* fprs_ * FIXME: just guessing about the shape of the jmpbuf. Where will pc be? */ -ENTRY art_quick_do_long_jump +ENTRY_NO_GP art_quick_do_long_jump l.d $f0, 0($a1) l.d $f1, 8($a1) l.d $f2, 16($a1) @@ -605,7 +621,7 @@ INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvo * a4 = JValue* result * a5 = shorty */ -ENTRY art_quick_invoke_stub +ENTRY_NO_GP art_quick_invoke_stub # push a4, a5, s0(rSUSPEND), s1(rSELF), s8, ra onto the stack daddiu $sp, $sp, -48 .cfi_adjust_cfa_offset 48 @@ -707,7 +723,7 @@ END art_quick_invoke_stub * a4 = JValue* result * a5 = shorty */ -ENTRY art_quick_invoke_static_stub +ENTRY_NO_GP art_quick_invoke_static_stub # push a4, a5, s0(rSUSPEND), s1(rSELF), s8, ra, onto the stack daddiu $sp, $sp, -48 @@ -851,7 +867,8 @@ ENTRY art_quick_check_cast sd $a1, 8($sp) sd $a0, 0($sp) jal artIsAssignableFromCode - nop + .cpreturn # Restore gp from t8 in branch delay slot. + # t8 may be clobbered in artIsAssignableFromCode. beq $v0, $zero, .Lthrow_class_cast_exception ld $ra, 24($sp) jalr $zero, $ra @@ -863,6 +880,7 @@ ENTRY art_quick_check_cast ld $a0, 0($sp) daddiu $sp, $sp, 32 .cfi_adjust_cfa_offset -32 + SETUP_GP SETUP_SAVE_ALL_CALLEE_SAVE_FRAME dla $t9, artThrowClassCastException jalr $zero, $t9 # artThrowClassCastException (Class*, Class*, Thread*) @@ -908,13 +926,13 @@ ENTRY art_quick_aput_obj daddu $t1, $t1, $t0 sb $t0, ($t1) jalr $zero, $ra - nop + .cpreturn # Restore gp from t8 in branch delay slot. .Ldo_aput_null: dsll $a1, $a1, 2 daddu $t0, $a0, $a1 sw $a2, MIRROR_OBJECT_ARRAY_DATA_OFFSET($t0) jalr $zero, $ra - nop + .cpreturn # Restore gp from t8 in branch delay slot. .Lcheck_assignability: daddiu $sp, $sp, -64 .cfi_adjust_cfa_offset 64 @@ -927,7 +945,8 @@ ENTRY art_quick_aput_obj move $a1, $t1 move $a0, $t0 jal artIsAssignableFromCode # (Class*, Class*) - nop + .cpreturn # Restore gp from t8 in branch delay slot. + # t8 may be clobbered in artIsAssignableFromCode. ld $ra, 56($sp) ld $t9, 24($sp) ld $a2, 16($sp) @@ -935,6 +954,7 @@ ENTRY art_quick_aput_obj ld $a0, 0($sp) daddiu $sp, $sp, 64 .cfi_adjust_cfa_offset -64 + SETUP_GP bne $v0, $zero, .Ldo_aput nop SETUP_SAVE_ALL_CALLEE_SAVE_FRAME @@ -1312,7 +1332,7 @@ ENTRY art_quick_test_suspend bne $a0, $zero, 1f daddiu rSUSPEND, $zero, SUSPEND_CHECK_INTERVAL # reset rSUSPEND to SUSPEND_CHECK_INTERVAL jalr $zero, $ra - nop + .cpreturn # Restore gp from t8 in branch delay slot. 
1: SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves for stack crawl jal artTestSuspendFromCode # (Thread*) @@ -1326,8 +1346,7 @@ END art_quick_test_suspend */ .extern artQuickProxyInvokeHandler ENTRY art_quick_proxy_invoke_handler - SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME - sd $a0, 0($sp) # place proxy method at bottom of frame + SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0 move $a2, rSELF # pass Thread::Current jal artQuickProxyInvokeHandler # (Method* proxy method, receiver, Thread*, SP) move $a3, $sp # pass $sp @@ -1352,6 +1371,7 @@ ENTRY art_quick_imt_conflict_trampoline dsll $t0, 2 # convert target method offset to bytes daddu $a0, $t0 # get address of target method dla $t9, art_quick_invoke_interface_trampoline + .cpreturn jalr $zero, $t9 lwu $a0, MIRROR_OBJECT_ARRAY_DATA_OFFSET($a0) # load the target method END art_quick_imt_conflict_trampoline @@ -1377,8 +1397,7 @@ END art_quick_resolution_trampoline .extern artQuickGenericJniTrampoline .extern artQuickGenericJniEndTrampoline ENTRY art_quick_generic_jni_trampoline - SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL - sd $a0, 0($sp) # store native ArtMethod* to bottom of stack + SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0 move $s8, $sp # save $sp # prepare for call to artQuickGenericJniTrampoline(Thread*, SP) @@ -1481,8 +1500,7 @@ END art_quick_instrumentation_entry .global art_quick_instrumentation_exit art_quick_instrumentation_exit: .cfi_startproc - daddiu $t9, $ra, 4 # put current address into $t9 to rebuild $gp - .cpload $t9 + SETUP_GP move $ra, $zero # link register is to here, so clobber with 0 for later checks SETUP_REFS_ONLY_CALLEE_SAVE_FRAME move $t0, $sp # remember bottom of caller's frame @@ -1494,8 +1512,11 @@ art_quick_instrumentation_exit: mov.d $f15, $f0 # pass fpr result move $a2, $v0 # pass gpr result move $a1, $t0 # pass $sp - jal artInstrumentationMethodExitFromCode # (Thread*, SP, gpr_res, fpr_res) move $a0, rSELF # pass Thread::Current + jal artInstrumentationMethodExitFromCode # (Thread*, SP, gpr_res, fpr_res) + .cpreturn # Restore gp from t8 in branch delay slot. gp is not used anymore, + # and t8 may be clobbered in artInstrumentationMethodExitFromCode. + move $t9, $v0 # set aside returned link register move $ra, $v1 # set link register for deoptimization ld $v0, 0($sp) # restore return values diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc index de7804f759..a7d24b8e93 100644 --- a/runtime/arch/stub_test.cc +++ b/runtime/arch/stub_test.cc @@ -261,6 +261,132 @@ class StubTest : public CommonRuntimeTest { "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", "memory"); // clobber. +#elif defined(__mips__) && !defined(__LP64__) + __asm__ __volatile__ ( + // Spill a0-a3 and t0-t7 which we say we don't clobber. May contain args. + "addiu $sp, $sp, -64\n\t" + "sw $a0, 0($sp)\n\t" + "sw $a1, 4($sp)\n\t" + "sw $a2, 8($sp)\n\t" + "sw $a3, 12($sp)\n\t" + "sw $t0, 16($sp)\n\t" + "sw $t1, 20($sp)\n\t" + "sw $t2, 24($sp)\n\t" + "sw $t3, 28($sp)\n\t" + "sw $t4, 32($sp)\n\t" + "sw $t5, 36($sp)\n\t" + "sw $t6, 40($sp)\n\t" + "sw $t7, 44($sp)\n\t" + // Spill gp register since it is caller save. + "sw $gp, 52($sp)\n\t" + + "addiu $sp, $sp, -16\n\t" // Reserve stack space, 16B aligned. + "sw %[referrer], 0($sp)\n\t" + + // Push everything on the stack, so we don't rely on the order. 
+ "addiu $sp, $sp, -20\n\t" + "sw %[arg0], 0($sp)\n\t" + "sw %[arg1], 4($sp)\n\t" + "sw %[arg2], 8($sp)\n\t" + "sw %[code], 12($sp)\n\t" + "sw %[self], 16($sp)\n\t" + + // Load call params into the right registers. + "lw $a0, 0($sp)\n\t" + "lw $a1, 4($sp)\n\t" + "lw $a2, 8($sp)\n\t" + "lw $t9, 12($sp)\n\t" + "lw $s1, 16($sp)\n\t" + "addiu $sp, $sp, 20\n\t" + + "jalr $t9\n\t" // Call the stub. + "nop\n\t" + "addiu $sp, $sp, 16\n\t" // Drop the quick "frame". + + // Restore stuff not named clobbered. + "lw $a0, 0($sp)\n\t" + "lw $a1, 4($sp)\n\t" + "lw $a2, 8($sp)\n\t" + "lw $a3, 12($sp)\n\t" + "lw $t0, 16($sp)\n\t" + "lw $t1, 20($sp)\n\t" + "lw $t2, 24($sp)\n\t" + "lw $t3, 28($sp)\n\t" + "lw $t4, 32($sp)\n\t" + "lw $t5, 36($sp)\n\t" + "lw $t6, 40($sp)\n\t" + "lw $t7, 44($sp)\n\t" + // Restore gp. + "lw $gp, 52($sp)\n\t" + "addiu $sp, $sp, 64\n\t" // Free stack space, now sp as on entry. + + "move %[result], $v0\n\t" // Store the call result. + : [result] "=r" (result) + : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self), + [referrer] "r"(referrer) + : "at", "v0", "v1", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "t8", "t9", "k0", "k1", + "fp", "ra", + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13", + "f14", "f15", "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", "f24", "f25", "f26", + "f27", "f28", "f29", "f30", "f31", + "memory"); // clobber. +#elif defined(__mips__) && defined(__LP64__) + __asm__ __volatile__ ( + // Spill a0-a7 which we say we don't clobber. May contain args. + "daddiu $sp, $sp, -64\n\t" + "sd $a0, 0($sp)\n\t" + "sd $a1, 8($sp)\n\t" + "sd $a2, 16($sp)\n\t" + "sd $a3, 24($sp)\n\t" + "sd $a4, 32($sp)\n\t" + "sd $a5, 40($sp)\n\t" + "sd $a6, 48($sp)\n\t" + "sd $a7, 56($sp)\n\t" + + "daddiu $sp, $sp, -16\n\t" // Reserve stack space, 16B aligned. + "sd %[referrer], 0($sp)\n\t" + + // Push everything on the stack, so we don't rely on the order. + "daddiu $sp, $sp, -40\n\t" + "sd %[arg0], 0($sp)\n\t" + "sd %[arg1], 8($sp)\n\t" + "sd %[arg2], 16($sp)\n\t" + "sd %[code], 24($sp)\n\t" + "sd %[self], 32($sp)\n\t" + + // Load call params into the right registers. + "ld $a0, 0($sp)\n\t" + "ld $a1, 8($sp)\n\t" + "ld $a2, 16($sp)\n\t" + "ld $t9, 24($sp)\n\t" + "ld $s1, 32($sp)\n\t" + "daddiu $sp, $sp, 40\n\t" + + "jalr $t9\n\t" // Call the stub. + "nop\n\t" + "daddiu $sp, $sp, 16\n\t" // Drop the quick "frame". + + // Restore stuff not named clobbered. + "ld $a0, 0($sp)\n\t" + "ld $a1, 8($sp)\n\t" + "ld $a2, 16($sp)\n\t" + "ld $a3, 24($sp)\n\t" + "ld $a4, 32($sp)\n\t" + "ld $a5, 40($sp)\n\t" + "ld $a6, 48($sp)\n\t" + "ld $a7, 56($sp)\n\t" + "daddiu $sp, $sp, 64\n\t" + + "move %[result], $v0\n\t" // Store the call result. + : [result] "=r" (result) + : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self), + [referrer] "r"(referrer) + : "at", "v0", "v1", "t0", "t1", "t2", "t3", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", + "t8", "t9", "k0", "k1", "fp", "ra", + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13", + "f14", "f15", "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", "f24", "f25", "f26", + "f27", "f28", "f29", "f30", "f31", + "memory"); // clobber. #elif defined(__x86_64__) && !defined(__APPLE__) && defined(__clang__) // Note: Uses the native convention // TODO: Set the thread? 
@@ -487,6 +613,136 @@ class StubTest : public CommonRuntimeTest { "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", "memory"); // clobber. +#elif defined(__mips__) && !defined(__LP64__) + __asm__ __volatile__ ( + // Spill a0-a3 and t0-t7 which we say we don't clobber. May contain args. + "addiu $sp, $sp, -64\n\t" + "sw $a0, 0($sp)\n\t" + "sw $a1, 4($sp)\n\t" + "sw $a2, 8($sp)\n\t" + "sw $a3, 12($sp)\n\t" + "sw $t0, 16($sp)\n\t" + "sw $t1, 20($sp)\n\t" + "sw $t2, 24($sp)\n\t" + "sw $t3, 28($sp)\n\t" + "sw $t4, 32($sp)\n\t" + "sw $t5, 36($sp)\n\t" + "sw $t6, 40($sp)\n\t" + "sw $t7, 44($sp)\n\t" + // Spill gp register since it is caller save. + "sw $gp, 52($sp)\n\t" + + "addiu $sp, $sp, -16\n\t" // Reserve stack space, 16B aligned. + "sw %[referrer], 0($sp)\n\t" + + // Push everything on the stack, so we don't rely on the order. + "addiu $sp, $sp, -24\n\t" + "sw %[arg0], 0($sp)\n\t" + "sw %[arg1], 4($sp)\n\t" + "sw %[arg2], 8($sp)\n\t" + "sw %[code], 12($sp)\n\t" + "sw %[self], 16($sp)\n\t" + "sw %[hidden], 20($sp)\n\t" + + // Load call params into the right registers. + "lw $a0, 0($sp)\n\t" + "lw $a1, 4($sp)\n\t" + "lw $a2, 8($sp)\n\t" + "lw $t9, 12($sp)\n\t" + "lw $s1, 16($sp)\n\t" + "lw $t0, 20($sp)\n\t" + "addiu $sp, $sp, 24\n\t" + + "jalr $t9\n\t" // Call the stub. + "nop\n\t" + "addiu $sp, $sp, 16\n\t" // Drop the quick "frame". + + // Restore stuff not named clobbered. + "lw $a0, 0($sp)\n\t" + "lw $a1, 4($sp)\n\t" + "lw $a2, 8($sp)\n\t" + "lw $a3, 12($sp)\n\t" + "lw $t0, 16($sp)\n\t" + "lw $t1, 20($sp)\n\t" + "lw $t2, 24($sp)\n\t" + "lw $t3, 28($sp)\n\t" + "lw $t4, 32($sp)\n\t" + "lw $t5, 36($sp)\n\t" + "lw $t6, 40($sp)\n\t" + "lw $t7, 44($sp)\n\t" + // Restore gp. + "lw $gp, 52($sp)\n\t" + "addiu $sp, $sp, 64\n\t" // Free stack space, now sp as on entry. + + "move %[result], $v0\n\t" // Store the call result. + : [result] "=r" (result) + : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self), + [referrer] "r"(referrer), [hidden] "r"(hidden) + : "at", "v0", "v1", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "t8", "t9", "k0", "k1", + "fp", "ra", + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13", + "f14", "f15", "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", "f24", "f25", "f26", + "f27", "f28", "f29", "f30", "f31", + "memory"); // clobber. +#elif defined(__mips__) && defined(__LP64__) + __asm__ __volatile__ ( + // Spill a0-a7 which we say we don't clobber. May contain args. + "daddiu $sp, $sp, -64\n\t" + "sd $a0, 0($sp)\n\t" + "sd $a1, 8($sp)\n\t" + "sd $a2, 16($sp)\n\t" + "sd $a3, 24($sp)\n\t" + "sd $a4, 32($sp)\n\t" + "sd $a5, 40($sp)\n\t" + "sd $a6, 48($sp)\n\t" + "sd $a7, 56($sp)\n\t" + + "daddiu $sp, $sp, -16\n\t" // Reserve stack space, 16B aligned. + "sd %[referrer], 0($sp)\n\t" + + // Push everything on the stack, so we don't rely on the order. + "daddiu $sp, $sp, -48\n\t" + "sd %[arg0], 0($sp)\n\t" + "sd %[arg1], 8($sp)\n\t" + "sd %[arg2], 16($sp)\n\t" + "sd %[code], 24($sp)\n\t" + "sd %[self], 32($sp)\n\t" + "sd %[hidden], 40($sp)\n\t" + + // Load call params into the right registers. + "ld $a0, 0($sp)\n\t" + "ld $a1, 8($sp)\n\t" + "ld $a2, 16($sp)\n\t" + "ld $t9, 24($sp)\n\t" + "ld $s1, 32($sp)\n\t" + "ld $t0, 40($sp)\n\t" + "daddiu $sp, $sp, 48\n\t" + + "jalr $t9\n\t" // Call the stub. + "nop\n\t" + "daddiu $sp, $sp, 16\n\t" // Drop the quick "frame". + + // Restore stuff not named clobbered. 
+ "ld $a0, 0($sp)\n\t" + "ld $a1, 8($sp)\n\t" + "ld $a2, 16($sp)\n\t" + "ld $a3, 24($sp)\n\t" + "ld $a4, 32($sp)\n\t" + "ld $a5, 40($sp)\n\t" + "ld $a6, 48($sp)\n\t" + "ld $a7, 56($sp)\n\t" + "daddiu $sp, $sp, 64\n\t" + + "move %[result], $v0\n\t" // Store the call result. + : [result] "=r" (result) + : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self), + [referrer] "r"(referrer), [hidden] "r"(hidden) + : "at", "v0", "v1", "t0", "t1", "t2", "t3", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", + "t8", "t9", "k0", "k1", "fp", "ra", + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13", + "f14", "f15", "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", "f24", "f25", "f26", + "f27", "f28", "f29", "f30", "f31", + "memory"); // clobber. #elif defined(__x86_64__) && !defined(__APPLE__) && defined(__clang__) // Note: Uses the native convention // TODO: Set the thread? @@ -521,7 +777,8 @@ class StubTest : public CommonRuntimeTest { // Method with 32b arg0, 64b arg1 size_t Invoke3UWithReferrer(size_t arg0, uint64_t arg1, uintptr_t code, Thread* self, mirror::ArtMethod* referrer) { -#if (defined(__x86_64__) && !defined(__APPLE__)) || defined(__aarch64__) +#if (defined(__x86_64__) && !defined(__APPLE__)) || (defined(__mips__) && defined(__LP64__)) || \ + defined(__aarch64__) // Just pass through. return Invoke3WithReferrer(arg0, arg1, 0U, code, self, referrer); #else @@ -549,7 +806,7 @@ class StubTest : public CommonRuntimeTest { TEST_F(StubTest, Memcpy) { -#if defined(__i386__) || (defined(__x86_64__) && !defined(__APPLE__)) +#if defined(__i386__) || (defined(__x86_64__) && !defined(__APPLE__)) || defined(__mips__) Thread* self = Thread::Current(); uint32_t orig[20]; @@ -586,7 +843,8 @@ TEST_F(StubTest, Memcpy) { } TEST_F(StubTest, LockObject) { -#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__)) +#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ + (defined(__x86_64__) && !defined(__APPLE__)) static constexpr size_t kThinLockLoops = 100; Thread* self = Thread::Current(); @@ -659,7 +917,8 @@ class RandGen { // NO_THREAD_SAFETY_ANALYSIS as we do not want to grab exclusive mutator lock for MonitorInfo. 
static void TestUnlockObject(StubTest* test) NO_THREAD_SAFETY_ANALYSIS { -#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__)) +#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ + (defined(__x86_64__) && !defined(__APPLE__)) static constexpr size_t kThinLockLoops = 100; Thread* self = Thread::Current(); @@ -809,12 +1068,14 @@ TEST_F(StubTest, UnlockObject) { TestUnlockObject(this); } -#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__)) +#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ + (defined(__x86_64__) && !defined(__APPLE__)) extern "C" void art_quick_check_cast(void); #endif TEST_F(StubTest, CheckCast) { -#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__)) +#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ + (defined(__x86_64__) && !defined(__APPLE__)) Thread* self = Thread::Current(); const uintptr_t art_quick_check_cast = StubTest::GetEntrypoint(self, kQuickCheckCast); @@ -865,7 +1126,8 @@ TEST_F(StubTest, CheckCast) { TEST_F(StubTest, APutObj) { TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING(); -#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__)) +#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ + (defined(__x86_64__) && !defined(__APPLE__)) Thread* self = Thread::Current(); // Do not check non-checked ones, we'd need handlers and stuff... @@ -998,7 +1260,8 @@ TEST_F(StubTest, APutObj) { TEST_F(StubTest, AllocObject) { TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING(); -#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__)) +#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ + (defined(__x86_64__) && !defined(__APPLE__)) // This will lead to OOM error messages in the log. ScopedLogSeverity sls(LogSeverity::FATAL); @@ -1123,7 +1386,8 @@ TEST_F(StubTest, AllocObject) { TEST_F(StubTest, AllocObjectArray) { TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING(); -#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__)) +#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ + (defined(__x86_64__) && !defined(__APPLE__)) // TODO: Check the "Unresolved" allocation stubs // This will lead to OOM error messages in the log. 
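// The hunks above and below splice defined(__mips__) into the same
// architecture list in every test guard. A hypothetical consolidation (not
// part of this change) would hoist the repeated condition into one macro:

#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || \
    defined(__mips__) || (defined(__x86_64__) && !defined(__APPLE__))
#define ART_STUB_TEST_SUPPORTED 1
#endif

// The 64-bit-only paths (GetSet64Static, Invoke3UWithReferrer) would still
// need their own guard, since 32-bit MIPS takes the fallback path there.
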
@@ -1292,7 +1556,8 @@ TEST_F(StubTest, StringCompareTo) { static void GetSetBooleanStatic(ArtField* f, Thread* self, mirror::ArtMethod* referrer, StubTest* test) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { -#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__)) +#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ + (defined(__x86_64__) && !defined(__APPLE__)) constexpr size_t num_values = 5; uint8_t values[num_values] = { 0, 1, 2, 128, 0xFF }; @@ -1322,7 +1587,8 @@ static void GetSetBooleanStatic(ArtField* f, Thread* self, static void GetSetByteStatic(ArtField* f, Thread* self, mirror::ArtMethod* referrer, StubTest* test) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { -#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__)) +#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ + (defined(__x86_64__) && !defined(__APPLE__)) int8_t values[] = { -128, -64, 0, 64, 127 }; for (size_t i = 0; i < arraysize(values); ++i) { @@ -1352,7 +1618,8 @@ static void GetSetByteStatic(ArtField* f, Thread* self, mirror::ArtMethod* refer static void GetSetBooleanInstance(Handle<mirror::Object>* obj, ArtField* f, Thread* self, mirror::ArtMethod* referrer, StubTest* test) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { -#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__)) +#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ + (defined(__x86_64__) && !defined(__APPLE__)) uint8_t values[] = { 0, true, 2, 128, 0xFF }; for (size_t i = 0; i < arraysize(values); ++i) { @@ -1386,7 +1653,8 @@ static void GetSetBooleanInstance(Handle<mirror::Object>* obj, ArtField* f, Thre static void GetSetByteInstance(Handle<mirror::Object>* obj, ArtField* f, Thread* self, mirror::ArtMethod* referrer, StubTest* test) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { -#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__)) +#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ + (defined(__x86_64__) && !defined(__APPLE__)) int8_t values[] = { -128, -64, 0, 64, 127 }; for (size_t i = 0; i < arraysize(values); ++i) { @@ -1420,7 +1688,8 @@ static void GetSetByteInstance(Handle<mirror::Object>* obj, ArtField* f, static void GetSetCharStatic(ArtField* f, Thread* self, mirror::ArtMethod* referrer, StubTest* test) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { -#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__)) +#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ + (defined(__x86_64__) && !defined(__APPLE__)) uint16_t values[] = { 0, 1, 2, 255, 32768, 0xFFFF }; for (size_t i = 0; i < arraysize(values); ++i) { @@ -1449,7 +1718,8 @@ static void GetSetCharStatic(ArtField* f, Thread* self, mirror::ArtMethod* refer static void GetSetShortStatic(ArtField* f, Thread* self, mirror::ArtMethod* referrer, StubTest* test) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { -#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__)) +#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ + (defined(__x86_64__) && !defined(__APPLE__)) int16_t values[] = { -0x7FFF, -32768, 0, 255, 32767, 0x7FFE 
}; for (size_t i = 0; i < arraysize(values); ++i) { @@ -1479,7 +1749,8 @@ static void GetSetShortStatic(ArtField* f, Thread* self, static void GetSetCharInstance(Handle<mirror::Object>* obj, ArtField* f, Thread* self, mirror::ArtMethod* referrer, StubTest* test) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { -#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__)) +#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ + (defined(__x86_64__) && !defined(__APPLE__)) uint16_t values[] = { 0, 1, 2, 255, 32768, 0xFFFF }; for (size_t i = 0; i < arraysize(values); ++i) { @@ -1512,7 +1783,8 @@ static void GetSetCharInstance(Handle<mirror::Object>* obj, ArtField* f, static void GetSetShortInstance(Handle<mirror::Object>* obj, ArtField* f, Thread* self, mirror::ArtMethod* referrer, StubTest* test) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { -#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__)) +#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ + (defined(__x86_64__) && !defined(__APPLE__)) int16_t values[] = { -0x7FFF, -32768, 0, 255, 32767, 0x7FFE }; for (size_t i = 0; i < arraysize(values); ++i) { @@ -1546,7 +1818,8 @@ static void GetSetShortInstance(Handle<mirror::Object>* obj, ArtField* f, static void GetSet32Static(ArtField* f, Thread* self, mirror::ArtMethod* referrer, StubTest* test) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { -#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__)) +#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ + (defined(__x86_64__) && !defined(__APPLE__)) uint32_t values[] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF }; for (size_t i = 0; i < arraysize(values); ++i) { @@ -1563,7 +1836,11 @@ static void GetSet32Static(ArtField* f, Thread* self, mirror::ArtMethod* referre self, referrer); +#if defined(__mips__) && defined(__LP64__) + EXPECT_EQ(static_cast<uint32_t>(res), values[i]) << "Iteration " << i; +#else EXPECT_EQ(res, values[i]) << "Iteration " << i; +#endif } #else UNUSED(f, self, referrer, test); @@ -1577,7 +1854,8 @@ static void GetSet32Static(ArtField* f, Thread* self, mirror::ArtMethod* referre static void GetSet32Instance(Handle<mirror::Object>* obj, ArtField* f, Thread* self, mirror::ArtMethod* referrer, StubTest* test) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { -#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__)) +#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ + (defined(__x86_64__) && !defined(__APPLE__)) uint32_t values[] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF }; for (size_t i = 0; i < arraysize(values); ++i) { @@ -1611,7 +1889,8 @@ static void GetSet32Instance(Handle<mirror::Object>* obj, ArtField* f, } -#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__)) +#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ + (defined(__x86_64__) && !defined(__APPLE__)) static void set_and_check_static(uint32_t f_idx, mirror::Object* val, Thread* self, mirror::ArtMethod* referrer, StubTest* test) @@ -1636,7 +1915,8 @@ static void set_and_check_static(uint32_t f_idx, mirror::Object* val, Thread* se static void GetSetObjStatic(ArtField* f, Thread* self, 
mirror::ArtMethod* referrer, StubTest* test) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { -#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__)) +#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ + (defined(__x86_64__) && !defined(__APPLE__)) set_and_check_static(f->GetDexFieldIndex(), nullptr, self, referrer, test); // Allocate a string object for simplicity. @@ -1653,7 +1933,8 @@ static void GetSetObjStatic(ArtField* f, Thread* self, mirror::ArtMethod* referr } -#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__)) +#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ + (defined(__x86_64__) && !defined(__APPLE__)) static void set_and_check_instance(ArtField* f, mirror::Object* trg, mirror::Object* val, Thread* self, mirror::ArtMethod* referrer, StubTest* test) @@ -1681,7 +1962,8 @@ static void set_and_check_instance(ArtField* f, mirror::Object* trg, static void GetSetObjInstance(Handle<mirror::Object>* obj, ArtField* f, Thread* self, mirror::ArtMethod* referrer, StubTest* test) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { -#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__)) +#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ + (defined(__x86_64__) && !defined(__APPLE__)) set_and_check_instance(f, obj->Get(), nullptr, self, referrer, test); // Allocate a string object for simplicity. @@ -1703,7 +1985,8 @@ static void GetSetObjInstance(Handle<mirror::Object>* obj, ArtField* f, static void GetSet64Static(ArtField* f, Thread* self, mirror::ArtMethod* referrer, StubTest* test) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { -#if (defined(__x86_64__) && !defined(__APPLE__)) || defined(__aarch64__) +#if (defined(__x86_64__) && !defined(__APPLE__)) || (defined(__mips__) && defined(__LP64__)) || \ + defined(__aarch64__) uint64_t values[] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF, 0xFFFFFFFFFFFF }; for (size_t i = 0; i < arraysize(values); ++i) { @@ -1733,7 +2016,8 @@ static void GetSet64Static(ArtField* f, Thread* self, mirror::ArtMethod* referre static void GetSet64Instance(Handle<mirror::Object>* obj, ArtField* f, Thread* self, mirror::ArtMethod* referrer, StubTest* test) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { -#if (defined(__x86_64__) && !defined(__APPLE__)) || defined(__aarch64__) +#if (defined(__x86_64__) && !defined(__APPLE__)) || (defined(__mips__) && defined(__LP64__)) || \ + defined(__aarch64__) uint64_t values[] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF, 0xFFFFFFFFFFFF }; for (size_t i = 0; i < arraysize(values); ++i) { @@ -1933,7 +2217,8 @@ TEST_F(StubTest, Fields64) { } TEST_F(StubTest, IMT) { -#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__)) +#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ + (defined(__x86_64__) && !defined(__APPLE__)) TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING(); Thread* self = Thread::Current(); diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc index a371632367..737f4d1c5b 100644 --- a/runtime/arch/x86/entrypoints_init_x86.cc +++ b/runtime/arch/x86/entrypoints_init_x86.cc @@ -138,6 +138,9 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints, // Deoptimize 
qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_slow_path; + + // Read barrier + qpoints->pReadBarrierJni = ReadBarrierJni; }; } // namespace art diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc index 0cddec4102..d0ab9d5d49 100644 --- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc +++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc @@ -142,6 +142,9 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints, // Deoptimize qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_slow_path; + + // Read barrier + qpoints->pReadBarrierJni = ReadBarrierJni; #endif // __APPLE__ }; diff --git a/runtime/asm_support.h b/runtime/asm_support.h index a115fbe0f4..3e677a4dbe 100644 --- a/runtime/asm_support.h +++ b/runtime/asm_support.h @@ -89,7 +89,7 @@ ADD_TEST_EQ(THREAD_ID_OFFSET, art::Thread::ThinLockIdOffset<__SIZEOF_POINTER__>().Int32Value()) // Offset of field Thread::tlsPtr_.card_table. -#define THREAD_CARD_TABLE_OFFSET 120 +#define THREAD_CARD_TABLE_OFFSET 128 ADD_TEST_EQ(THREAD_CARD_TABLE_OFFSET, art::Thread::CardTableOffset<__SIZEOF_POINTER__>().Int32Value()) @@ -108,7 +108,7 @@ ADD_TEST_EQ(THREAD_TOP_QUICK_FRAME_OFFSET, ADD_TEST_EQ(THREAD_SELF_OFFSET, art::Thread::SelfOffset<__SIZEOF_POINTER__>().Int32Value()) -#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 145 * __SIZEOF_POINTER__) +#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 146 * __SIZEOF_POINTER__) ADD_TEST_EQ(THREAD_LOCAL_POS_OFFSET, art::Thread::ThreadLocalPosOffset<__SIZEOF_POINTER__>().Int32Value()) #define THREAD_LOCAL_END_OFFSET (THREAD_LOCAL_POS_OFFSET + __SIZEOF_POINTER__) diff --git a/runtime/base/bit_vector.cc b/runtime/base/bit_vector.cc index 65cb02839a..39ce0d2cbe 100644 --- a/runtime/base/bit_vector.cc +++ b/runtime/base/bit_vector.cc @@ -24,11 +24,6 @@ namespace art { -// The number of words necessary to encode bits. -static constexpr uint32_t BitsToWords(uint32_t bits) { - return RoundUp(bits, 32) / 32; -} - // TODO: replace excessive argument defaulting when we are at gcc 4.7 // or later on host with delegating constructor support. Specifically, // starts_bits and storage_size/storage are mutually exclusive. diff --git a/runtime/base/bit_vector.h b/runtime/base/bit_vector.h index be4d363bf5..6e4367ac9d 100644 --- a/runtime/base/bit_vector.h +++ b/runtime/base/bit_vector.h @@ -20,6 +20,8 @@ #include <stdint.h> #include <iterator> +#include "utils.h" + namespace art { class Allocator; @@ -116,6 +118,11 @@ class BitVector { virtual ~BitVector(); + // The number of words necessary to encode bits. + static constexpr uint32_t BitsToWords(uint32_t bits) { + return RoundUp(bits, kWordBits) / kWordBits; + } + // Mark the specified bit as "set". 
void SetBit(uint32_t idx) { /* diff --git a/runtime/check_reference_map_visitor.h b/runtime/check_reference_map_visitor.h index 5d9cd35c83..d87a563d73 100644 --- a/runtime/check_reference_map_visitor.h +++ b/runtime/check_reference_map_visitor.h @@ -29,7 +29,7 @@ namespace art { class CheckReferenceMapVisitor : public StackVisitor { public: explicit CheckReferenceMapVisitor(Thread* thread) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - : StackVisitor(thread, nullptr) {} + : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames) {} bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { mirror::ArtMethod* m = GetMethod(); diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h index 991737893a..34fdd8d76a 100644 --- a/runtime/common_runtime_test.h +++ b/runtime/common_runtime_test.h @@ -182,7 +182,7 @@ class CheckJniAbortCatcher { } #define TEST_DISABLED_FOR_MIPS() \ - if (kRuntimeISA == kMips || kRuntimeISA == kMips64) { \ + if (kRuntimeISA == kMips) { \ printf("WARNING: TEST DISABLED FOR MIPS\n"); \ return; \ } diff --git a/runtime/debugger.cc b/runtime/debugger.cc index dc1b4f1dd5..4bc9f98dfe 100644 --- a/runtime/debugger.cc +++ b/runtime/debugger.cc @@ -57,6 +57,9 @@ namespace art { +// The key identifying the debugger to update instrumentation. +static constexpr const char* kDbgInstrumentationKey = "Debugger"; + static const size_t kMaxAllocRecordStackDepth = 16; // Max 255. static const size_t kDefaultNumAllocRecords = 64*1024; // Must be a power of 2. 2BE can hold 64k-1. @@ -232,13 +235,29 @@ class DebugInstrumentationListener FINAL : public instrumentation::Instrumentati virtual ~DebugInstrumentationListener() {} void MethodEntered(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method, - uint32_t dex_pc ATTRIBUTE_UNUSED) + uint32_t dex_pc) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { if (method->IsNative()) { // TODO: post location events is a suspension point and native method entry stubs aren't. return; } - Dbg::UpdateDebugger(thread, this_object, method, 0, Dbg::kMethodEntry, nullptr); + if (IsListeningToDexPcMoved()) { + // We also listen to kDexPcMoved instrumentation event so we know the DexPcMoved method is + // going to be called right after us. To avoid sending JDWP events twice for this location, + // we report the event in DexPcMoved. However, we must remember that this is a method entry + // so we send the METHOD_ENTRY event. And we can also group it with other events for this + // location like BREAKPOINT or SINGLE_STEP (or even METHOD_EXIT if this is a RETURN instruction). + thread->SetDebugMethodEntry(); + } else if (IsListeningToMethodExit() && IsReturn(method, dex_pc)) { + // We also listen to kMethodExited instrumentation event and the current instruction is a + // RETURN so we know the MethodExited method is going to be called right after us. To avoid + // sending JDWP events twice for this location, we report the event(s) in MethodExited. + // However, we must remember that this is a method entry so we send the METHOD_ENTRY event. + // And we can also group it with other events for this location like BREAKPOINT or SINGLE_STEP.
+ thread->SetDebugMethodEntry(); + } else { + Dbg::UpdateDebugger(thread, this_object, method, 0, Dbg::kMethodEntry, nullptr); + } } void MethodExited(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method, @@ -248,14 +267,20 @@ class DebugInstrumentationListener FINAL : public instrumentation::Instrumentati // TODO: post location events is a suspension point and native method entry stubs aren't. return; } - Dbg::UpdateDebugger(thread, this_object, method, dex_pc, Dbg::kMethodExit, &return_value); + uint32_t events = Dbg::kMethodExit; + if (thread->IsDebugMethodEntry()) { + // It is also the method entry. + DCHECK(IsReturn(method, dex_pc)); + events |= Dbg::kMethodEntry; + thread->ClearDebugMethodEntry(); + } + Dbg::UpdateDebugger(thread, this_object, method, dex_pc, events, &return_value); } - void MethodUnwind(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method, - uint32_t dex_pc) + void MethodUnwind(Thread* thread ATTRIBUTE_UNUSED, mirror::Object* this_object ATTRIBUTE_UNUSED, + mirror::ArtMethod* method, uint32_t dex_pc) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { // We're not recorded to listen to this kind of event, so complain. - UNUSED(thread, this_object, method, dex_pc); LOG(ERROR) << "Unexpected method unwind event in debugger " << PrettyMethod(method) << " " << dex_pc; } @@ -263,13 +288,27 @@ class DebugInstrumentationListener FINAL : public instrumentation::Instrumentati void DexPcMoved(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method, uint32_t new_dex_pc) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - Dbg::UpdateDebugger(thread, this_object, method, new_dex_pc, 0, nullptr); + if (IsListeningToMethodExit() && IsReturn(method, new_dex_pc)) { + // We also listen to kMethodExited instrumentation event and the current instruction is a + // RETURN so we know the MethodExited method is going to be called right after us. Like in + // MethodEntered, we delegate event reporting to MethodExited. + // Besides, if this RETURN instruction is the only one in the method, we can send multiple + // JDWP events in the same packet: METHOD_ENTRY, METHOD_EXIT, BREAKPOINT and/or SINGLE_STEP. + // Therefore, we must not clear the debug method entry flag here. + } else { + uint32_t events = 0; + if (thread->IsDebugMethodEntry()) { + // It is also the method entry. 
+ events = Dbg::kMethodEntry; + thread->ClearDebugMethodEntry(); + } + Dbg::UpdateDebugger(thread, this_object, method, new_dex_pc, events, nullptr); + } } - void FieldRead(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method, - uint32_t dex_pc, ArtField* field) + void FieldRead(Thread* thread ATTRIBUTE_UNUSED, mirror::Object* this_object, + mirror::ArtMethod* method, uint32_t dex_pc, ArtField* field) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - UNUSED(thread); Dbg::PostFieldAccessEvent(method, dex_pc, this_object, field); } @@ -293,6 +332,26 @@ class DebugInstrumentationListener FINAL : public instrumentation::Instrumentati } private: + static bool IsReturn(mirror::ArtMethod* method, uint32_t dex_pc) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + const DexFile::CodeItem* code_item = method->GetCodeItem(); + const Instruction* instruction = Instruction::At(&code_item->insns_[dex_pc]); + return instruction->IsReturn(); + } + + static bool IsListeningToDexPcMoved() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + return IsListeningTo(instrumentation::Instrumentation::kDexPcMoved); + } + + static bool IsListeningToMethodExit() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + return IsListeningTo(instrumentation::Instrumentation::kMethodExited); + } + + static bool IsListeningTo(instrumentation::Instrumentation::InstrumentationEvent event) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + return (Dbg::GetInstrumentationEvents() & event) != 0; + } + DISALLOW_COPY_AND_ASSIGN(DebugInstrumentationListener); } gDebugInstrumentationListener; @@ -677,7 +736,7 @@ void Dbg::Disconnected() { instrumentation_events_ = 0; } if (RequiresDeoptimization()) { - runtime->GetInstrumentation()->DisableDeoptimization(); + runtime->GetInstrumentation()->DisableDeoptimization(kDbgInstrumentationKey); } gDebuggerActive = false; } @@ -831,8 +890,10 @@ JDWP::JdwpError Dbg::GetOwnedMonitors(JDWP::ObjectId thread_id, std::vector<JDWP::ObjectId>* monitor_vector, std::vector<uint32_t>* stack_depth_vector) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - : StackVisitor(thread, context), current_stack_depth(0), - monitors(monitor_vector), stack_depths(stack_depth_vector) {} + : StackVisitor(thread, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames), + current_stack_depth(0), + monitors(monitor_vector), + stack_depths(stack_depth_vector) {} // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses // annotalysis. @@ -2082,6 +2143,7 @@ JDWP::JdwpThreadStatus Dbg::ToJdwpThreadStatus(ThreadState state) { case kWaitingForDebuggerToAttach: case kWaitingForDeoptimization: case kWaitingForGcToComplete: + case kWaitingForGetObjectsAllocated: case kWaitingForJniOnLoad: case kWaitingForMethodTracingStart: case kWaitingForSignalCatcherOutput: @@ -2193,7 +2255,8 @@ void Dbg::GetThreads(mirror::Object* thread_group, std::vector<JDWP::ObjectId>* static int GetStackDepth(Thread* thread) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { struct CountStackDepthVisitor : public StackVisitor { explicit CountStackDepthVisitor(Thread* thread_in) - : StackVisitor(thread_in, nullptr), depth(0) {} + : StackVisitor(thread_in, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames), + depth(0) {} // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses // annotalysis. 
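// The MethodEntered/MethodExited/DexPcMoved changes above all implement one
// idea: when another listener is about to fire for the same dex location, the
// METHOD_ENTRY report is deferred by setting a flag on the thread, and the
// later callback ORs Dbg::kMethodEntry into its own event mask so a single
// JDWP packet carries the grouped events. A self-contained sketch of that
// flag protocol (ToyThread and the bit values are stand-ins, not ART's):

#include <cstdint>
#include <cstdio>

enum : uint32_t { kToyMethodEntry = 1u << 0, kToyMethodExit = 1u << 1 };

struct ToyThread {
  bool debug_method_entry = false;
  void SetDebugMethodEntry() { debug_method_entry = true; }
  bool IsDebugMethodEntry() const { return debug_method_entry; }
  void ClearDebugMethodEntry() { debug_method_entry = false; }
};

// What MethodExited does on a RETURN: fold the deferred entry event into the
// mask it reports, so both events travel in one suspend point.
uint32_t EventsForMethodExit(ToyThread* t) {
  uint32_t events = kToyMethodExit;
  if (t->IsDebugMethodEntry()) {
    events |= kToyMethodEntry;
    t->ClearDebugMethodEntry();
  }
  return events;
}

int main() {
  ToyThread t;
  t.SetDebugMethodEntry();  // MethodEntered defers its report.
  std::printf("events = %u\n", EventsForMethodExit(&t));  // prints 3 (entry|exit).
  return 0;
}
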
@@ -2233,8 +2296,11 @@ JDWP::JdwpError Dbg::GetThreadFrames(JDWP::ObjectId thread_id, size_t start_fram GetFrameVisitor(Thread* thread, size_t start_frame_in, size_t frame_count_in, JDWP::ExpandBuf* buf_in) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - : StackVisitor(thread, nullptr), depth_(0), - start_frame_(start_frame_in), frame_count_(frame_count_in), buf_(buf_in) { + : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames), + depth_(0), + start_frame_(start_frame_in), + frame_count_(frame_count_in), + buf_(buf_in) { expandBufAdd4BE(buf_, frame_count_); } @@ -2351,7 +2417,9 @@ void Dbg::SuspendSelf() { struct GetThisVisitor : public StackVisitor { GetThisVisitor(Thread* thread, Context* context, JDWP::FrameId frame_id_in) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - : StackVisitor(thread, context), this_object(nullptr), frame_id(frame_id_in) {} + : StackVisitor(thread, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames), + this_object(nullptr), + frame_id(frame_id_in) {} // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses // annotalysis. @@ -2391,7 +2459,9 @@ class FindFrameVisitor FINAL : public StackVisitor { public: FindFrameVisitor(Thread* thread, Context* context, JDWP::FrameId frame_id) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - : StackVisitor(thread, context), frame_id_(frame_id), error_(JDWP::ERR_INVALID_FRAMEID) {} + : StackVisitor(thread, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames), + frame_id_(frame_id), + error_(JDWP::ERR_INVALID_FRAMEID) {} // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses // annotalysis. @@ -2775,7 +2845,7 @@ class CatchLocationFinder : public StackVisitor { public: CatchLocationFinder(Thread* self, const Handle<mirror::Throwable>& exception, Context* context) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - : StackVisitor(self, context), + : StackVisitor(self, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames), self_(self), exception_(exception), handle_scope_(self), @@ -2998,12 +3068,12 @@ void Dbg::ProcessDeoptimizationRequest(const DeoptimizationRequest& request) { break; case DeoptimizationRequest::kFullDeoptimization: VLOG(jdwp) << "Deoptimize the world ..."; - instrumentation->DeoptimizeEverything(); + instrumentation->DeoptimizeEverything(kDbgInstrumentationKey); VLOG(jdwp) << "Deoptimize the world DONE"; break; case DeoptimizationRequest::kFullUndeoptimization: VLOG(jdwp) << "Undeoptimize the world ..."; - instrumentation->UndeoptimizeEverything(); + instrumentation->UndeoptimizeEverything(kDbgInstrumentationKey); VLOG(jdwp) << "Undeoptimize the world DONE"; break; case DeoptimizationRequest::kSelectiveDeoptimization: @@ -3523,8 +3593,10 @@ JDWP::JdwpError Dbg::ConfigureStep(JDWP::ObjectId thread_id, JDWP::JdwpStepSize // is for step-out. struct SingleStepStackVisitor : public StackVisitor { explicit SingleStepStackVisitor(Thread* thread) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - : StackVisitor(thread, nullptr), stack_depth(0), method(nullptr), line_number(-1) { - } + : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames), + stack_depth(0), + method(nullptr), + line_number(-1) {} // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses // annotalysis. 
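// Each debugger-side StackVisitor above now names its walk kind explicitly:
// JDWP walks pass StackWalkKind::kIncludeInlinedFrames, while instrumentation
// (further below) uses kSkipInlinedFrames because it rewrites return PCs,
// which inlined frames do not have. A toy model of the distinction, assuming
// a walk simply filters frames by kind (ToyFrame is illustrative):

#include <cstddef>
#include <vector>

enum class StackWalkKind { kIncludeInlinedFrames, kSkipInlinedFrames };

struct ToyFrame { bool inlined; };

size_t CountFrames(const std::vector<ToyFrame>& stack, StackWalkKind kind) {
  size_t depth = 0;
  for (const ToyFrame& f : stack) {
    if (f.inlined && kind == StackWalkKind::kSkipInlinedFrames) {
      continue;  // no physical frame here, nothing to patch
    }
    ++depth;
  }
  return depth;
}
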
@@ -4637,7 +4709,9 @@ void Dbg::SetAllocTrackingEnabled(bool enable) { struct AllocRecordStackVisitor : public StackVisitor { AllocRecordStackVisitor(Thread* thread, AllocRecord* record_in) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - : StackVisitor(thread, nullptr), record(record_in), depth(0) {} + : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames), + record(record_in), + depth(0) {} // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses // annotalysis. diff --git a/runtime/debugger.h b/runtime/debugger.h index fe90eb613e..789a0a4dca 100644 --- a/runtime/debugger.h +++ b/runtime/debugger.h @@ -714,6 +714,10 @@ class Dbg { static JDWP::JdwpState* GetJdwpState(); + static uint32_t GetInstrumentationEvents() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + return instrumentation_events_; + } + private: static JDWP::JdwpError GetLocalValue(const StackVisitor& visitor, ScopedObjectAccessUnchecked& soa, int slot, diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h index db8c0e3c58..b72ce34648 100644 --- a/runtime/entrypoints/quick/quick_entrypoints.h +++ b/runtime/entrypoints/quick/quick_entrypoints.h @@ -32,6 +32,8 @@ class Array; class ArtMethod; class Class; class Object; +template<class MirrorType> +class CompressedReference; } // namespace mirror class Thread; @@ -65,6 +67,10 @@ extern mirror::Object* JniMethodEndWithReferenceSynchronized(jobject result, jobject locked, Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; +extern void ReadBarrierJni(mirror::CompressedReference<mirror::Object>* handle_on_stack, + Thread* self) + NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; + } // namespace art #endif // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_H_ diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h index 035f57a08a..0aca58fb16 100644 --- a/runtime/entrypoints/quick/quick_entrypoints_list.h +++ b/runtime/entrypoints/quick/quick_entrypoints_list.h @@ -143,7 +143,9 @@ V(NewStringFromCodePoints, void) \ V(NewStringFromString, void) \ V(NewStringFromStringBuffer, void) \ - V(NewStringFromStringBuilder, void) + V(NewStringFromStringBuilder, void) \ +\ + V(ReadBarrierJni, void, mirror::CompressedReference<mirror::Object>*, Thread*) #endif // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_ #undef ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_ // #define is only for lint. diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc index e478d2a840..51817a249d 100644 --- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc @@ -22,6 +22,13 @@ namespace art { +extern void ReadBarrierJni(mirror::CompressedReference<mirror::Object>* handle_on_stack, + Thread* self ATTRIBUTE_UNUSED) { + // Call the read barrier and update the handle. + mirror::Object* to_ref = ReadBarrier::BarrierForRoot(handle_on_stack); + handle_on_stack->Assign(to_ref); +} + // Called on entry to JNI, transition out of Runnable and release share of mutator_lock_. 
extern uint32_t JniMethodStart(Thread* self) { JNIEnvExt* env = self->GetJniEnv(); diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc index 1fb45f4d4d..482f656fa6 100644 --- a/runtime/entrypoints_order_test.cc +++ b/runtime/entrypoints_order_test.cc @@ -305,8 +305,10 @@ class EntrypointsOrderTest : public CommonRuntimeTest { sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromStringBuffer, pNewStringFromStringBuilder, sizeof(void*)); + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromStringBuilder, pReadBarrierJni, + sizeof(void*)); - CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pNewStringFromStringBuilder) + CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pReadBarrierJni) + sizeof(void*) == sizeof(QuickEntryPoints), QuickEntryPoints_all); } }; diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc index 1068e90a09..55a8411863 100644 --- a/runtime/gc/collector/mark_sweep.cc +++ b/runtime/gc/collector/mark_sweep.cc @@ -385,7 +385,7 @@ class MarkSweepMarkObjectSlowPath { LOG(INTERNAL_FATAL) << "Attempting see if it's a bad root"; mark_sweep_->VerifyRoots(); PrintFileToLog("/proc/self/maps", LogSeverity::INTERNAL_FATAL); - MemMap::DumpMaps(LOG(INTERNAL_FATAL)); + MemMap::DumpMaps(LOG(INTERNAL_FATAL), true); LOG(FATAL) << "Can't mark invalid object"; } } diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index cbbc76ccd1..11a0e3c3b8 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -491,7 +491,7 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max bool no_gap = MemMap::CheckNoGaps(GetImageSpace()->GetMemMap(), non_moving_space_->GetMemMap()); if (!no_gap) { - MemMap::DumpMaps(LOG(ERROR)); + MemMap::DumpMaps(LOG(ERROR), true); LOG(FATAL) << "There's a gap between the image space and the non-moving space"; } } @@ -1612,10 +1612,19 @@ void Heap::SetTargetHeapUtilization(float target) { } size_t Heap::GetObjectsAllocated() const { + Thread* self = Thread::Current(); + ScopedThreadStateChange tsc(self, kWaitingForGetObjectsAllocated); + auto* tl = Runtime::Current()->GetThreadList(); + // Need SuspendAll here to prevent lock violation if RosAlloc does it during InspectAll. + tl->SuspendAll(__FUNCTION__); size_t total = 0; - for (space::AllocSpace* space : alloc_spaces_) { - total += space->GetObjectsAllocated(); + { + ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_); + for (space::AllocSpace* space : alloc_spaces_) { + total += space->GetObjectsAllocated(); + } } + tl->ResumeAll(); return total; } diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc index e6c333d5cd..98e6200bcb 100644 --- a/runtime/instrumentation.cc +++ b/runtime/instrumentation.cc @@ -16,13 +16,10 @@ #include "instrumentation.h" -#include <sys/uio.h> - #include <sstream> #include "arch/context.h" #include "atomic.h" -#include "base/unix_file/fd_file.h" #include "class_linker.h" #include "debugger.h" #include "dex_file-inl.h" @@ -39,16 +36,18 @@ #include "mirror/object_array-inl.h" #include "mirror/object-inl.h" #include "nth_caller_visitor.h" -#include "os.h" -#include "scoped_thread_state_change.h" #include "thread.h" #include "thread_list.h" namespace art { - namespace instrumentation { -const bool kVerboseInstrumentation = false; +constexpr bool kVerboseInstrumentation = false; + +// Instrumentation works on non-inlined frames by updating returned PCs +// of compiled frames. 
+static constexpr StackVisitor::StackWalkKind kInstrumentationStackWalk = + StackVisitor::StackWalkKind::kSkipInlinedFrames; static bool InstallStubsClassVisitor(mirror::Class* klass, void* arg) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) { @@ -64,7 +63,7 @@ Instrumentation::Instrumentation() have_method_entry_listeners_(false), have_method_exit_listeners_(false), have_method_unwind_listeners_(false), have_dex_pc_listeners_(false), have_field_read_listeners_(false), have_field_write_listeners_(false), - have_exception_caught_listeners_(false), + have_exception_caught_listeners_(false), have_backward_branch_listeners_(false), deoptimized_methods_lock_("deoptimized methods lock"), deoptimization_enabled_(false), interpreter_handler_table_(kMainHandlerTable), @@ -166,16 +165,16 @@ void Instrumentation::InstallStubsForMethod(mirror::ArtMethod* method) { // existing instrumentation frames. static void InstrumentationInstallStack(Thread* thread, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - struct InstallStackVisitor : public StackVisitor { + struct InstallStackVisitor FINAL : public StackVisitor { InstallStackVisitor(Thread* thread_in, Context* context, uintptr_t instrumentation_exit_pc) - : StackVisitor(thread_in, context), + : StackVisitor(thread_in, context, kInstrumentationStackWalk), instrumentation_stack_(thread_in->GetInstrumentationStack()), instrumentation_exit_pc_(instrumentation_exit_pc), reached_existing_instrumentation_frames_(false), instrumentation_stack_depth_(0), last_return_pc_(0) { } - virtual bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + bool VisitFrame() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { mirror::ArtMethod* m = GetMethod(); if (m == nullptr) { if (kVerboseInstrumentation) { @@ -306,16 +305,17 @@ static void InstrumentationInstallStack(Thread* thread, void* arg) // Removes the instrumentation exit pc as the return PC for every quick frame. static void InstrumentationRestoreStack(Thread* thread, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - struct RestoreStackVisitor : public StackVisitor { + struct RestoreStackVisitor FINAL : public StackVisitor { RestoreStackVisitor(Thread* thread_in, uintptr_t instrumentation_exit_pc, Instrumentation* instrumentation) - : StackVisitor(thread_in, nullptr), thread_(thread_in), + : StackVisitor(thread_in, nullptr, kInstrumentationStackWalk), + thread_(thread_in), instrumentation_exit_pc_(instrumentation_exit_pc), instrumentation_(instrumentation), instrumentation_stack_(thread_in->GetInstrumentationStack()), frames_removed_(0) {} - virtual bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + bool VisitFrame() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { if (instrumentation_stack_->size() == 0) { return false; // Stop. 
} @@ -390,25 +390,29 @@ static void InstrumentationRestoreStack(Thread* thread, void* arg) } } +static bool HasEvent(Instrumentation::InstrumentationEvent expected, uint32_t events) { + return (events & expected) != 0; +} + void Instrumentation::AddListener(InstrumentationListener* listener, uint32_t events) { Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current()); - if ((events & kMethodEntered) != 0) { + if (HasEvent(kMethodEntered, events)) { method_entry_listeners_.push_back(listener); have_method_entry_listeners_ = true; } - if ((events & kMethodExited) != 0) { + if (HasEvent(kMethodExited, events)) { method_exit_listeners_.push_back(listener); have_method_exit_listeners_ = true; } - if ((events & kMethodUnwind) != 0) { + if (HasEvent(kMethodUnwind, events)) { method_unwind_listeners_.push_back(listener); have_method_unwind_listeners_ = true; } - if ((events & kBackwardBranch) != 0) { + if (HasEvent(kBackwardBranch, events)) { backward_branch_listeners_.push_back(listener); have_backward_branch_listeners_ = true; } - if ((events & kDexPcMoved) != 0) { + if (HasEvent(kDexPcMoved, events)) { std::list<InstrumentationListener*>* modified; if (have_dex_pc_listeners_) { modified = new std::list<InstrumentationListener*>(*dex_pc_listeners_.get()); @@ -419,7 +423,7 @@ void Instrumentation::AddListener(InstrumentationListener* listener, uint32_t ev dex_pc_listeners_.reset(modified); have_dex_pc_listeners_ = true; } - if ((events & kFieldRead) != 0) { + if (HasEvent(kFieldRead, events)) { std::list<InstrumentationListener*>* modified; if (have_field_read_listeners_) { modified = new std::list<InstrumentationListener*>(*field_read_listeners_.get()); @@ -430,7 +434,7 @@ void Instrumentation::AddListener(InstrumentationListener* listener, uint32_t ev field_read_listeners_.reset(modified); have_field_read_listeners_ = true; } - if ((events & kFieldWritten) != 0) { + if (HasEvent(kFieldWritten, events)) { std::list<InstrumentationListener*>* modified; if (have_field_write_listeners_) { modified = new std::list<InstrumentationListener*>(*field_write_listeners_.get()); @@ -441,7 +445,7 @@ void Instrumentation::AddListener(InstrumentationListener* listener, uint32_t ev field_write_listeners_.reset(modified); have_field_write_listeners_ = true; } - if ((events & kExceptionCaught) != 0) { + if (HasEvent(kExceptionCaught, events)) { std::list<InstrumentationListener*>* modified; if (have_exception_caught_listeners_) { modified = new std::list<InstrumentationListener*>(*exception_caught_listeners_.get()); @@ -458,102 +462,104 @@ void Instrumentation::AddListener(InstrumentationListener* listener, uint32_t ev void Instrumentation::RemoveListener(InstrumentationListener* listener, uint32_t events) { Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current()); - if ((events & kMethodEntered) != 0) { - if (have_method_entry_listeners_) { - method_entry_listeners_.remove(listener); - have_method_entry_listeners_ = !method_entry_listeners_.empty(); - } + if (HasEvent(kMethodEntered, events) && have_method_entry_listeners_) { + method_entry_listeners_.remove(listener); + have_method_entry_listeners_ = !method_entry_listeners_.empty(); } - if ((events & kMethodExited) != 0) { - if (have_method_exit_listeners_) { - method_exit_listeners_.remove(listener); - have_method_exit_listeners_ = !method_exit_listeners_.empty(); - } + if (HasEvent(kMethodExited, events) && have_method_exit_listeners_) { + method_exit_listeners_.remove(listener); + have_method_exit_listeners_ = !method_exit_listeners_.empty(); } - if 
((events & kMethodUnwind) != 0) { - if (have_method_unwind_listeners_) { + if (HasEvent(kMethodUnwind, events) && have_method_unwind_listeners_) { method_unwind_listeners_.remove(listener); have_method_unwind_listeners_ = !method_unwind_listeners_.empty(); - } } - if ((events & kDexPcMoved) != 0) { + if (HasEvent(kBackwardBranch, events) && have_backward_branch_listeners_) { + backward_branch_listeners_.remove(listener); + have_backward_branch_listeners_ = !backward_branch_listeners_.empty(); + } + if (HasEvent(kDexPcMoved, events) && have_dex_pc_listeners_) { + std::list<InstrumentationListener*>* modified = + new std::list<InstrumentationListener*>(*dex_pc_listeners_.get()); + modified->remove(listener); + have_dex_pc_listeners_ = !modified->empty(); if (have_dex_pc_listeners_) { - std::list<InstrumentationListener*>* modified = - new std::list<InstrumentationListener*>(*dex_pc_listeners_.get()); - modified->remove(listener); - have_dex_pc_listeners_ = !modified->empty(); - if (have_dex_pc_listeners_) { - dex_pc_listeners_.reset(modified); - } else { - dex_pc_listeners_.reset(); - delete modified; - } + dex_pc_listeners_.reset(modified); + } else { + dex_pc_listeners_.reset(); + delete modified; } } - if ((events & kFieldRead) != 0) { + if (HasEvent(kFieldRead, events) && have_field_read_listeners_) { + std::list<InstrumentationListener*>* modified = + new std::list<InstrumentationListener*>(*field_read_listeners_.get()); + modified->remove(listener); + have_field_read_listeners_ = !modified->empty(); if (have_field_read_listeners_) { - std::list<InstrumentationListener*>* modified = - new std::list<InstrumentationListener*>(*field_read_listeners_.get()); - modified->remove(listener); - have_field_read_listeners_ = !modified->empty(); - if (have_field_read_listeners_) { - field_read_listeners_.reset(modified); - } else { - field_read_listeners_.reset(); - delete modified; - } + field_read_listeners_.reset(modified); + } else { + field_read_listeners_.reset(); + delete modified; } } - if ((events & kFieldWritten) != 0) { + if (HasEvent(kFieldWritten, events) && have_field_write_listeners_) { + std::list<InstrumentationListener*>* modified = + new std::list<InstrumentationListener*>(*field_write_listeners_.get()); + modified->remove(listener); + have_field_write_listeners_ = !modified->empty(); if (have_field_write_listeners_) { - std::list<InstrumentationListener*>* modified = - new std::list<InstrumentationListener*>(*field_write_listeners_.get()); - modified->remove(listener); - have_field_write_listeners_ = !modified->empty(); - if (have_field_write_listeners_) { - field_write_listeners_.reset(modified); - } else { - field_write_listeners_.reset(); - delete modified; - } + field_write_listeners_.reset(modified); + } else { + field_write_listeners_.reset(); + delete modified; } } - if ((events & kExceptionCaught) != 0) { + if (HasEvent(kExceptionCaught, events) && have_exception_caught_listeners_) { + std::list<InstrumentationListener*>* modified = + new std::list<InstrumentationListener*>(*exception_caught_listeners_.get()); + modified->remove(listener); + have_exception_caught_listeners_ = !modified->empty(); if (have_exception_caught_listeners_) { - std::list<InstrumentationListener*>* modified = - new std::list<InstrumentationListener*>(*exception_caught_listeners_.get()); - modified->remove(listener); - have_exception_caught_listeners_ = !modified->empty(); - if (have_exception_caught_listeners_) { - exception_caught_listeners_.reset(modified); - } else { - 
exception_caught_listeners_.reset(); - delete modified; - } + exception_caught_listeners_.reset(modified); + } else { + exception_caught_listeners_.reset(); + delete modified; } } UpdateInterpreterHandlerTable(); } -void Instrumentation::ConfigureStubs(bool require_entry_exit_stubs, bool require_interpreter) { - interpret_only_ = require_interpreter || forced_interpret_only_; - // Compute what level of instrumentation is required and compare to current. - int desired_level, current_level; - if (require_interpreter) { - desired_level = 2; - } else if (require_entry_exit_stubs) { - desired_level = 1; - } else { - desired_level = 0; - } +Instrumentation::InstrumentationLevel Instrumentation::GetCurrentInstrumentationLevel() const { if (interpreter_stubs_installed_) { - current_level = 2; + return InstrumentationLevel::kInstrumentWithInterpreter; } else if (entry_exit_stubs_installed_) { - current_level = 1; + return InstrumentationLevel::kInstrumentWithInstrumentationStubs; + } else { + return InstrumentationLevel::kInstrumentNothing; + } +} + +void Instrumentation::ConfigureStubs(const char* key, InstrumentationLevel desired_level) { + // Store the instrumentation level for this key or remove it. + if (desired_level == InstrumentationLevel::kInstrumentNothing) { + // The client no longer needs instrumentation. + requested_instrumentation_levels_.erase(key); } else { - current_level = 0; + // The client needs instrumentation. + requested_instrumentation_levels_.Overwrite(key, desired_level); } - if (desired_level == current_level) { + + // Look for the highest required instrumentation level. + InstrumentationLevel requested_level = InstrumentationLevel::kInstrumentNothing; + for (const auto& v : requested_instrumentation_levels_) { + requested_level = std::max(requested_level, v.second); + } + + interpret_only_ = (requested_level == InstrumentationLevel::kInstrumentWithInterpreter) || + forced_interpret_only_; + + InstrumentationLevel current_level = GetCurrentInstrumentationLevel(); + if (requested_level == current_level) { // We're already set. 
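    // The level already installed (the maximum over all client requests)
    // matches this request, so there is nothing to add or remove.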
return; } @@ -561,12 +567,14 @@ void Instrumentation::ConfigureStubs(bool require_entry_exit_stubs, bool require Runtime* runtime = Runtime::Current(); Locks::mutator_lock_->AssertExclusiveHeld(self); Locks::thread_list_lock_->AssertNotHeld(self); - if (desired_level > 0) { - if (require_interpreter) { + if (requested_level > InstrumentationLevel::kInstrumentNothing) { + if (requested_level == InstrumentationLevel::kInstrumentWithInterpreter) { interpreter_stubs_installed_ = true; + entry_exit_stubs_installed_ = true; } else { - CHECK(require_entry_exit_stubs); + CHECK_EQ(requested_level, InstrumentationLevel::kInstrumentWithInstrumentationStubs); entry_exit_stubs_installed_ = true; + interpreter_stubs_installed_ = false; } runtime->GetClassLinker()->VisitClasses(InstallStubsClassVisitor, this); instrumentation_stubs_installed_ = true; @@ -590,8 +598,7 @@ void Instrumentation::ConfigureStubs(bool require_entry_exit_stubs, bool require } } -static void ResetQuickAllocEntryPointsForThread(Thread* thread, void* arg) { - UNUSED(arg); +static void ResetQuickAllocEntryPointsForThread(Thread* thread, void* arg ATTRIBUTE_UNUSED) { thread->ResetQuickAllocEntryPointsForThread(); } @@ -804,11 +811,11 @@ void Instrumentation::EnableDeoptimization() { deoptimization_enabled_ = true; } -void Instrumentation::DisableDeoptimization() { +void Instrumentation::DisableDeoptimization(const char* key) { CHECK_EQ(deoptimization_enabled_, true); // If we deoptimized everything, undo it. if (interpreter_stubs_installed_) { - UndeoptimizeEverything(); + UndeoptimizeEverything(key); } // Undeoptimize selected methods. while (true) { @@ -828,25 +835,35 @@ void Instrumentation::DisableDeoptimization() { // Indicates if instrumentation should notify method enter/exit events to the listeners. 
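// When methods already run in the interpreter (as with full deoptimization), the
// interpreter posts enter/exit events itself, so the instrumentation stubs must
// not report them a second time.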
bool Instrumentation::ShouldNotifyMethodEnterExitEvents() const { + if (!HasMethodEntryListeners() && !HasMethodExitListeners()) { + return false; + } return !deoptimization_enabled_ && !interpreter_stubs_installed_; } -void Instrumentation::DeoptimizeEverything() { - CHECK(!interpreter_stubs_installed_); - ConfigureStubs(false, true); +void Instrumentation::DeoptimizeEverything(const char* key) { + CHECK(deoptimization_enabled_); + ConfigureStubs(key, InstrumentationLevel::kInstrumentWithInterpreter); } -void Instrumentation::UndeoptimizeEverything() { +void Instrumentation::UndeoptimizeEverything(const char* key) { CHECK(interpreter_stubs_installed_); - ConfigureStubs(false, false); + CHECK(deoptimization_enabled_); + ConfigureStubs(key, InstrumentationLevel::kInstrumentNothing); } -void Instrumentation::EnableMethodTracing(bool require_interpreter) { - ConfigureStubs(!require_interpreter, require_interpreter); +void Instrumentation::EnableMethodTracing(const char* key, bool needs_interpreter) { + InstrumentationLevel level; + if (needs_interpreter) { + level = InstrumentationLevel::kInstrumentWithInterpreter; + } else { + level = InstrumentationLevel::kInstrumentWithInstrumentationStubs; + } + ConfigureStubs(key, level); } -void Instrumentation::DisableMethodTracing() { - ConfigureStubs(false, false); +void Instrumentation::DisableMethodTracing(const char* key) { + ConfigureStubs(key, InstrumentationLevel::kInstrumentNothing); } const void* Instrumentation::GetQuickCodeFor(mirror::ArtMethod* method, size_t pointer_size) const { @@ -896,7 +913,7 @@ void Instrumentation::MethodExitEventImpl(Thread* thread, mirror::Object* this_o void Instrumentation::MethodUnwindEvent(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method, uint32_t dex_pc) const { - if (have_method_unwind_listeners_) { + if (HasMethodUnwindListeners()) { for (InstrumentationListener* listener : method_unwind_listeners_) { listener->MethodUnwind(thread, this_object, method, dex_pc); } @@ -906,11 +923,9 @@ void Instrumentation::MethodUnwindEvent(Thread* thread, mirror::Object* this_obj void Instrumentation::DexPcMovedEventImpl(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method, uint32_t dex_pc) const { - if (HasDexPcListeners()) { - std::shared_ptr<std::list<InstrumentationListener*>> original(dex_pc_listeners_); - for (InstrumentationListener* listener : *original.get()) { - listener->DexPcMoved(thread, this_object, method, dex_pc); - } + std::shared_ptr<std::list<InstrumentationListener*>> original(dex_pc_listeners_); + for (InstrumentationListener* listener : *original.get()) { + listener->DexPcMoved(thread, this_object, method, dex_pc); } } @@ -924,22 +939,18 @@ void Instrumentation::BackwardBranchImpl(Thread* thread, mirror::ArtMethod* meth void Instrumentation::FieldReadEventImpl(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method, uint32_t dex_pc, ArtField* field) const { - if (HasFieldReadListeners()) { - std::shared_ptr<std::list<InstrumentationListener*>> original(field_read_listeners_); - for (InstrumentationListener* listener : *original.get()) { - listener->FieldRead(thread, this_object, method, dex_pc, field); - } + std::shared_ptr<std::list<InstrumentationListener*>> original(field_read_listeners_); + for (InstrumentationListener* listener : *original.get()) { + listener->FieldRead(thread, this_object, method, dex_pc, field); } } void Instrumentation::FieldWriteEventImpl(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method, uint32_t 
dex_pc, ArtField* field, const JValue& field_value) const { - if (HasFieldWriteListeners()) { - std::shared_ptr<std::list<InstrumentationListener*>> original(field_write_listeners_); - for (InstrumentationListener* listener : *original.get()) { - listener->FieldWritten(thread, this_object, method, dex_pc, field, field_value); - } + std::shared_ptr<std::list<InstrumentationListener*>> original(field_write_listeners_); + for (InstrumentationListener* listener : *original.get()) { + listener->FieldWritten(thread, this_object, method, dex_pc, field, field_value); } } @@ -959,7 +970,7 @@ void Instrumentation::ExceptionCaughtEvent(Thread* thread, static void CheckStackDepth(Thread* self, const InstrumentationStackFrame& instrumentation_frame, int delta) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - size_t frame_id = StackVisitor::ComputeNumFrames(self) + delta; + size_t frame_id = StackVisitor::ComputeNumFrames(self, kInstrumentationStackWalk) + delta; if (frame_id != instrumentation_frame.frame_id_) { LOG(ERROR) << "Expected frame_id=" << frame_id << " but found " << instrumentation_frame.frame_id_; @@ -972,7 +983,7 @@ void Instrumentation::PushInstrumentationStackFrame(Thread* self, mirror::Object mirror::ArtMethod* method, uintptr_t lr, bool interpreter_entry) { // We have a callee-save frame meaning this value is guaranteed to never be 0. - size_t frame_id = StackVisitor::ComputeNumFrames(self); + size_t frame_id = StackVisitor::ComputeNumFrames(self, kInstrumentationStackWalk); std::deque<instrumentation::InstrumentationStackFrame>* stack = self->GetInstrumentationStack(); if (kVerboseInstrumentation) { LOG(INFO) << "Entering " << PrettyMethod(method) << " from PC " << reinterpret_cast<void*>(lr); diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h index 8b7fcca48e..7d70d211bb 100644 --- a/runtime/instrumentation.h +++ b/runtime/instrumentation.h @@ -22,11 +22,10 @@ #include <map> #include "arch/instruction_set.h" -#include "atomic.h" #include "base/macros.h" #include "base/mutex.h" #include "gc_root.h" -#include "object_callbacks.h" +#include "safe_map.h" namespace art { namespace mirror { @@ -67,8 +66,6 @@ struct InstrumentationListener { uint32_t dex_pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0; // Call-back for when a method is exited. - // TODO: its likely passing the return value would be useful, however, we may need to get and - // parse the shorty to determine what kind of register holds the result. virtual void MethodExited(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method, uint32_t dex_pc, const JValue& return_value) @@ -119,6 +116,12 @@ class Instrumentation { kBackwardBranch = 0x80, }; + enum class InstrumentationLevel { + kInstrumentNothing, // execute without instrumentation + kInstrumentWithInstrumentationStubs, // execute with instrumentation entry/exit stubs + kInstrumentWithInterpreter // execute with interpreter + }; + Instrumentation(); // Add a listener to be notified of the masked together set of instrumentation events. 
This @@ -138,7 +141,7 @@ class Instrumentation { void EnableDeoptimization() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(deoptimized_methods_lock_); - void DisableDeoptimization() + void DisableDeoptimization(const char* key) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(deoptimized_methods_lock_); bool AreAllMethodsDeoptimized() const { @@ -147,12 +150,12 @@ class Instrumentation { bool ShouldNotifyMethodEnterExitEvents() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); // Executes everything with interpreter. - void DeoptimizeEverything() + void DeoptimizeEverything(const char* key) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(Locks::thread_list_lock_, Locks::classlinker_classes_lock_); // Executes everything with compiled code (or interpreter if there is no code). - void UndeoptimizeEverything() + void UndeoptimizeEverything(const char* key) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(Locks::thread_list_lock_, Locks::classlinker_classes_lock_); @@ -170,18 +173,19 @@ class Instrumentation { LOCKS_EXCLUDED(Locks::thread_list_lock_, deoptimized_methods_lock_) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_); + // Indicates whether the method has been deoptimized so it is executed with the interpreter. bool IsDeoptimized(mirror::ArtMethod* method) LOCKS_EXCLUDED(deoptimized_methods_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - // Enable method tracing by installing instrumentation entry/exit stubs. - void EnableMethodTracing( - bool require_interpreter = kDeoptimizeForAccurateMethodEntryExitListeners) + // Enable method tracing by installing instrumentation entry/exit stubs or interpreter. + void EnableMethodTracing(const char* key, + bool needs_interpreter = kDeoptimizeForAccurateMethodEntryExitListeners) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(Locks::thread_list_lock_, Locks::classlinker_classes_lock_); - // Disable method tracing by uninstalling instrumentation entry/exit stubs. - void DisableMethodTracing() + // Disable method tracing by uninstalling instrumentation entry/exit stubs or interpreter. + void DisableMethodTracing(const char* key) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(Locks::thread_list_lock_, Locks::classlinker_classes_lock_); @@ -236,6 +240,10 @@ class Instrumentation { return have_method_exit_listeners_; } + bool HasMethodUnwindListeners() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + return have_method_unwind_listeners_; + } + bool HasDexPcListeners() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { return have_dex_pc_listeners_; } @@ -355,8 +363,14 @@ class Instrumentation { LOCKS_EXCLUDED(deoptimized_methods_lock_); private: + InstrumentationLevel GetCurrentInstrumentationLevel() const; + // Does the job of installing or removing instrumentation code within methods. - void ConfigureStubs(bool require_entry_exit_stubs, bool require_interpreter) + // In order to support multiple clients using instrumentation at the same time, + // the caller must pass a unique key (a string) identifying it so we remember which + // instrumentation level it needs. Therefore the current instrumentation level + // becomes the highest instrumentation level required by any client. 
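+  // For example, with two hypothetical clients: if a "Tracer" client requests
+  // kInstrumentWithInstrumentationStubs and a "Debugger" client then requests
+  // kInstrumentWithInterpreter, the interpreter is installed; once the debugger
+  // releases its key, the level drops back to the entry/exit stubs still
+  // required by the tracer.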
+ void ConfigureStubs(const char* key, InstrumentationLevel desired_instrumentation_level) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(Locks::thread_list_lock_, Locks::classlinker_classes_lock_, deoptimized_methods_lock_); @@ -452,6 +466,11 @@ class Instrumentation { // Do we have any backward branch listeners? Short-cut to avoid taking the instrumentation_lock_. bool have_backward_branch_listeners_ GUARDED_BY(Locks::mutator_lock_); + // Contains the instrumentation level required by each client of the instrumentation identified + // by a string key. + typedef SafeMap<const char*, InstrumentationLevel> InstrumentationLevelTable; + InstrumentationLevelTable requested_instrumentation_levels_ GUARDED_BY(Locks::mutator_lock_); + // The event listeners, written to with the mutator_lock_ exclusively held. std::list<InstrumentationListener*> method_entry_listeners_ GUARDED_BY(Locks::mutator_lock_); std::list<InstrumentationListener*> method_exit_listeners_ GUARDED_BY(Locks::mutator_lock_); @@ -481,9 +500,12 @@ class Instrumentation { size_t quick_alloc_entry_points_instrumentation_counter_ GUARDED_BY(Locks::instrument_entrypoints_lock_); + friend class InstrumentationTest; // For GetCurrentInstrumentationLevel and ConfigureStubs. + DISALLOW_COPY_AND_ASSIGN(Instrumentation); }; std::ostream& operator<<(std::ostream& os, const Instrumentation::InstrumentationEvent& rhs); +std::ostream& operator<<(std::ostream& os, const Instrumentation::InstrumentationLevel& rhs); // An element in the instrumentation side stack maintained in art::Thread. struct InstrumentationStackFrame { diff --git a/runtime/instrumentation_test.cc b/runtime/instrumentation_test.cc new file mode 100644 index 0000000000..5afacb8feb --- /dev/null +++ b/runtime/instrumentation_test.cc @@ -0,0 +1,791 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "instrumentation.h" + +#include "common_runtime_test.h" +#include "common_throws.h" +#include "class_linker-inl.h" +#include "dex_file.h" +#include "handle_scope-inl.h" +#include "jvalue.h" +#include "runtime.h" +#include "scoped_thread_state_change.h" +#include "thread_list.h" +#include "thread-inl.h" + +namespace art { +namespace instrumentation { + +class TestInstrumentationListener FINAL : public instrumentation::InstrumentationListener { + public: + TestInstrumentationListener() + : received_method_enter_event(false), received_method_exit_event(false), + received_method_unwind_event(false), received_dex_pc_moved_event(false), + received_field_read_event(false), received_field_written_event(false), + received_exception_caught_event(false), received_backward_branch_event(false) {} + + virtual ~TestInstrumentationListener() {} + + void MethodEntered(Thread* thread ATTRIBUTE_UNUSED, + mirror::Object* this_object ATTRIBUTE_UNUSED, + mirror::ArtMethod* method ATTRIBUTE_UNUSED, + uint32_t dex_pc ATTRIBUTE_UNUSED) + OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + received_method_enter_event = true; + } + + void MethodExited(Thread* thread ATTRIBUTE_UNUSED, + mirror::Object* this_object ATTRIBUTE_UNUSED, + mirror::ArtMethod* method ATTRIBUTE_UNUSED, + uint32_t dex_pc ATTRIBUTE_UNUSED, + const JValue& return_value ATTRIBUTE_UNUSED) + OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + received_method_exit_event = true; + } + + void MethodUnwind(Thread* thread ATTRIBUTE_UNUSED, + mirror::Object* this_object ATTRIBUTE_UNUSED, + mirror::ArtMethod* method ATTRIBUTE_UNUSED, + uint32_t dex_pc ATTRIBUTE_UNUSED) + OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + received_method_unwind_event = true; + } + + void DexPcMoved(Thread* thread ATTRIBUTE_UNUSED, + mirror::Object* this_object ATTRIBUTE_UNUSED, + mirror::ArtMethod* method ATTRIBUTE_UNUSED, + uint32_t new_dex_pc ATTRIBUTE_UNUSED) + OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + received_dex_pc_moved_event = true; + } + + void FieldRead(Thread* thread ATTRIBUTE_UNUSED, + mirror::Object* this_object ATTRIBUTE_UNUSED, + mirror::ArtMethod* method ATTRIBUTE_UNUSED, + uint32_t dex_pc ATTRIBUTE_UNUSED, + ArtField* field ATTRIBUTE_UNUSED) + OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + received_field_read_event = true; + } + + void FieldWritten(Thread* thread ATTRIBUTE_UNUSED, + mirror::Object* this_object ATTRIBUTE_UNUSED, + mirror::ArtMethod* method ATTRIBUTE_UNUSED, + uint32_t dex_pc ATTRIBUTE_UNUSED, + ArtField* field ATTRIBUTE_UNUSED, + const JValue& field_value ATTRIBUTE_UNUSED) + OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + received_field_written_event = true; + } + + void ExceptionCaught(Thread* thread ATTRIBUTE_UNUSED, + mirror::Throwable* exception_object ATTRIBUTE_UNUSED) + OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + received_exception_caught_event = true; + } + + void BackwardBranch(Thread* thread ATTRIBUTE_UNUSED, + mirror::ArtMethod* method ATTRIBUTE_UNUSED, + int32_t dex_pc_offset ATTRIBUTE_UNUSED) + OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + received_backward_branch_event = true; + } + + void Reset() { + received_method_enter_event = false; + received_method_exit_event = false; + received_method_unwind_event = false; + received_dex_pc_moved_event = false; + received_field_read_event = false; + received_field_written_event = false; + received_exception_caught_event = false; + received_backward_branch_event = false; + } + + bool 
received_method_enter_event; + bool received_method_exit_event; + bool received_method_unwind_event; + bool received_dex_pc_moved_event; + bool received_field_read_event; + bool received_field_written_event; + bool received_exception_caught_event; + bool received_backward_branch_event; + + private: + DISALLOW_COPY_AND_ASSIGN(TestInstrumentationListener); +}; + +class InstrumentationTest : public CommonRuntimeTest { + public: + // Unique keys used to test Instrumentation::ConfigureStubs. + static constexpr const char* kClientOneKey = "TestClient1"; + static constexpr const char* kClientTwoKey = "TestClient2"; + + void CheckConfigureStubs(const char* key, Instrumentation::InstrumentationLevel level) { + ScopedObjectAccess soa(Thread::Current()); + instrumentation::Instrumentation* instr = Runtime::Current()->GetInstrumentation(); + { + soa.Self()->TransitionFromRunnableToSuspended(kSuspended); + Runtime* runtime = Runtime::Current(); + runtime->GetThreadList()->SuspendAll("Instrumentation::ConfigureStubs"); + instr->ConfigureStubs(key, level); + runtime->GetThreadList()->ResumeAll(); + soa.Self()->TransitionFromSuspendedToRunnable(); + } + } + + Instrumentation::InstrumentationLevel GetCurrentInstrumentationLevel() { + return Runtime::Current()->GetInstrumentation()->GetCurrentInstrumentationLevel(); + } + + size_t GetInstrumentationUserCount() { + ScopedObjectAccess soa(Thread::Current()); + return Runtime::Current()->GetInstrumentation()->requested_instrumentation_levels_.size(); + } + + void TestEvent(uint32_t instrumentation_event) { + ScopedObjectAccess soa(Thread::Current()); + instrumentation::Instrumentation* instr = Runtime::Current()->GetInstrumentation(); + TestInstrumentationListener listener; + { + soa.Self()->TransitionFromRunnableToSuspended(kSuspended); + Runtime* runtime = Runtime::Current(); + runtime->GetThreadList()->SuspendAll("Add instrumentation listener"); + instr->AddListener(&listener, instrumentation_event); + runtime->GetThreadList()->ResumeAll(); + soa.Self()->TransitionFromSuspendedToRunnable(); + } + + mirror::ArtMethod* const event_method = nullptr; + mirror::Object* const event_obj = nullptr; + const uint32_t event_dex_pc = 0; + + // Check the listener is registered and is notified of the event. + EXPECT_TRUE(HasEventListener(instr, instrumentation_event)); + EXPECT_FALSE(DidListenerReceiveEvent(listener, instrumentation_event)); + ReportEvent(instr, instrumentation_event, soa.Self(), event_method, event_obj, event_dex_pc); + EXPECT_TRUE(DidListenerReceiveEvent(listener, instrumentation_event)); + + listener.Reset(); + { + soa.Self()->TransitionFromRunnableToSuspended(kSuspended); + Runtime* runtime = Runtime::Current(); + runtime->GetThreadList()->SuspendAll("Remove instrumentation listener"); + instr->RemoveListener(&listener, instrumentation_event); + runtime->GetThreadList()->ResumeAll(); + soa.Self()->TransitionFromSuspendedToRunnable(); + } + + // Check the listener is not registered and is not notified of the event. 
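+    // RemoveListener also cleared the matching have_*_listeners_ flag,
+    // which is what HasEventListener checks below.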
+ EXPECT_FALSE(HasEventListener(instr, instrumentation_event)); + EXPECT_FALSE(DidListenerReceiveEvent(listener, instrumentation_event)); + ReportEvent(instr, instrumentation_event, soa.Self(), event_method, event_obj, event_dex_pc); + EXPECT_FALSE(DidListenerReceiveEvent(listener, instrumentation_event)); + } + + void DeoptimizeMethod(Thread* self, Handle<mirror::ArtMethod> method, + bool enable_deoptimization) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + Runtime* runtime = Runtime::Current(); + instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation(); + self->TransitionFromRunnableToSuspended(kSuspended); + runtime->GetThreadList()->SuspendAll("Single method deoptimization"); + if (enable_deoptimization) { + instrumentation->EnableDeoptimization(); + } + instrumentation->Deoptimize(method.Get()); + runtime->GetThreadList()->ResumeAll(); + self->TransitionFromSuspendedToRunnable(); + } + + void UndeoptimizeMethod(Thread* self, Handle<mirror::ArtMethod> method, + const char* key, bool disable_deoptimization) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + Runtime* runtime = Runtime::Current(); + instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation(); + self->TransitionFromRunnableToSuspended(kSuspended); + runtime->GetThreadList()->SuspendAll("Single method undeoptimization"); + instrumentation->Undeoptimize(method.Get()); + if (disable_deoptimization) { + instrumentation->DisableDeoptimization(key); + } + runtime->GetThreadList()->ResumeAll(); + self->TransitionFromSuspendedToRunnable(); + } + + void DeoptimizeEverything(Thread* self, const char* key, bool enable_deoptimization) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + Runtime* runtime = Runtime::Current(); + instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation(); + self->TransitionFromRunnableToSuspended(kSuspended); + runtime->GetThreadList()->SuspendAll("Full deoptimization"); + if (enable_deoptimization) { + instrumentation->EnableDeoptimization(); + } + instrumentation->DeoptimizeEverything(key); + runtime->GetThreadList()->ResumeAll(); + self->TransitionFromSuspendedToRunnable(); + } + + void UndeoptimizeEverything(Thread* self, const char* key, bool disable_deoptimization) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + Runtime* runtime = Runtime::Current(); + instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation(); + self->TransitionFromRunnableToSuspended(kSuspended); + runtime->GetThreadList()->SuspendAll("Full undeoptimization"); + instrumentation->UndeoptimizeEverything(key); + if (disable_deoptimization) { + instrumentation->DisableDeoptimization(key); + } + runtime->GetThreadList()->ResumeAll(); + self->TransitionFromSuspendedToRunnable(); + } + + void EnableMethodTracing(Thread* self, const char* key, bool needs_interpreter) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + Runtime* runtime = Runtime::Current(); + instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation(); + self->TransitionFromRunnableToSuspended(kSuspended); + runtime->GetThreadList()->SuspendAll("EnableMethodTracing"); + instrumentation->EnableMethodTracing(key, needs_interpreter); + runtime->GetThreadList()->ResumeAll(); + self->TransitionFromSuspendedToRunnable(); + } + + void DisableMethodTracing(Thread* self, const char* key) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + Runtime* runtime = Runtime::Current(); + instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation(); + 
self->TransitionFromRunnableToSuspended(kSuspended); + runtime->GetThreadList()->SuspendAll("DisableMethodTracing"); + instrumentation->DisableMethodTracing(key); + runtime->GetThreadList()->ResumeAll(); + self->TransitionFromSuspendedToRunnable(); + } + + private: + static bool HasEventListener(const instrumentation::Instrumentation* instr, uint32_t event_type) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + switch (event_type) { + case instrumentation::Instrumentation::kMethodEntered: + return instr->HasMethodEntryListeners(); + case instrumentation::Instrumentation::kMethodExited: + return instr->HasMethodExitListeners(); + case instrumentation::Instrumentation::kMethodUnwind: + return instr->HasMethodUnwindListeners(); + case instrumentation::Instrumentation::kDexPcMoved: + return instr->HasDexPcListeners(); + case instrumentation::Instrumentation::kFieldRead: + return instr->HasFieldReadListeners(); + case instrumentation::Instrumentation::kFieldWritten: + return instr->HasFieldWriteListeners(); + case instrumentation::Instrumentation::kExceptionCaught: + return instr->HasExceptionCaughtListeners(); + case instrumentation::Instrumentation::kBackwardBranch: + return instr->HasBackwardBranchListeners(); + default: + LOG(FATAL) << "Unknown instrumentation event " << event_type; + UNREACHABLE(); + } + } + + static void ReportEvent(const instrumentation::Instrumentation* instr, uint32_t event_type, + Thread* self, mirror::ArtMethod* method, mirror::Object* obj, + uint32_t dex_pc) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + switch (event_type) { + case instrumentation::Instrumentation::kMethodEntered: + instr->MethodEnterEvent(self, obj, method, dex_pc); + break; + case instrumentation::Instrumentation::kMethodExited: { + JValue value; + instr->MethodExitEvent(self, obj, method, dex_pc, value); + break; + } + case instrumentation::Instrumentation::kMethodUnwind: + instr->MethodUnwindEvent(self, obj, method, dex_pc); + break; + case instrumentation::Instrumentation::kDexPcMoved: + instr->DexPcMovedEvent(self, obj, method, dex_pc); + break; + case instrumentation::Instrumentation::kFieldRead: + instr->FieldReadEvent(self, obj, method, dex_pc, nullptr); + break; + case instrumentation::Instrumentation::kFieldWritten: { + JValue value; + instr->FieldWriteEvent(self, obj, method, dex_pc, nullptr, value); + break; + } + case instrumentation::Instrumentation::kExceptionCaught: { + ThrowArithmeticExceptionDivideByZero(); + mirror::Throwable* event_exception = self->GetException(); + instr->ExceptionCaughtEvent(self, event_exception); + self->ClearException(); + break; + } + case instrumentation::Instrumentation::kBackwardBranch: + instr->BackwardBranch(self, method, dex_pc); + break; + default: + LOG(FATAL) << "Unknown instrumentation event " << event_type; + UNREACHABLE(); + } + } + + static bool DidListenerReceiveEvent(const TestInstrumentationListener& listener, + uint32_t event_type) { + switch (event_type) { + case instrumentation::Instrumentation::kMethodEntered: + return listener.received_method_enter_event; + case instrumentation::Instrumentation::kMethodExited: + return listener.received_method_exit_event; + case instrumentation::Instrumentation::kMethodUnwind: + return listener.received_method_unwind_event; + case instrumentation::Instrumentation::kDexPcMoved: + return listener.received_dex_pc_moved_event; + case instrumentation::Instrumentation::kFieldRead: + return listener.received_field_read_event; + case instrumentation::Instrumentation::kFieldWritten: + return 
listener.received_field_written_event; + case instrumentation::Instrumentation::kExceptionCaught: + return listener.received_exception_caught_event; + case instrumentation::Instrumentation::kBackwardBranch: + return listener.received_backward_branch_event; + default: + LOG(FATAL) << "Unknown instrumentation event " << event_type; + UNREACHABLE(); + } + } +}; + +TEST_F(InstrumentationTest, NoInstrumentation) { + ScopedObjectAccess soa(Thread::Current()); + instrumentation::Instrumentation* instr = Runtime::Current()->GetInstrumentation(); + ASSERT_NE(instr, nullptr); + + EXPECT_FALSE(instr->AreExitStubsInstalled()); + EXPECT_FALSE(instr->AreAllMethodsDeoptimized()); + EXPECT_FALSE(instr->IsActive()); + EXPECT_FALSE(instr->ShouldNotifyMethodEnterExitEvents()); + + // Test interpreter table is the default one. + EXPECT_EQ(instrumentation::kMainHandlerTable, instr->GetInterpreterHandlerTable()); + + // Check there is no registered listener. + EXPECT_FALSE(instr->HasDexPcListeners()); + EXPECT_FALSE(instr->HasExceptionCaughtListeners()); + EXPECT_FALSE(instr->HasFieldReadListeners()); + EXPECT_FALSE(instr->HasFieldWriteListeners()); + EXPECT_FALSE(instr->HasMethodEntryListeners()); + EXPECT_FALSE(instr->HasMethodExitListeners()); + EXPECT_FALSE(instr->IsActive()); +} + +// Test instrumentation listeners for each event. +TEST_F(InstrumentationTest, MethodEntryEvent) { + TestEvent(instrumentation::Instrumentation::kMethodEntered); +} + +TEST_F(InstrumentationTest, MethodExitEvent) { + TestEvent(instrumentation::Instrumentation::kMethodExited); +} + +TEST_F(InstrumentationTest, MethodUnwindEvent) { + TestEvent(instrumentation::Instrumentation::kMethodUnwind); +} + +TEST_F(InstrumentationTest, DexPcMovedEvent) { + TestEvent(instrumentation::Instrumentation::kDexPcMoved); +} + +TEST_F(InstrumentationTest, FieldReadEvent) { + TestEvent(instrumentation::Instrumentation::kFieldRead); +} + +TEST_F(InstrumentationTest, FieldWriteEvent) { + TestEvent(instrumentation::Instrumentation::kFieldWritten); +} + +TEST_F(InstrumentationTest, ExceptionCaughtEvent) { + TestEvent(instrumentation::Instrumentation::kExceptionCaught); +} + +TEST_F(InstrumentationTest, BackwardBranchEvent) { + TestEvent(instrumentation::Instrumentation::kBackwardBranch); +} + +TEST_F(InstrumentationTest, DeoptimizeDirectMethod) { + ScopedObjectAccess soa(Thread::Current()); + jobject class_loader = LoadDex("Instrumentation"); + Runtime* const runtime = Runtime::Current(); + instrumentation::Instrumentation* instr = runtime->GetInstrumentation(); + ClassLinker* class_linker = runtime->GetClassLinker(); + StackHandleScope<2> hs(soa.Self()); + Handle<mirror::ClassLoader> loader(hs.NewHandle(soa.Decode<mirror::ClassLoader*>(class_loader))); + mirror::Class* klass = class_linker->FindClass(soa.Self(), "LInstrumentation;", loader); + ASSERT_TRUE(klass != nullptr); + Handle<mirror::ArtMethod> method_to_deoptimize( + hs.NewHandle(klass->FindDeclaredDirectMethod("instanceMethod", "()V"))); + ASSERT_TRUE(method_to_deoptimize.Get() != nullptr); + + EXPECT_FALSE(instr->AreAllMethodsDeoptimized()); + EXPECT_FALSE(instr->IsDeoptimized(method_to_deoptimize.Get())); + + DeoptimizeMethod(soa.Self(), method_to_deoptimize, true); + + EXPECT_FALSE(instr->AreAllMethodsDeoptimized()); + EXPECT_TRUE(instr->AreExitStubsInstalled()); + EXPECT_TRUE(instr->IsDeoptimized(method_to_deoptimize.Get())); + + constexpr const char* instrumentation_key = "DeoptimizeDirectMethod"; + UndeoptimizeMethod(soa.Self(), method_to_deoptimize, instrumentation_key, true); + + 
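+  // The method is no longer deoptimized, so it runs its compiled code again.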
EXPECT_FALSE(instr->AreAllMethodsDeoptimized()); + EXPECT_FALSE(instr->IsDeoptimized(method_to_deoptimize.Get())); +} + +TEST_F(InstrumentationTest, FullDeoptimization) { + ScopedObjectAccess soa(Thread::Current()); + Runtime* const runtime = Runtime::Current(); + instrumentation::Instrumentation* instr = runtime->GetInstrumentation(); + EXPECT_FALSE(instr->AreAllMethodsDeoptimized()); + + constexpr const char* instrumentation_key = "FullDeoptimization"; + DeoptimizeEverything(soa.Self(), instrumentation_key, true); + + EXPECT_TRUE(instr->AreAllMethodsDeoptimized()); + EXPECT_TRUE(instr->AreExitStubsInstalled()); + + UndeoptimizeEverything(soa.Self(), instrumentation_key, true); + + EXPECT_FALSE(instr->AreAllMethodsDeoptimized()); +} + +TEST_F(InstrumentationTest, MixedDeoptimization) { + ScopedObjectAccess soa(Thread::Current()); + jobject class_loader = LoadDex("Instrumentation"); + Runtime* const runtime = Runtime::Current(); + instrumentation::Instrumentation* instr = runtime->GetInstrumentation(); + ClassLinker* class_linker = runtime->GetClassLinker(); + StackHandleScope<2> hs(soa.Self()); + Handle<mirror::ClassLoader> loader(hs.NewHandle(soa.Decode<mirror::ClassLoader*>(class_loader))); + mirror::Class* klass = class_linker->FindClass(soa.Self(), "LInstrumentation;", loader); + ASSERT_TRUE(klass != nullptr); + Handle<mirror::ArtMethod> method_to_deoptimize( + hs.NewHandle(klass->FindDeclaredDirectMethod("instanceMethod", "()V"))); + ASSERT_TRUE(method_to_deoptimize.Get() != nullptr); + + EXPECT_FALSE(instr->AreAllMethodsDeoptimized()); + EXPECT_FALSE(instr->IsDeoptimized(method_to_deoptimize.Get())); + + DeoptimizeMethod(soa.Self(), method_to_deoptimize, true); + // Deoptimizing a method does not change instrumentation level. + EXPECT_EQ(Instrumentation::InstrumentationLevel::kInstrumentNothing, + GetCurrentInstrumentationLevel()); + EXPECT_FALSE(instr->AreAllMethodsDeoptimized()); + EXPECT_TRUE(instr->AreExitStubsInstalled()); + EXPECT_TRUE(instr->IsDeoptimized(method_to_deoptimize.Get())); + + constexpr const char* instrumentation_key = "MixedDeoptimization"; + DeoptimizeEverything(soa.Self(), instrumentation_key, false); + EXPECT_EQ(Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter, + GetCurrentInstrumentationLevel()); + EXPECT_TRUE(instr->AreAllMethodsDeoptimized()); + EXPECT_TRUE(instr->AreExitStubsInstalled()); + EXPECT_TRUE(instr->IsDeoptimized(method_to_deoptimize.Get())); + + UndeoptimizeEverything(soa.Self(), instrumentation_key, false); + EXPECT_EQ(Instrumentation::InstrumentationLevel::kInstrumentNothing, + GetCurrentInstrumentationLevel()); + EXPECT_FALSE(instr->AreAllMethodsDeoptimized()); + EXPECT_TRUE(instr->AreExitStubsInstalled()); + EXPECT_TRUE(instr->IsDeoptimized(method_to_deoptimize.Get())); + + UndeoptimizeMethod(soa.Self(), method_to_deoptimize, instrumentation_key, true); + EXPECT_EQ(Instrumentation::InstrumentationLevel::kInstrumentNothing, + GetCurrentInstrumentationLevel()); + EXPECT_FALSE(instr->AreAllMethodsDeoptimized()); + EXPECT_FALSE(instr->IsDeoptimized(method_to_deoptimize.Get())); +} + +TEST_F(InstrumentationTest, MethodTracing_Interpreter) { + ScopedObjectAccess soa(Thread::Current()); + Runtime* const runtime = Runtime::Current(); + instrumentation::Instrumentation* instr = runtime->GetInstrumentation(); + EXPECT_FALSE(instr->AreAllMethodsDeoptimized()); + + constexpr const char* instrumentation_key = "MethodTracing"; + EnableMethodTracing(soa.Self(), instrumentation_key, true); + 
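+  // Tracing with needs_interpreter == true must raise the level to the interpreter.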
EXPECT_EQ(Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter, + GetCurrentInstrumentationLevel()); + EXPECT_TRUE(instr->AreAllMethodsDeoptimized()); + EXPECT_TRUE(instr->AreExitStubsInstalled()); + + DisableMethodTracing(soa.Self(), instrumentation_key); + EXPECT_EQ(Instrumentation::InstrumentationLevel::kInstrumentNothing, + GetCurrentInstrumentationLevel()); + EXPECT_FALSE(instr->AreAllMethodsDeoptimized()); +} + +TEST_F(InstrumentationTest, MethodTracing_InstrumentationEntryExitStubs) { + ScopedObjectAccess soa(Thread::Current()); + Runtime* const runtime = Runtime::Current(); + instrumentation::Instrumentation* instr = runtime->GetInstrumentation(); + EXPECT_FALSE(instr->AreAllMethodsDeoptimized()); + + constexpr const char* instrumentation_key = "MethodTracing"; + EnableMethodTracing(soa.Self(), instrumentation_key, false); + EXPECT_EQ(Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs, + GetCurrentInstrumentationLevel()); + EXPECT_FALSE(instr->AreAllMethodsDeoptimized()); + EXPECT_TRUE(instr->AreExitStubsInstalled()); + + DisableMethodTracing(soa.Self(), instrumentation_key); + EXPECT_EQ(Instrumentation::InstrumentationLevel::kInstrumentNothing, + GetCurrentInstrumentationLevel()); + EXPECT_FALSE(instr->AreAllMethodsDeoptimized()); +} + +// We use a macro to print the line number where the test is failing. +#define CHECK_INSTRUMENTATION(_level, _user_count) \ + do { \ + Instrumentation* const instr = Runtime::Current()->GetInstrumentation(); \ + bool interpreter = \ + (_level == Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter); \ + EXPECT_EQ(_level, GetCurrentInstrumentationLevel()); \ + EXPECT_EQ(_user_count, GetInstrumentationUserCount()); \ + if (instr->IsForcedInterpretOnly()) { \ + EXPECT_TRUE(instr->InterpretOnly()); \ + } else if (interpreter) { \ + EXPECT_TRUE(instr->InterpretOnly()); \ + } else { \ + EXPECT_FALSE(instr->InterpretOnly()); \ + } \ + if (interpreter) { \ + EXPECT_TRUE(instr->AreAllMethodsDeoptimized()); \ + } else { \ + EXPECT_FALSE(instr->AreAllMethodsDeoptimized()); \ + } \ + } while (false) + +TEST_F(InstrumentationTest, ConfigureStubs_Nothing) { + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U); + + // Check no-op. + CheckConfigureStubs(kClientOneKey, Instrumentation::InstrumentationLevel::kInstrumentNothing); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U); +} + +TEST_F(InstrumentationTest, ConfigureStubs_InstrumentationStubs) { + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U); + + // Check we can switch to instrumentation stubs + CheckConfigureStubs(kClientOneKey, + Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs, + 1U); + + // Check we can disable instrumentation. 
+ CheckConfigureStubs(kClientOneKey, Instrumentation::InstrumentationLevel::kInstrumentNothing); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U); +} + +TEST_F(InstrumentationTest, ConfigureStubs_Interpreter) { + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U); + + // Check we can switch to interpreter + CheckConfigureStubs(kClientOneKey, + Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter, 1U); + + // Check we can disable instrumentation. + CheckConfigureStubs(kClientOneKey, Instrumentation::InstrumentationLevel::kInstrumentNothing); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U); +} + +TEST_F(InstrumentationTest, ConfigureStubs_InstrumentationStubsToInterpreter) { + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U); + + // Configure stubs with instrumentation stubs. + CheckConfigureStubs(kClientOneKey, + Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs, + 1U); + + // Configure stubs with interpreter. + CheckConfigureStubs(kClientOneKey, + Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter, 1U); + + // Check we can disable instrumentation. + CheckConfigureStubs(kClientOneKey, Instrumentation::InstrumentationLevel::kInstrumentNothing); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U); +} + +TEST_F(InstrumentationTest, ConfigureStubs_InterpreterToInstrumentationStubs) { + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U); + + // Configure stubs with interpreter. + CheckConfigureStubs(kClientOneKey, + Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter, 1U); + + // Configure stubs with instrumentation stubs. + CheckConfigureStubs(kClientOneKey, + Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs, + 1U); + + // Check we can disable instrumentation. + CheckConfigureStubs(kClientOneKey, Instrumentation::InstrumentationLevel::kInstrumentNothing); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U); +} + +TEST_F(InstrumentationTest, + ConfigureStubs_InstrumentationStubsToInterpreterToInstrumentationStubs) { + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U); + + // Configure stubs with instrumentation stubs. + CheckConfigureStubs(kClientOneKey, + Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs, + 1U); + + // Configure stubs with interpreter. + CheckConfigureStubs(kClientOneKey, + Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter, 1U); + + // Configure stubs with instrumentation stubs again. 
+ CheckConfigureStubs(kClientOneKey, + Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs, + 1U); + + // Check we can disable instrumentation. + CheckConfigureStubs(kClientOneKey, Instrumentation::InstrumentationLevel::kInstrumentNothing); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U); +} + +TEST_F(InstrumentationTest, MultiConfigureStubs_Nothing) { + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U); + + // Check kInstrumentNothing with two clients. + CheckConfigureStubs(kClientOneKey, Instrumentation::InstrumentationLevel::kInstrumentNothing); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U); + + CheckConfigureStubs(kClientTwoKey, Instrumentation::InstrumentationLevel::kInstrumentNothing); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U); +} + +TEST_F(InstrumentationTest, MultiConfigureStubs_InstrumentationStubs) { + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U); + + // Configure stubs with instrumentation stubs for 1st client. + CheckConfigureStubs(kClientOneKey, + Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs, + 1U); + + // Configure stubs with instrumentation stubs for 2nd client. + CheckConfigureStubs(kClientTwoKey, + Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs, + 2U); + + // 1st client requests instrumentation deactivation but 2nd client still needs + // instrumentation stubs. + CheckConfigureStubs(kClientOneKey, Instrumentation::InstrumentationLevel::kInstrumentNothing); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs, + 1U); + + // 2nd client requests instrumentation deactivation + CheckConfigureStubs(kClientTwoKey, Instrumentation::InstrumentationLevel::kInstrumentNothing); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U); +} + +TEST_F(InstrumentationTest, MultiConfigureStubs_Interpreter) { + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U); + + // Configure stubs with interpreter for 1st client. + CheckConfigureStubs(kClientOneKey, + Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter, 1U); + + // Configure stubs with interpreter for 2nd client. + CheckConfigureStubs(kClientTwoKey, + Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter, 2U); + + // 1st client requests instrumentation deactivation but 2nd client still needs interpreter. 
+ CheckConfigureStubs(kClientOneKey, Instrumentation::InstrumentationLevel::kInstrumentNothing); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter, 1U); + + // 2nd client requests instrumentation deactivation + CheckConfigureStubs(kClientTwoKey, Instrumentation::InstrumentationLevel::kInstrumentNothing); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U); +} + +TEST_F(InstrumentationTest, MultiConfigureStubs_InstrumentationStubsThenInterpreter) { + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U); + + // Configure stubs with instrumentation stubs for 1st client. + CheckConfigureStubs(kClientOneKey, + Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs, + 1U); + + // Configure stubs with interpreter for 2nd client. + CheckConfigureStubs(kClientTwoKey, + Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter, 2U); + + // 1st client requests instrumentation deactivation but 2nd client still needs interpreter. + CheckConfigureStubs(kClientOneKey, Instrumentation::InstrumentationLevel::kInstrumentNothing); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter, 1U); + + // 2nd client requests instrumentation deactivation + CheckConfigureStubs(kClientTwoKey, Instrumentation::InstrumentationLevel::kInstrumentNothing); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U); +} + +TEST_F(InstrumentationTest, MultiConfigureStubs_InterpreterThenInstrumentationStubs) { + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U); + + // Configure stubs with interpreter for 1st client. + CheckConfigureStubs(kClientOneKey, + Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter, 1U); + + // Configure stubs with instrumentation stubs for 2nd client. + CheckConfigureStubs(kClientTwoKey, + Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInterpreter, 2U); + + // 1st client requests instrumentation deactivation but 2nd client still needs + // instrumentation stubs. 
+ CheckConfigureStubs(kClientOneKey, Instrumentation::InstrumentationLevel::kInstrumentNothing); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentWithInstrumentationStubs, + 1U); + + // 2nd client requests instrumentation deactivation + CheckConfigureStubs(kClientTwoKey, Instrumentation::InstrumentationLevel::kInstrumentNothing); + CHECK_INSTRUMENTATION(Instrumentation::InstrumentationLevel::kInstrumentNothing, 0U); +} + +} // namespace instrumentation +} // namespace art diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc index 878efba1a5..dd1f55e6b2 100644 --- a/runtime/interpreter/interpreter_goto_table_impl.cc +++ b/runtime/interpreter/interpreter_goto_table_impl.cc @@ -156,7 +156,6 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF const Instruction* inst = Instruction::At(code_item->insns_ + dex_pc); uint16_t inst_data; const void* const* currentHandlersTable; - bool notified_method_entry_event = false; UPDATE_HANDLER_TABLE(); if (LIKELY(dex_pc == 0)) { // We are entering the method as opposed to deoptimizing. if (kIsDebugBuild) { @@ -166,7 +165,6 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF if (UNLIKELY(instrumentation->HasMethodEntryListeners())) { instrumentation->MethodEnterEvent(self, shadow_frame.GetThisObject(code_item->ins_size_), shadow_frame.GetMethod(), 0); - notified_method_entry_event = true; } } @@ -264,9 +262,6 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_), shadow_frame.GetMethod(), dex_pc, result); - } else if (UNLIKELY(instrumentation->HasDexPcListeners())) { - instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_), - shadow_frame.GetMethod(), dex_pc); } return result; } @@ -281,9 +276,6 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_), shadow_frame.GetMethod(), dex_pc, result); - } else if (UNLIKELY(instrumentation->HasDexPcListeners())) { - instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_), - shadow_frame.GetMethod(), dex_pc); } return result; } @@ -299,9 +291,6 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_), shadow_frame.GetMethod(), dex_pc, result); - } else if (UNLIKELY(instrumentation->HasDexPcListeners())) { - instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_), - shadow_frame.GetMethod(), dex_pc); } return result; } @@ -316,9 +305,6 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_), shadow_frame.GetMethod(), dex_pc, result); - } else if (UNLIKELY(instrumentation->HasDexPcListeners())) { - instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_), - shadow_frame.GetMethod(), dex_pc); } return result; } @@ -352,9 +338,6 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_), shadow_frame.GetMethod(), dex_pc, result); - } else if 
(UNLIKELY(instrumentation->HasDexPcListeners())) { - instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_), - shadow_frame.GetMethod(), dex_pc); } return result; } @@ -2510,26 +2493,16 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF // Note: we do not use the kReturn instruction flag here (to test the instruction is a return). The // compiler seems to not evaluate "(Instruction::FlagsOf(Instruction::code) & kReturn) != 0" to // a constant condition that would remove the "if" statement so the test is free. -#define INSTRUMENTATION_INSTRUCTION_HANDLER(o, code, n, f, r, i, a, v) \ - alt_op_##code: { \ - if (Instruction::code != Instruction::RETURN_VOID && \ - Instruction::code != Instruction::RETURN_VOID_NO_BARRIER && \ - Instruction::code != Instruction::RETURN && \ - Instruction::code != Instruction::RETURN_WIDE && \ - Instruction::code != Instruction::RETURN_OBJECT) { \ - if (LIKELY(!notified_method_entry_event)) { \ - Runtime* runtime = Runtime::Current(); \ - const instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation(); \ - if (UNLIKELY(instrumentation->HasDexPcListeners())) { \ - Object* this_object = shadow_frame.GetThisObject(code_item->ins_size_); \ - instrumentation->DexPcMovedEvent(self, this_object, shadow_frame.GetMethod(), dex_pc); \ - } \ - } else { \ - notified_method_entry_event = false; \ - } \ - } \ - UPDATE_HANDLER_TABLE(); \ - goto *handlersTable[instrumentation::kMainHandlerTable][Instruction::code]; \ +#define INSTRUMENTATION_INSTRUCTION_HANDLER(o, code, n, f, r, i, a, v) \ + alt_op_##code: { \ + Runtime* const runtime = Runtime::Current(); \ + const instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation(); \ + if (UNLIKELY(instrumentation->HasDexPcListeners())) { \ + Object* this_object = shadow_frame.GetThisObject(code_item->ins_size_); \ + instrumentation->DexPcMovedEvent(self, this_object, shadow_frame.GetMethod(), dex_pc); \ + } \ + UPDATE_HANDLER_TABLE(); \ + goto *handlersTable[instrumentation::kMainHandlerTable][Instruction::code]; \ } #include "dex_instruction_list.h" DEX_INSTRUCTION_LIST(INSTRUMENTATION_INSTRUCTION_HANDLER) diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc index a5e5299b34..0e3420ffb5 100644 --- a/runtime/interpreter/interpreter_switch_impl.cc +++ b/runtime/interpreter/interpreter_switch_impl.cc @@ -47,10 +47,7 @@ namespace interpreter { // Code to run before each dex instruction. #define PREAMBLE() \ do { \ - DCHECK(!inst->IsReturn()); \ - if (UNLIKELY(notified_method_entry_event)) { \ - notified_method_entry_event = false; \ - } else if (UNLIKELY(instrumentation->HasDexPcListeners())) { \ + if (UNLIKELY(instrumentation->HasDexPcListeners())) { \ instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_), \ shadow_frame.GetMethod(), dex_pc); \ } \ @@ -67,7 +64,6 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, self->VerifyStack(); uint32_t dex_pc = shadow_frame.GetDexPC(); - bool notified_method_entry_event = false; const auto* const instrumentation = Runtime::Current()->GetInstrumentation(); if (LIKELY(dex_pc == 0)) { // We are entering the method as opposed to deoptimizing. 
if (kIsDebugBuild) { @@ -76,7 +72,6 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, if (UNLIKELY(instrumentation->HasMethodEntryListeners())) { instrumentation->MethodEnterEvent(self, shadow_frame.GetThisObject(code_item->ins_size_), shadow_frame.GetMethod(), 0); - notified_method_entry_event = true; } } const uint16_t* const insns = code_item->insns_; @@ -171,19 +166,18 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, break; } case Instruction::RETURN_VOID_NO_BARRIER: { + PREAMBLE(); JValue result; self->AllowThreadSuspension(); if (UNLIKELY(instrumentation->HasMethodExitListeners())) { instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_), shadow_frame.GetMethod(), inst->GetDexPc(insns), result); - } else if (UNLIKELY(instrumentation->HasDexPcListeners())) { - instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_), - shadow_frame.GetMethod(), dex_pc); } return result; } case Instruction::RETURN_VOID: { + PREAMBLE(); QuasiAtomic::ThreadFenceForConstructor(); JValue result; self->AllowThreadSuspension(); @@ -191,13 +185,11 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_), shadow_frame.GetMethod(), inst->GetDexPc(insns), result); - } else if (UNLIKELY(instrumentation->HasDexPcListeners())) { - instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_), - shadow_frame.GetMethod(), dex_pc); } return result; } case Instruction::RETURN: { + PREAMBLE(); JValue result; result.SetJ(0); result.SetI(shadow_frame.GetVReg(inst->VRegA_11x(inst_data))); @@ -206,13 +198,11 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_), shadow_frame.GetMethod(), inst->GetDexPc(insns), result); - } else if (UNLIKELY(instrumentation->HasDexPcListeners())) { - instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_), - shadow_frame.GetMethod(), dex_pc); } return result; } case Instruction::RETURN_WIDE: { + PREAMBLE(); JValue result; result.SetJ(shadow_frame.GetVRegLong(inst->VRegA_11x(inst_data))); self->AllowThreadSuspension(); @@ -220,13 +210,11 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_), shadow_frame.GetMethod(), inst->GetDexPc(insns), result); - } else if (UNLIKELY(instrumentation->HasDexPcListeners())) { - instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_), - shadow_frame.GetMethod(), dex_pc); } return result; } case Instruction::RETURN_OBJECT: { + PREAMBLE(); JValue result; self->AllowThreadSuspension(); const size_t ref_idx = inst->VRegA_11x(inst_data); @@ -254,9 +242,6 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_), shadow_frame.GetMethod(), inst->GetDexPc(insns), result); - } else if (UNLIKELY(instrumentation->HasDexPcListeners())) { - instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_), - shadow_frame.GetMethod(), dex_pc); } return result; } diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc index 1ec800fce6..ab3f2e4ddd 100644 --- a/runtime/jdwp/jdwp_event.cc +++ 
b/runtime/jdwp/jdwp_event.cc @@ -141,6 +141,8 @@ static bool NeedsFullDeoptimization(JdwpEventKind eventKind) { } } +// Returns the instrumentation event the DebugInstrumentationListener must +// listen to in order to properly report the given JDWP event to the debugger. static uint32_t GetInstrumentationEventFor(JdwpEventKind eventKind) { switch (eventKind) { case EK_BREAKPOINT: diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h index f5ad8b837c..c698cfc180 100644 --- a/runtime/jit/jit.h +++ b/runtime/jit/jit.h @@ -19,8 +19,6 @@ #include <unordered_map> -#include "instrumentation.h" - #include "atomic.h" #include "base/macros.h" #include "base/mutex.h" diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc index e2f9cec2ce..32326740c6 100644 --- a/runtime/jit/jit_instrumentation.cc +++ b/runtime/jit/jit_instrumentation.cc @@ -77,7 +77,7 @@ void JitInstrumentationCache::AddSamples(Thread* self, mirror::ArtMethod* method ScopedObjectAccessUnchecked soa(self); // Since we don't have on-stack replacement, some methods can remain in the interpreter longer // than we want resulting in samples even after the method is compiled. - if (method->IsClassInitializer() || + if (method->IsClassInitializer() || method->IsNative() || Runtime::Current()->GetJit()->GetCodeCache()->ContainsMethod(method)) { return; } diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc index 9bb08a23d2..fd386d7b38 100644 --- a/runtime/jni_internal.cc +++ b/runtime/jni_internal.cc @@ -2099,6 +2099,35 @@ class JNI { return JNI_ERR; } bool is_fast = false; + // Notes about fast JNI calls: + // + // On a normal JNI call, the calling thread usually transitions + // from the kRunnable state to the kNative state. But if the + // called native function needs to access any Java object, it + // will have to transition back to the kRunnable state. + // + // There is a cost to this double transition. For a JNI call + // that should be quick, this cost may dominate the call cost. + // + // On a fast JNI call, the calling thread avoids this double + // transition by staying in the kRunnable state rather than + // transitioning to kNative. + // + // There are risks to using a fast JNI call because it can delay + // a response to a thread suspension request, which is typically + // used for GC root scanning, etc. If a fast JNI call takes a + // long time, it could cause longer thread suspension latency + // and GC pauses. + // + // Thus, fast JNI should be used with care. It should be used + // for a JNI call that takes a short amount of time (e.g. no + // long-running loop) and does not block (e.g. no locks, I/O, + // etc.). + // + // A '!' prefix in the signature of a JNINativeMethod + // indicates that it's a fast JNI call and the runtime omits the + // thread state transition from kRunnable to kNative on + // entry. if (*sig == '!') { is_fast = true; ++sig; diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc index 959bb75c93..cf4233c3d1 100644 --- a/runtime/mem_map.cc +++ b/runtime/mem_map.cc @@ -153,7 +153,7 @@ static bool ContainedWithinExistingMap(uint8_t* ptr, size_t size, return true; } } - PrintFileToLog("/proc/self/maps", LogSeverity::WARNING); + PrintFileToLog("/proc/self/maps", LogSeverity::ERROR); *error_msg = StringPrintf("Requested region 0x%08" PRIxPTR "-0x%08" PRIxPTR " does not overlap " "any existing map. 
See process maps in the log.", begin, end); return false; @@ -256,7 +256,7 @@ MemMap* MemMap::MapAnonymous(const char* name, uint8_t* expected_ptr, size_t byt // Only use this if you actually made the page reservation yourself. CHECK(expected_ptr != nullptr); - DCHECK(ContainedWithinExistingMap(expected_ptr, byte_count, error_msg)) << error_msg; + DCHECK(ContainedWithinExistingMap(expected_ptr, byte_count, error_msg)) << *error_msg; flags |= MAP_FIXED; } @@ -411,7 +411,7 @@ MemMap* MemMap::MapFileAtAddress(uint8_t* expected_ptr, size_t byte_count, int p // Only use this if you actually made the page reservation yourself. CHECK(expected_ptr != nullptr); - DCHECK(ContainedWithinExistingMap(expected_ptr, byte_count, error_msg)) << error_msg; + DCHECK(ContainedWithinExistingMap(expected_ptr, byte_count, error_msg)) << *error_msg; flags |= MAP_FIXED; } else { CHECK_EQ(0, flags & MAP_FIXED); @@ -617,13 +617,68 @@ bool MemMap::CheckNoGaps(MemMap* begin_map, MemMap* end_map) { return true; } -void MemMap::DumpMaps(std::ostream& os) { +void MemMap::DumpMaps(std::ostream& os, bool terse) { MutexLock mu(Thread::Current(), *Locks::mem_maps_lock_); - DumpMapsLocked(os); + DumpMapsLocked(os, terse); } -void MemMap::DumpMapsLocked(std::ostream& os) { - os << *maps_; +void MemMap::DumpMapsLocked(std::ostream& os, bool terse) { + const auto& mem_maps = *maps_; + if (!terse) { + os << mem_maps; + return; + } + + // Terse output example: + // [MemMap: 0x409be000+0x20P~0x11dP+0x20P~0x61cP+0x20P prot=0x3 LinearAlloc] + // [MemMap: 0x451d6000+0x6bP(3) prot=0x3 large object space allocation] + // The details: + // "+0x20P" means 0x20 pages taken by a single mapping, + // "~0x11dP" means a gap of 0x11d pages, + // "+0x6bP(3)" means 3 mappings one after another, together taking 0x6b pages. + os << "MemMap:" << std::endl; + for (auto it = mem_maps.begin(), maps_end = mem_maps.end(); it != maps_end;) { + MemMap* map = it->second; + void* base = it->first; + CHECK_EQ(base, map->BaseBegin()); + os << "[MemMap: " << base; + ++it; + // Merge consecutive maps with the same protect flags and name. 
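// Worked example of the terse format documented above: the line
//   [MemMap: 0x409be000+0x20P~0x11dP+0x20P~0x61cP+0x20P prot=0x3 LinearAlloc]
// denotes three mappings of 0x20 pages each, separated by gaps of 0x11d and
// 0x61c pages. A hypothetical caller asking for the compact form:
//   std::ostringstream oss;
//   MemMap::DumpMaps(oss, /* terse */ true);
//   LOG(INFO) << oss.str();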
+ constexpr size_t kMaxGaps = 9; + size_t num_gaps = 0; + size_t num = 1u; + size_t size = map->BaseSize(); + CHECK(IsAligned<kPageSize>(size)); + void* end = map->BaseEnd(); + while (it != maps_end && + it->second->GetProtect() == map->GetProtect() && + it->second->GetName() == map->GetName() && + (it->second->BaseBegin() == end || num_gaps < kMaxGaps)) { + if (it->second->BaseBegin() != end) { + ++num_gaps; + os << "+0x" << std::hex << (size / kPageSize) << "P"; + if (num != 1u) { + os << "(" << std::dec << num << ")"; + } + size_t gap = + reinterpret_cast<uintptr_t>(it->second->BaseBegin()) - reinterpret_cast<uintptr_t>(end); + CHECK(IsAligned<kPageSize>(gap)); + os << "~0x" << std::hex << (gap / kPageSize) << "P"; + num = 0u; + size = 0u; + } + CHECK(IsAligned<kPageSize>(it->second->BaseSize())); + ++num; + size += it->second->BaseSize(); + end = it->second->BaseEnd(); + ++it; + } + os << "+0x" << std::hex << (size / kPageSize) << "P"; + if (num != 1u) { + os << "(" << std::dec << num << ")"; + } + os << " prot=0x" << std::hex << map->GetProtect() << " " << map->GetName() << "]" << std::endl; + } } bool MemMap::HasMemMap(MemMap* map) { diff --git a/runtime/mem_map.h b/runtime/mem_map.h index dc6d93536d..6023a703c5 100644 --- a/runtime/mem_map.h +++ b/runtime/mem_map.h @@ -137,7 +137,7 @@ class MemMap { static bool CheckNoGaps(MemMap* begin_map, MemMap* end_map) LOCKS_EXCLUDED(Locks::mem_maps_lock_); - static void DumpMaps(std::ostream& os) + static void DumpMaps(std::ostream& os, bool terse = false) LOCKS_EXCLUDED(Locks::mem_maps_lock_); typedef AllocationTrackingMultiMap<void*, MemMap*, kAllocatorTagMaps> Maps; @@ -149,7 +149,7 @@ class MemMap { MemMap(const std::string& name, uint8_t* begin, size_t size, void* base_begin, size_t base_size, int prot, bool reuse) LOCKS_EXCLUDED(Locks::mem_maps_lock_); - static void DumpMapsLocked(std::ostream& os) + static void DumpMapsLocked(std::ostream& os, bool terse) EXCLUSIVE_LOCKS_REQUIRED(Locks::mem_maps_lock_); static bool HasMemMap(MemMap* map) EXCLUSIVE_LOCKS_REQUIRED(Locks::mem_maps_lock_); diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc index 543cf9bae3..9518c9d797 100644 --- a/runtime/mirror/art_method.cc +++ b/runtime/mirror/art_method.cc @@ -511,7 +511,6 @@ QuickMethodFrameInfo ArtMethod::GetQuickFrameInfo() { if (class_linker->IsQuickGenericJniStub(entry_point)) { // Generic JNI frame. 
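// Illustrative arithmetic for the handle scope sizing below, assuming an
// instance method String foo(Object a, int b, Object c):
//   GetNumberOfReferenceArgsWithoutReceiver() == 2   (a and c; b is an int)
//   + 1 for the slot every JNI call gets in addition to its arguments
//     (the receiver for instance methods, the declaring class for statics)
//   => the frame reserves HandleScope::SizeOf(3) bytes.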
DCHECK(IsNative()); - StackHandleScope<1> hs(Thread::Current()); uint32_t handle_refs = GetNumberOfReferenceArgsWithoutReceiver(this) + 1; size_t scope_size = HandleScope::SizeOf(handle_refs); QuickMethodFrameInfo callee_info = runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs); diff --git a/runtime/native/dalvik_system_VMStack.cc b/runtime/native/dalvik_system_VMStack.cc index 17fbc4f85d..1d7d853431 100644 --- a/runtime/native/dalvik_system_VMStack.cc +++ b/runtime/native/dalvik_system_VMStack.cc @@ -84,7 +84,8 @@ static jobject VMStack_getCallingClassLoader(JNIEnv* env, jclass) { static jobject VMStack_getClosestUserClassLoader(JNIEnv* env, jclass) { struct ClosestUserClassLoaderVisitor : public StackVisitor { explicit ClosestUserClassLoaderVisitor(Thread* thread) - : StackVisitor(thread, nullptr), class_loader(nullptr) {} + : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames), + class_loader(nullptr) {} bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { DCHECK(class_loader == nullptr); diff --git a/runtime/native/java_lang_Thread.cc b/runtime/native/java_lang_Thread.cc index be7022e281..6569d833c5 100644 --- a/runtime/native/java_lang_Thread.cc +++ b/runtime/native/java_lang_Thread.cc @@ -84,6 +84,7 @@ static jint Thread_nativeGetStatus(JNIEnv* env, jobject java_thread, jboolean ha case kWaitingInMainDebuggerLoop: return kJavaWaiting; case kWaitingForDebuggerSuspension: return kJavaWaiting; case kWaitingForDeoptimization: return kJavaWaiting; + case kWaitingForGetObjectsAllocated: return kJavaWaiting; case kWaitingForJniOnLoad: return kJavaWaiting; case kWaitingForSignalCatcherOutput: return kJavaWaiting; case kWaitingInMainSignalCatcherLoop: return kJavaWaiting; diff --git a/runtime/nth_caller_visitor.h b/runtime/nth_caller_visitor.h index 632ccdedc0..d2d7fa8a21 100644 --- a/runtime/nth_caller_visitor.h +++ b/runtime/nth_caller_visitor.h @@ -27,8 +27,11 @@ class Thread; // Walks up the stack 'n' callers, when used with Thread::WalkStack. struct NthCallerVisitor : public StackVisitor { NthCallerVisitor(Thread* thread, size_t n_in, bool include_runtime_and_upcalls = false) - : StackVisitor(thread, nullptr), n(n_in), - include_runtime_and_upcalls_(include_runtime_and_upcalls), count(0), caller(nullptr) {} + : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames), + n(n_in), + include_runtime_and_upcalls_(include_runtime_and_upcalls), + count(0), + caller(nullptr) {} bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { mirror::ArtMethod* m = GetMethod(); diff --git a/runtime/oat.h b/runtime/oat.h index a31e09a3cf..aaf442a77a 100644 --- a/runtime/oat.h +++ b/runtime/oat.h @@ -32,7 +32,7 @@ class InstructionSetFeatures; class PACKED(4) OatHeader { public: static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' }; - static constexpr uint8_t kOatVersion[] = { '0', '6', '1', '\0' }; + static constexpr uint8_t kOatVersion[] = { '0', '6', '2', '\0' }; static constexpr const char* kImageLocationKey = "image-location"; static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline"; diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc index 2f67263285..d07c09cd9a 100644 --- a/runtime/oat_file_assistant.cc +++ b/runtime/oat_file_assistant.cc @@ -96,9 +96,8 @@ OatFileAssistant::OatFileAssistant(const char* dex_location, OatFileAssistant::~OatFileAssistant() { // Clean up the lock file. 
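// A rough sketch of the POSIX pattern a ScopedFlock-style helper wraps
// (assumed semantics; the real ScopedFlock::Init used below also handles
// retries and error reporting). The caller keeps the lock for as long as the
// descriptor stays open, which is how the OatFileAssistant holds its lock
// for the object's lifetime.
#include <fcntl.h>
#include <sys/file.h>
#include <unistd.h>

static bool LockFileSketch(const char* path, int* out_fd) {
  int fd = TEMP_FAILURE_RETRY(open(path, O_CREAT | O_RDWR, 0666));  // Create if missing.
  if (fd == -1) {
    return false;
  }
  if (TEMP_FAILURE_RETRY(flock(fd, LOCK_EX)) != 0) {  // Block until exclusively locked.
    close(fd);
    return false;
  }
  *out_fd = fd;
  return true;
}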
- if (lock_file_.get() != nullptr) { - lock_file_->Erase(); - TEMP_FAILURE_RETRY(unlink(lock_file_->GetPath().c_str())); + if (flock_.HasFile()) { + TEMP_FAILURE_RETRY(unlink(flock_.GetFile()->GetPath().c_str())); } } @@ -121,7 +120,7 @@ bool OatFileAssistant::IsInBootClassPath() { bool OatFileAssistant::Lock(std::string* error_msg) { CHECK(error_msg != nullptr); - CHECK(lock_file_.get() == nullptr) << "OatFileAssistant::Lock already acquired"; + CHECK(!flock_.HasFile()) << "OatFileAssistant::Lock already acquired"; if (OatFileName() == nullptr) { *error_msg = "Failed to determine lock file"; @@ -129,13 +128,7 @@ bool OatFileAssistant::Lock(std::string* error_msg) { } std::string lock_file_name = *OatFileName() + ".flock"; - lock_file_.reset(OS::CreateEmptyFile(lock_file_name.c_str())); - if (lock_file_.get() == nullptr) { - *error_msg = "Failed to create lock file " + lock_file_name; - return false; - } - - if (!flock_.Init(lock_file_.get(), error_msg)) { + if (!flock_.Init(lock_file_name.c_str(), error_msg)) { TEMP_FAILURE_RETRY(unlink(lock_file_name.c_str())); return false; } @@ -465,7 +458,7 @@ bool OatFileAssistant::GivenOatFileIsUpToDate(const OatFile& file) { const ImageInfo* image_info = GetImageInfo(); if (image_info == nullptr) { - VLOG(oat) << "No image for to check oat relocation against."; + VLOG(oat) << "No image to check oat relocation against."; return false; } diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h index a25ee31d0d..4c0b0e26e6 100644 --- a/runtime/oat_file_assistant.h +++ b/runtime/oat_file_assistant.h @@ -353,7 +353,6 @@ class OatFileAssistant { // To implement Lock(), we lock a dummy file where the oat file would go // (adding ".flock" to the target file name) and retain the lock for the // remaining lifetime of the OatFileAssistant object. - std::unique_ptr<File> lock_file_; ScopedFlock flock_; // In a properly constructed OatFileAssistant object, dex_location_ should @@ -405,9 +404,9 @@ class OatFileAssistant { bool cached_oat_file_name_found_; std::string cached_oat_file_name_; - // Cached value of the loaded odex file. + // Cached value of the loaded oat file. // Use the GetOatFile method rather than accessing this directly, unless you - // know the odex file isn't out of date. + // know the oat file isn't out of date. bool oat_file_load_attempted_ = false; std::unique_ptr<OatFile> cached_oat_file_; diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc index 3f6b2d2cc6..865fcb063a 100644 --- a/runtime/oat_file_assistant_test.cc +++ b/runtime/oat_file_assistant_test.cc @@ -29,6 +29,7 @@ #include "class_linker-inl.h" #include "common_runtime_test.h" #include "compiler_callbacks.h" +#include "gc/space/image_space.h" #include "mem_map.h" #include "os.h" #include "scoped_thread_state_change.h" @@ -610,10 +611,23 @@ TEST_F(OatFileAssistantTest, OdexOatOverlap) { // Things aren't relocated, so it should fall back to interpreted. std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile(); ASSERT_TRUE(oat_file.get() != nullptr); + EXPECT_FALSE(oat_file->IsExecutable()); std::vector<std::unique_ptr<const DexFile>> dex_files; dex_files = oat_file_assistant.LoadDexFiles(*oat_file, dex_location.c_str()); EXPECT_EQ(1u, dex_files.size()); + + // Add some extra checks to help diagnose apparently flaky test failures. 
+ Runtime* runtime = Runtime::Current(); + const gc::space::ImageSpace* image_space = runtime->GetHeap()->GetImageSpace(); + ASSERT_TRUE(image_space != nullptr); + const ImageHeader& image_header = image_space->GetImageHeader(); + const OatHeader& oat_header = oat_file->GetOatHeader(); + EXPECT_FALSE(oat_file->IsPic()); + EXPECT_EQ(image_header.GetOatChecksum(), oat_header.GetImageFileLocationOatChecksum()); + EXPECT_NE(reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin()), + oat_header.GetImageFileLocationOatDataBegin()); + EXPECT_NE(image_header.GetPatchDelta(), oat_header.GetImagePatchDelta()); } // Case: We have a DEX file and a PIC ODEX file, but no OAT file. diff --git a/runtime/profiler.cc b/runtime/profiler.cc index 63a38437f7..3b0e6c1062 100644 --- a/runtime/profiler.cc +++ b/runtime/profiler.cc @@ -58,8 +58,10 @@ class BoundedStackVisitor : public StackVisitor { BoundedStackVisitor(std::vector<std::pair<mirror::ArtMethod*, uint32_t>>* stack, Thread* thread, uint32_t max_depth) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - : StackVisitor(thread, nullptr), stack_(stack), max_depth_(max_depth), depth_(0) { - } + : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames), + stack_(stack), + max_depth_(max_depth), + depth_(0) {} bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { mirror::ArtMethod* m = GetMethod(); diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc index a80eed6073..730759a71b 100644 --- a/runtime/quick_exception_handler.cc +++ b/runtime/quick_exception_handler.cc @@ -46,7 +46,9 @@ class CatchBlockStackVisitor FINAL : public StackVisitor { CatchBlockStackVisitor(Thread* self, Context* context, Handle<mirror::Throwable>* exception, QuickExceptionHandler* exception_handler) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - : StackVisitor(self, context), self_(self), exception_(exception), + : StackVisitor(self, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames), + self_(self), + exception_(exception), exception_handler_(exception_handler) { } @@ -160,7 +162,9 @@ class DeoptimizeStackVisitor FINAL : public StackVisitor { public: DeoptimizeStackVisitor(Thread* self, Context* context, QuickExceptionHandler* exception_handler) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - : StackVisitor(self, context), self_(self), exception_handler_(exception_handler), + : StackVisitor(self, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames), + self_(self), + exception_handler_(exception_handler), prev_shadow_frame_(nullptr) { CHECK(!self_->HasDeoptimizationShadowFrame()); } @@ -223,7 +227,10 @@ class DeoptimizeStackVisitor FINAL : public StackVisitor { break; case kReferenceVReg: { uint32_t value = 0; - if (GetVReg(h_method.Get(), reg, kind, &value)) { + // Check IsReferenceVReg in case the compiled GC map doesn't agree with the verifier. + // We don't want to copy a stale reference into the shadow frame as a reference. 
+ // b/20736048 + if (GetVReg(h_method.Get(), reg, kind, &value) && IsReferenceVReg(h_method.Get(), reg)) { new_frame->SetVRegReference(reg, reinterpret_cast<mirror::Object*>(value)); } else { new_frame->SetVReg(reg, kDeadValue); @@ -335,7 +342,7 @@ class InstrumentationStackVisitor : public StackVisitor { public: InstrumentationStackVisitor(Thread* self, size_t frame_depth) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - : StackVisitor(self, nullptr), + : StackVisitor(self, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames), frame_depth_(frame_depth), instrumentation_frames_to_pop_(0) { CHECK_NE(frame_depth_, kInvalidFrameDepth); @@ -346,7 +353,12 @@ class InstrumentationStackVisitor : public StackVisitor { if (current_frame_depth < frame_depth_) { CHECK(GetMethod() != nullptr); if (UNLIKELY(reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc()) == GetReturnPc())) { - ++instrumentation_frames_to_pop_; + if (!IsInInlinedFrame()) { + // We do not count inlined frames, because we do not instrument them. The reason we + // include them in the stack walking is the check against `frame_depth_`, which is + // given to us by a visitor that visits inlined frames. + ++instrumentation_frames_to_pop_; + } } return true; } else { diff --git a/runtime/reflection.cc b/runtime/reflection.cc index 329ceb561d..49e1b8edf6 100644 --- a/runtime/reflection.cc +++ b/runtime/reflection.cc @@ -520,23 +520,6 @@ JValue InvokeVirtualOrInterfaceWithVarArgs(const ScopedObjectAccessAlreadyRunnab return result; } -void InvokeWithShadowFrame(Thread* self, ShadowFrame* shadow_frame, uint16_t arg_offset, - JValue* result) { - // We want to make sure that the stack is not within a small distance from the - // protected region in case we are calling into a leaf function whose stack - // check has been elided. - if (UNLIKELY(__builtin_frame_address(0) < self->GetStackEnd())) { - ThrowStackOverflowError(self); - return; - } - uint32_t shorty_len; - const char* shorty = shadow_frame->GetMethod()->GetShorty(&shorty_len); - ArgArray arg_array(shorty, shorty_len); - arg_array.BuildArgArrayFromFrame(shadow_frame, arg_offset); - shadow_frame->GetMethod()->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(), result, - shorty); -} - jobject InvokeMethod(const ScopedObjectAccessAlreadyRunnable& soa, jobject javaMethod, jobject javaReceiver, jobject javaArgs, size_t num_frames) { // We want to make sure that the stack is not within a small distance from the diff --git a/runtime/reflection.h b/runtime/reflection.h index 6305d68bd5..37f8a6af55 100644 --- a/runtime/reflection.h +++ b/runtime/reflection.h @@ -61,10 +61,6 @@ JValue InvokeVirtualOrInterfaceWithVarArgs(const ScopedObjectAccessAlreadyRunnab jobject obj, jmethodID mid, va_list args) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); -void InvokeWithShadowFrame(Thread* self, ShadowFrame* shadow_frame, uint16_t arg_offset, - JValue* result) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - // num_frames is number of frames we look up for access check. 
jobject InvokeMethod(const ScopedObjectAccessAlreadyRunnable& soa, jobject method, jobject receiver, jobject args, size_t num_frames = 1) diff --git a/runtime/scoped_thread_state_change.h b/runtime/scoped_thread_state_change.h index 99750a16d0..60ed55a914 100644 --- a/runtime/scoped_thread_state_change.h +++ b/runtime/scoped_thread_state_change.h @@ -133,6 +133,7 @@ class ScopedObjectAccessAlreadyRunnable { T AddLocalReference(mirror::Object* obj) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { Locks::mutator_lock_->AssertSharedHeld(Self()); DCHECK(IsRunnable()); // Don't work with raw objects in non-runnable states. + DCHECK_NE(obj, Runtime::Current()->GetClearedJniWeakGlobal()); return obj == nullptr ? nullptr : Env()->AddLocalReference<T>(obj); } diff --git a/runtime/stack.cc b/runtime/stack.cc index e49bc1d78f..800acaa320 100644 --- a/runtime/stack.cc +++ b/runtime/stack.cc @@ -19,6 +19,7 @@ #include "arch/context.h" #include "base/hex_dump.h" #include "entrypoints/runtime_asm_entrypoints.h" +#include "gc_map.h" #include "mirror/art_method-inl.h" #include "mirror/class-inl.h" #include "mirror/object.h" @@ -84,16 +85,20 @@ bool ManagedStack::ShadowFramesContain(StackReference<mirror::Object>* shadow_fr return false; } -StackVisitor::StackVisitor(Thread* thread, Context* context) - : thread_(thread), cur_shadow_frame_(nullptr), - cur_quick_frame_(nullptr), cur_quick_frame_pc_(0), num_frames_(0), cur_depth_(0), - context_(context) { - DCHECK(thread == Thread::Current() || thread->IsSuspended()) << *thread; -} - -StackVisitor::StackVisitor(Thread* thread, Context* context, size_t num_frames) - : thread_(thread), cur_shadow_frame_(nullptr), - cur_quick_frame_(nullptr), cur_quick_frame_pc_(0), num_frames_(num_frames), cur_depth_(0), +StackVisitor::StackVisitor(Thread* thread, Context* context, StackWalkKind walk_kind) + : StackVisitor(thread, context, walk_kind, 0) {} + +StackVisitor::StackVisitor(Thread* thread, + Context* context, + StackWalkKind walk_kind, + size_t num_frames) + : thread_(thread), + walk_kind_(walk_kind), + cur_shadow_frame_(nullptr), + cur_quick_frame_(nullptr), + cur_quick_frame_pc_(0), + num_frames_(num_frames), + cur_depth_(0), context_(context) { DCHECK(thread == Thread::Current() || thread->IsSuspended()) << *thread; } @@ -151,6 +156,33 @@ size_t StackVisitor::GetNativePcOffset() const { return GetMethod()->NativeQuickPcOffset(cur_quick_frame_pc_); } +bool StackVisitor::IsReferenceVReg(mirror::ArtMethod* m, uint16_t vreg) { + // Process register map (which native and runtime methods don't have) + if (m->IsNative() || m->IsRuntimeMethod() || m->IsProxyMethod()) { + return false; + } + if (m->IsOptimized(sizeof(void*))) { + return true; // TODO: Implement. + } + const uint8_t* native_gc_map = m->GetNativeGcMap(sizeof(void*)); + CHECK(native_gc_map != nullptr) << PrettyMethod(m); + const DexFile::CodeItem* code_item = m->GetCodeItem(); + // Can't be null or how would we compile its instructions? 
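// Worked example of the per-PC lookup this function performs, with assumed
// numbers: for vreg 11 and map.RegWidth() == 2, num_regs is
// min(2 * 8, code_item->registers_size_), and the final bit test (TestBitmap,
// now shared in utils.h) evaluates
//   ((reg_bitmap[11 / 8] >> (11 % 8)) & 0x01) != 0
// i.e. byte 1, bit 3 of the bitmap found for this native PC.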
+ DCHECK(code_item != nullptr) << PrettyMethod(m); + NativePcOffsetToReferenceMap map(native_gc_map); + size_t num_regs = std::min(map.RegWidth() * 8, static_cast<size_t>(code_item->registers_size_)); + const uint8_t* reg_bitmap = nullptr; + if (num_regs > 0) { + Runtime* runtime = Runtime::Current(); + const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(m, sizeof(void*)); + uintptr_t native_pc_offset = m->NativeQuickPcOffset(GetCurrentQuickFramePc(), entry_point); + reg_bitmap = map.FindBitMap(native_pc_offset); + DCHECK(reg_bitmap != nullptr); + } + // Does this register hold a reference? + return vreg < num_regs && TestBitmap(vreg, reg_bitmap); +} + bool StackVisitor::GetVReg(mirror::ArtMethod* m, uint16_t vreg, VRegKind kind, uint32_t* val) const { if (cur_quick_frame_ != nullptr) { @@ -537,10 +569,10 @@ void StackVisitor::SetReturnPc(uintptr_t new_ret_pc) { *reinterpret_cast<uintptr_t*>(pc_addr) = new_ret_pc; } -size_t StackVisitor::ComputeNumFrames(Thread* thread) { +size_t StackVisitor::ComputeNumFrames(Thread* thread, StackWalkKind walk_kind) { struct NumFramesVisitor : public StackVisitor { - explicit NumFramesVisitor(Thread* thread_in) - : StackVisitor(thread_in, nullptr), frames(0) {} + NumFramesVisitor(Thread* thread_in, StackWalkKind walk_kind_in) + : StackVisitor(thread_in, nullptr, walk_kind_in), frames(0) {} bool VisitFrame() OVERRIDE { frames++; @@ -549,16 +581,23 @@ size_t StackVisitor::ComputeNumFrames(Thread* thread) { size_t frames; }; - NumFramesVisitor visitor(thread); + NumFramesVisitor visitor(thread, walk_kind); visitor.WalkStack(true); return visitor.frames; } bool StackVisitor::GetNextMethodAndDexPc(mirror::ArtMethod** next_method, uint32_t* next_dex_pc) { struct HasMoreFramesVisitor : public StackVisitor { - explicit HasMoreFramesVisitor(Thread* thread, size_t num_frames, size_t frame_height) - : StackVisitor(thread, nullptr, num_frames), frame_height_(frame_height), - found_frame_(false), has_more_frames_(false), next_method_(nullptr), next_dex_pc_(0) { + HasMoreFramesVisitor(Thread* thread, + StackWalkKind walk_kind, + size_t num_frames, + size_t frame_height) + : StackVisitor(thread, nullptr, walk_kind, num_frames), + frame_height_(frame_height), + found_frame_(false), + has_more_frames_(false), + next_method_(nullptr), + next_dex_pc_(0) { } bool VisitFrame() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { @@ -582,7 +621,7 @@ bool StackVisitor::GetNextMethodAndDexPc(mirror::ArtMethod** next_method, uint32 mirror::ArtMethod* next_method_; uint32_t next_dex_pc_; }; - HasMoreFramesVisitor visitor(thread_, GetNumFrames(), GetFrameHeight()); + HasMoreFramesVisitor visitor(thread_, walk_kind_, GetNumFrames(), GetFrameHeight()); visitor.WalkStack(true); *next_method = visitor.next_method_; *next_dex_pc = visitor.next_dex_pc_; @@ -592,7 +631,7 @@ bool StackVisitor::GetNextMethodAndDexPc(mirror::ArtMethod** next_method, uint32 void StackVisitor::DescribeStack(Thread* thread) { struct DescribeStackVisitor : public StackVisitor { explicit DescribeStackVisitor(Thread* thread_in) - : StackVisitor(thread_in, nullptr) {} + : StackVisitor(thread_in, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames) {} bool VisitFrame() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { LOG(INFO) << "Frame Id=" << GetFrameId() << " " << DescribeLocation(); diff --git a/runtime/stack.h b/runtime/stack.h index 3f1bff8b9c..bf6101619d 100644 --- a/runtime/stack.h +++ b/runtime/stack.h @@ -409,8 +409,17 @@ class PACKED(4) ManagedStack { }; class 
StackVisitor { + public: + // This enum defines a flag to control whether inlined frames are included + // when walking the stack. + enum class StackWalkKind { + kIncludeInlinedFrames, + kSkipInlinedFrames, + }; + protected: - StackVisitor(Thread* thread, Context* context) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + StackVisitor(Thread* thread, Context* context, StackWalkKind walk_kind) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); public: virtual ~StackVisitor() {} @@ -465,7 +474,7 @@ class StackVisitor { size_t GetNumFrames() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { if (num_frames_ == 0) { - num_frames_ = ComputeNumFrames(thread_); + num_frames_ = ComputeNumFrames(thread_, walk_kind_); } return num_frames_; } @@ -478,6 +487,9 @@ class StackVisitor { bool GetNextMethodAndDexPc(mirror::ArtMethod** next_method, uint32_t* next_dex_pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + bool IsReferenceVReg(mirror::ArtMethod* m, uint16_t vreg) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + bool GetVReg(mirror::ArtMethod* m, uint16_t vreg, VRegKind kind, uint32_t* val) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -598,6 +610,10 @@ class StackVisitor { return sizeof(StackReference<mirror::ArtMethod>) + (out_num * sizeof(uint32_t)); } + bool IsInInlinedFrame() const { + return false; + } + uintptr_t GetCurrentQuickFramePc() const { return cur_quick_frame_pc_; } @@ -618,13 +634,14 @@ class StackVisitor { std::string DescribeLocation() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - static size_t ComputeNumFrames(Thread* thread) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + static size_t ComputeNumFrames(Thread* thread, StackWalkKind walk_kind) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); static void DescribeStack(Thread* thread) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); private: // Private constructor known in the case that num_frames_ has already been computed. 
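// Every visitor updated in this patch now names its walk kind explicitly.
// A minimal sketch of a subclass after this change, modeled on the
// NumFramesVisitor above (illustrative only):
struct CountFramesVisitor : public StackVisitor {
  explicit CountFramesVisitor(Thread* thread)
      : StackVisitor(thread, nullptr,
                     StackVisitor::StackWalkKind::kIncludeInlinedFrames),
        frames(0) {}

  bool VisitFrame() OVERRIDE {
    frames++;
    return true;  // Keep walking.
  }

  size_t frames;
};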
- StackVisitor(Thread* thread, Context* context, size_t num_frames) + StackVisitor(Thread* thread, Context* context, StackWalkKind walk_kind, size_t num_frames) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); bool IsAccessibleRegister(uint32_t reg, bool is_float) const { @@ -687,6 +704,7 @@ class StackVisitor { void SanityCheckFrame() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); Thread* const thread_; + const StackWalkKind walk_kind_; ShadowFrame* cur_shadow_frame_; StackReference<mirror::ArtMethod>* cur_quick_frame_; uintptr_t cur_quick_frame_pc_; diff --git a/runtime/thread.cc b/runtime/thread.cc index c8aad1b787..148bb6d7d7 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -940,10 +940,14 @@ void Thread::DumpState(std::ostream& os) const { struct StackDumpVisitor : public StackVisitor { StackDumpVisitor(std::ostream& os_in, Thread* thread_in, Context* context, bool can_allocate_in) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - : StackVisitor(thread_in, context), os(os_in), thread(thread_in), - can_allocate(can_allocate_in), last_method(nullptr), last_line_number(0), - repetition_count(0), frame_count(0) { - } + : StackVisitor(thread_in, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames), + os(os_in), + thread(thread_in), + can_allocate(can_allocate_in), + last_method(nullptr), + last_line_number(0), + repetition_count(0), + frame_count(0) {} virtual ~StackDumpVisitor() { if (frame_count == 0) { @@ -1528,7 +1532,7 @@ class CountStackDepthVisitor : public StackVisitor { public: explicit CountStackDepthVisitor(Thread* thread) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - : StackVisitor(thread, nullptr), + : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames), depth_(0), skip_depth_(0), skipping_(true) {} bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { @@ -1568,8 +1572,12 @@ template<bool kTransactionActive> class BuildInternalStackTraceVisitor : public StackVisitor { public: explicit BuildInternalStackTraceVisitor(Thread* self, Thread* thread, int skip_depth) - : StackVisitor(thread, nullptr), self_(self), - skip_depth_(skip_depth), count_(0), dex_pc_trace_(nullptr), method_trace_(nullptr) {} + : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames), + self_(self), + skip_depth_(skip_depth), + count_(0), + dex_pc_trace_(nullptr), + method_trace_(nullptr) {} bool Init(int depth) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { @@ -2072,6 +2080,7 @@ void Thread::DumpThreadOffset(std::ostream& os, uint32_t offset) { QUICK_ENTRY_POINT_INFO(pNewStringFromString) QUICK_ENTRY_POINT_INFO(pNewStringFromStringBuffer) QUICK_ENTRY_POINT_INFO(pNewStringFromStringBuilder) + QUICK_ENTRY_POINT_INFO(pReadBarrierJni) #undef QUICK_ENTRY_POINT_INFO os << offset; @@ -2111,7 +2120,10 @@ Context* Thread::GetLongJumpContext() { struct CurrentMethodVisitor FINAL : public StackVisitor { CurrentMethodVisitor(Thread* thread, Context* context, bool abort_on_error) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - : StackVisitor(thread, context), this_object_(nullptr), method_(nullptr), dex_pc_(0), + : StackVisitor(thread, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames), + this_object_(nullptr), + method_(nullptr), + dex_pc_(0), abort_on_error_(abort_on_error) {} bool VisitFrame() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { mirror::ArtMethod* m = GetMethod(); @@ -2154,7 +2166,10 @@ class ReferenceMapVisitor : public StackVisitor { public: ReferenceMapVisitor(Thread* thread, Context* context, RootVisitor& 
visitor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - : StackVisitor(thread, context), visitor_(visitor) {} + // We are visiting the references in compiled frames, so we do not need + // to know the inlined frames. + : StackVisitor(thread, context, StackVisitor::StackWalkKind::kSkipInlinedFrames), + visitor_(visitor) {} bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { if (false) { @@ -2311,10 +2326,6 @@ class ReferenceMapVisitor : public StackVisitor { } } - static bool TestBitmap(size_t reg, const uint8_t* reg_vector) { - return ((reg_vector[reg / kBitsPerByte] >> (reg % kBitsPerByte)) & 0x01) != 0; - } - // Visitor for when we visit a root. RootVisitor& visitor_; }; diff --git a/runtime/thread.h b/runtime/thread.h index e766daabed..9346813ec3 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -752,6 +752,18 @@ class Thread { tls32_.ready_for_debug_invoke = ready; } + bool IsDebugMethodEntry() const { + return tls32_.debug_method_entry_; + } + + void SetDebugMethodEntry() { + tls32_.debug_method_entry_ = true; + } + + void ClearDebugMethodEntry() { + tls32_.debug_method_entry_ = false; + } + // Activates single step control for debugging. The thread takes the // ownership of the given SingleStepControl*. It is deleted by a call // to DeactivateSingleStepControl or upon thread destruction. @@ -1028,7 +1040,7 @@ class Thread { suspend_count(0), debug_suspend_count(0), thin_lock_thread_id(0), tid(0), daemon(is_daemon), throwing_OutOfMemoryError(false), no_thread_suspension(0), thread_exit_check_count(0), handling_signal_(false), suspended_at_suspend_check(false), - ready_for_debug_invoke(false) { + ready_for_debug_invoke(false), debug_method_entry_(false) { } union StateAndFlags state_and_flags; @@ -1077,6 +1089,10 @@ class Thread { // used to invoke method from the debugger which is only allowed when // the thread is suspended by an event. bool32_t ready_for_debug_invoke; + + // True if the thread enters a method. This is used to detect method entry + // event for the debugger. 
+ bool32_t debug_method_entry_; } tls32_; struct PACKED(8) tls_64bit_sized_values { diff --git a/runtime/thread_state.h b/runtime/thread_state.h index b5479edb80..c7ea7f4381 100644 --- a/runtime/thread_state.h +++ b/runtime/thread_state.h @@ -42,6 +42,7 @@ enum ThreadState { kWaitingForDeoptimization, // WAITING TS_WAIT waiting for deoptimization suspend all kWaitingForMethodTracingStart, // WAITING TS_WAIT waiting for method tracing to start kWaitingForVisitObjects, // WAITING TS_WAIT waiting for visiting objects + kWaitingForGetObjectsAllocated, // WAITING TS_WAIT waiting for getting the number of allocated objects kStarting, // NEW TS_WAIT native thread started, not yet ready to run managed code kNative, // RUNNABLE TS_RUNNING running in a JNI native method kSuspended, // RUNNABLE TS_RUNNING suspended by GC or debugger diff --git a/runtime/trace.cc b/runtime/trace.cc index 9eca517dca..76367923c0 100644 --- a/runtime/trace.cc +++ b/runtime/trace.cc @@ -91,8 +91,9 @@ static constexpr uint8_t kOpNewThread = 2U; class BuildStackTraceVisitor : public StackVisitor { public: - explicit BuildStackTraceVisitor(Thread* thread) : StackVisitor(thread, nullptr), - method_trace_(Trace::AllocStackTrace()) {} + explicit BuildStackTraceVisitor(Thread* thread) + : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames), + method_trace_(Trace::AllocStackTrace()) {} bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { mirror::ArtMethod* m = GetMethod(); @@ -126,6 +127,9 @@ Trace* volatile Trace::the_trace_ = nullptr; pthread_t Trace::sampling_pthread_ = 0U; std::unique_ptr<std::vector<mirror::ArtMethod*>> Trace::temp_stack_trace_; +// The key identifying the tracer to update instrumentation. +static constexpr const char* kTracerInstrumentationKey = "Tracer"; + static mirror::ArtMethod* DecodeTraceMethodId(uint32_t tmid) { return reinterpret_cast<mirror::ArtMethod*>(tmid & ~kTraceMethodActionMask); } @@ -393,7 +397,7 @@ void Trace::Start(const char* trace_filename, int trace_fd, size_t buffer_size, instrumentation::Instrumentation::kMethodExited | instrumentation::Instrumentation::kMethodUnwind); // TODO: In full-PIC mode, we don't need to fully deopt. 
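// kTracerInstrumentationKey (defined above) identifies the tracer as an
// instrumentation client, matching the keyed multi-client configuration
// exercised by the instrumentation test at the top of this change. A sketch
// of the keyed usage, mirroring the call sites below:
//   instrumentation::Instrumentation* instr = Runtime::Current()->GetInstrumentation();
//   instr->EnableMethodTracing(kTracerInstrumentationKey);   // Request as the "Tracer" client.
//   ...
//   instr->DisableMethodTracing(kTracerInstrumentationKey);  // Drop only this client's request.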
- runtime->GetInstrumentation()->EnableMethodTracing(); + runtime->GetInstrumentation()->EnableMethodTracing(kTracerInstrumentationKey); } } } @@ -440,7 +444,7 @@ void Trace::StopTracing(bool finish_tracing, bool flush_file) { MutexLock mu(Thread::Current(), *Locks::thread_list_lock_); runtime->GetThreadList()->ForEach(ClearThreadStackTraceAndClockBase, nullptr); } else { - runtime->GetInstrumentation()->DisableMethodTracing(); + runtime->GetInstrumentation()->DisableMethodTracing(kTracerInstrumentationKey); runtime->GetInstrumentation()->RemoveListener( the_trace, instrumentation::Instrumentation::kMethodEntered | instrumentation::Instrumentation::kMethodExited | @@ -522,7 +526,7 @@ void Trace::Pause() { MutexLock mu(Thread::Current(), *Locks::thread_list_lock_); runtime->GetThreadList()->ForEach(ClearThreadStackTraceAndClockBase, nullptr); } else { - runtime->GetInstrumentation()->DisableMethodTracing(); + runtime->GetInstrumentation()->DisableMethodTracing(kTracerInstrumentationKey); runtime->GetInstrumentation()->RemoveListener(the_trace, instrumentation::Instrumentation::kMethodEntered | instrumentation::Instrumentation::kMethodExited | @@ -566,7 +570,7 @@ void Trace::Resume() { instrumentation::Instrumentation::kMethodExited | instrumentation::Instrumentation::kMethodUnwind); // TODO: In full-PIC mode, we don't need to fully deopt. - runtime->GetInstrumentation()->EnableMethodTracing(); + runtime->GetInstrumentation()->EnableMethodTracing(kTracerInstrumentationKey); } runtime->GetThreadList()->ResumeAll(); diff --git a/runtime/utf.h b/runtime/utf.h index dd38afa172..7f05248c29 100644 --- a/runtime/utf.h +++ b/runtime/utf.h @@ -87,9 +87,9 @@ size_t ComputeModifiedUtf8Hash(const char* chars); /* * Retrieve the next UTF-16 character or surrogate pair from a UTF-8 string. * single byte, 2-byte and 3-byte UTF-8 sequences result in a single UTF-16 - * character whereas 4-byte UTF-8 sequences result in a surrogate pair. Use - * GetLeadingUtf16Char and GetTrailingUtf16Char to process the return value - * of this function. + * character (possibly one half of a surrogate) whereas 4-byte UTF-8 sequences + * result in a surrogate pair. Use GetLeadingUtf16Char and GetTrailingUtf16Char + * to process the return value of this function. * * Advances "*utf8_data_in" to the start of the next character. * diff --git a/runtime/utils.cc b/runtime/utils.cc index 650214f67b..7986cdcbf9 100644 --- a/runtime/utils.cc +++ b/runtime/utils.cc @@ -827,14 +827,21 @@ bool IsValidPartOfMemberNameUtf8Slow(const char** pUtf8Ptr) { */ const uint32_t pair = GetUtf16FromUtf8(pUtf8Ptr); const uint16_t leading = GetLeadingUtf16Char(pair); - const uint32_t trailing = GetTrailingUtf16Char(pair); - if (trailing == 0) { - // Perform follow-up tests based on the high 8 bits of the - // lower surrogate. - switch (leading >> 8) { + // We have a surrogate pair resulting from a valid 4 byte UTF sequence. + // No further checks are necessary because 4 byte sequences span code + // points [U+10000, U+1FFFFF], which are valid codepoints in a dex + // identifier. Furthermore, GetUtf16FromUtf8 guarantees that each of + // the surrogate halves are valid and well formed in this instance. + if (GetTrailingUtf16Char(pair) != 0) { + return true; + } + + // We've encountered a one, two or three byte UTF-8 sequence. The + // three byte UTF-8 sequence could be one half of a surrogate pair. + switch (leading >> 8) { case 0x00: // It's only valid if it's above the ISO-8859-1 high space (0xa0). 
return (leading > 0x00a0); @@ -842,9 +849,14 @@ bool IsValidPartOfMemberNameUtf8Slow(const char** pUtf8Ptr) { case 0xd9: case 0xda: case 0xdb: - // It looks like a leading surrogate but we didn't find a trailing - // surrogate if we're here. - return false; + { + // We found a three byte sequence encoding one half of a surrogate. + // Look for the other half. + const uint32_t pair2 = GetUtf16FromUtf8(pUtf8Ptr); + const uint16_t trailing = GetLeadingUtf16Char(pair2); + + return (GetTrailingUtf16Char(pair2) == 0) && (0xdc00 <= trailing && trailing <= 0xdfff); + } case 0xdc: case 0xdd: case 0xde: @@ -855,21 +867,19 @@ bool IsValidPartOfMemberNameUtf8Slow(const char** pUtf8Ptr) { case 0xff: // It's in the range that has spaces, controls, and specials. switch (leading & 0xfff8) { - case 0x2000: - case 0x2008: - case 0x2028: - case 0xfff0: - case 0xfff8: - return false; + case 0x2000: + case 0x2008: + case 0x2028: + case 0xfff0: + case 0xfff8: + return false; } - break; - } - - return true; + return true; + default: + return true; } - // We have a surrogate pair. Check that trailing surrogate is well formed. - return (trailing >= 0xdc00 && trailing <= 0xdfff); + UNREACHABLE(); } /* Return whether the pointed-at modified-UTF-8 encoded character is diff --git a/runtime/utils.h b/runtime/utils.h index eaafcf0a64..71ccf85277 100644 --- a/runtime/utils.h +++ b/runtime/utils.h @@ -604,6 +604,11 @@ std::unique_ptr<T> MakeUnique(Args&& ... args) { return std::unique_ptr<T>(new T(std::forward<Args>(args)...)); } +inline bool TestBitmap(size_t idx, const uint8_t* bitmap) { + return ((bitmap[idx / kBitsPerByte] >> (idx % kBitsPerByte)) & 0x01) != 0; +} + + } // namespace art #endif // ART_RUNTIME_UTILS_H_ diff --git a/runtime/utils_test.cc b/runtime/utils_test.cc index 195de0c121..869d305120 100644 --- a/runtime/utils_test.cc +++ b/runtime/utils_test.cc @@ -521,4 +521,27 @@ TEST_F(UtilsTest, TestSleep) { EXPECT_GT(NanoTime() - start, MsToNs(1000)); } +TEST_F(UtilsTest, IsValidDescriptor) { + std::vector<uint8_t> descriptor( + { 'L', 'a', '/', 'b', '$', 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, ';', 0x00 }); + EXPECT_TRUE(IsValidDescriptor(reinterpret_cast<char*>(&descriptor[0]))); + + std::vector<uint8_t> unpaired_surrogate( + { 'L', 'a', '/', 'b', '$', 0xed, 0xa0, 0x80, ';', 0x00 }); + EXPECT_FALSE(IsValidDescriptor(reinterpret_cast<char*>(&unpaired_surrogate[0]))); + + std::vector<uint8_t> unpaired_surrogate_at_end( + { 'L', 'a', '/', 'b', '$', 0xed, 0xa0, 0x80, 0x00 }); + EXPECT_FALSE(IsValidDescriptor(reinterpret_cast<char*>(&unpaired_surrogate_at_end[0]))); + + std::vector<uint8_t> invalid_surrogate( + { 'L', 'a', '/', 'b', '$', 0xed, 0xb0, 0x80, ';', 0x00 }); + EXPECT_FALSE(IsValidDescriptor(reinterpret_cast<char*>(&invalid_surrogate[0]))); + + std::vector<uint8_t> unpaired_surrogate_with_multibyte_sequence( + { 'L', 'a', '/', 'b', '$', 0xed, 0xb0, 0x80, 0xf0, 0x9f, 0x8f, 0xa0, ';', 0x00 }); + EXPECT_FALSE( + IsValidDescriptor(reinterpret_cast<char*>(&unpaired_surrogate_with_multibyte_sequence[0]))); +} + } // namespace art diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java index 0e90c4d6a2..4dfa73cbaf 100644 --- a/test/082-inline-execute/src/Main.java +++ b/test/082-inline-execute/src/Main.java @@ -236,15 +236,6 @@ public class Main { String str10 = "abcdefghij"; String str40 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabc"; - int supplementaryChar = 0x20b9f; - String surrogatePair = "\ud842\udf9f"; - String stringWithSurrogates = "hello " + surrogatePair + " world"; - 
- Assert.assertEquals(stringWithSurrogates.indexOf(supplementaryChar), "hello ".length()); - Assert.assertEquals(stringWithSurrogates.indexOf(supplementaryChar, 2), "hello ".length()); - Assert.assertEquals(stringWithSurrogates.indexOf(supplementaryChar, 6), 6); - Assert.assertEquals(stringWithSurrogates.indexOf(supplementaryChar, 7), -1); - Assert.assertEquals(str0.indexOf('a'), -1); Assert.assertEquals(str3.indexOf('a'), 0); Assert.assertEquals(str3.indexOf('b'), 1); @@ -269,24 +260,123 @@ public class Main { Assert.assertEquals(str40.indexOf('a',10), 10); Assert.assertEquals(str40.indexOf('b',40), -1); + testIndexOfNull(); + + // Same data as above, but stored so it's not a literal in the next test. -2 stands for + // indexOf(I) instead of indexOf(II). + start--; + int[][] searchData = { + { 'a', -2, -1 }, + { 'a', -2, 0 }, + { 'b', -2, 1 }, + { 'c', -2, 2 }, + { 'j', -2, 9 }, + { 'a', -2, 0 }, + { 'b', -2, 38 }, + { 'c', -2, 39 }, + { 'a', 20, -1 }, + { 'a', 0, -1 }, + { 'a', -1, -1 }, + { '/', ++start, -1 }, + { 'a', negIndex[0], -1 }, + { 'a', 0, 0 }, + { 'a', 1, -1 }, + { 'a', 1234, -1 }, + { 'b', 0, 1 }, + { 'b', 1, 1 }, + { 'c', 2, 2 }, + { 'j', 5, 9 }, + { 'j', 9, 9 }, + { 'a', 10, 10 }, + { 'b', 40, -1 }, + }; + testStringIndexOfChars(searchData); + + testSurrogateIndexOf(); + } + + private static void testStringIndexOfChars(int[][] searchData) { + // Use a try-catch to avoid inlining. + try { + testStringIndexOfCharsImpl(searchData); + } catch (Exception e) { + System.out.println("Unexpected exception"); + } + } + + private static void testStringIndexOfCharsImpl(int[][] searchData) { + String str0 = ""; + String str1 = "/"; + String str3 = "abc"; + String str10 = "abcdefghij"; + String str40 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabc"; + + Assert.assertEquals(str0.indexOf(searchData[0][0]), searchData[0][2]); + Assert.assertEquals(str3.indexOf(searchData[1][0]), searchData[1][2]); + Assert.assertEquals(str3.indexOf(searchData[2][0]), searchData[2][2]); + Assert.assertEquals(str3.indexOf(searchData[3][0]), searchData[3][2]); + Assert.assertEquals(str10.indexOf(searchData[4][0]), searchData[4][2]); + Assert.assertEquals(str40.indexOf(searchData[5][0]), searchData[5][2]); + Assert.assertEquals(str40.indexOf(searchData[6][0]), searchData[6][2]); + Assert.assertEquals(str40.indexOf(searchData[7][0]), searchData[7][2]); + Assert.assertEquals(str0.indexOf(searchData[8][0], searchData[8][1]), searchData[8][2]); + Assert.assertEquals(str0.indexOf(searchData[9][0], searchData[9][1]), searchData[9][2]); + Assert.assertEquals(str0.indexOf(searchData[10][0], searchData[10][1]), searchData[10][2]); + Assert.assertEquals(str1.indexOf(searchData[11][0], searchData[11][1]), searchData[11][2]); + Assert.assertEquals(str1.indexOf(searchData[12][0], searchData[12][1]), searchData[12][2]); + Assert.assertEquals(str3.indexOf(searchData[13][0], searchData[13][1]), searchData[13][2]); + Assert.assertEquals(str3.indexOf(searchData[14][0], searchData[14][1]), searchData[14][2]); + Assert.assertEquals(str3.indexOf(searchData[15][0], searchData[15][1]), searchData[15][2]); + Assert.assertEquals(str3.indexOf(searchData[16][0], searchData[16][1]), searchData[16][2]); + Assert.assertEquals(str3.indexOf(searchData[17][0], searchData[17][1]), searchData[17][2]); + Assert.assertEquals(str3.indexOf(searchData[18][0], searchData[18][1]), searchData[18][2]); + Assert.assertEquals(str10.indexOf(searchData[19][0], searchData[19][1]), searchData[19][2]); + Assert.assertEquals(str10.indexOf(searchData[20][0], 
searchData[20][1]), searchData[20][2]); + Assert.assertEquals(str40.indexOf(searchData[21][0], searchData[21][1]), searchData[21][2]); + Assert.assertEquals(str40.indexOf(searchData[22][0], searchData[22][1]), searchData[22][2]); + } + + private static void testSurrogateIndexOf() { + int supplementaryChar = 0x20b9f; + String surrogatePair = "\ud842\udf9f"; + String stringWithSurrogates = "hello " + surrogatePair + " world"; + + Assert.assertEquals(stringWithSurrogates.indexOf(supplementaryChar), "hello ".length()); + Assert.assertEquals(stringWithSurrogates.indexOf(supplementaryChar, 2), "hello ".length()); + Assert.assertEquals(stringWithSurrogates.indexOf(supplementaryChar, 6), 6); + Assert.assertEquals(stringWithSurrogates.indexOf(supplementaryChar, 7), -1); + + Assert.assertEquals(stringWithSurrogates.indexOf(supplementaryChar - 0x10000), -1); + Assert.assertEquals(stringWithSurrogates.indexOf(supplementaryChar | 0x80000000), -1); + } + + private static void testIndexOfNull() { String strNull = null; try { - strNull.indexOf('a'); + testNullIndex(strNull, 'a'); Assert.fail(); } catch (NullPointerException expected) { } try { - strNull.indexOf('a', 0); + testNullIndex(strNull, 'a', 0); Assert.fail(); } catch (NullPointerException expected) { } try { - strNull.indexOf('a', -1); + testNullIndex(strNull, 'a', -1); Assert.fail(); } catch (NullPointerException expected) { } } + private static int testNullIndex(String strNull, int c) { + return strNull.indexOf(c); + } + + private static int testNullIndex(String strNull, int c, int startIndex) { + return strNull.indexOf(c, startIndex); + } + public static void test_String_compareTo() { String test = "0123456789"; String test1 = new String("0123456789"); // different object diff --git a/test/127-secondarydex/expected.txt b/test/127-secondarydex/expected.txt index 29a1411ad3..1c8defb6ec 100644 --- a/test/127-secondarydex/expected.txt +++ b/test/127-secondarydex/expected.txt @@ -1,3 +1,4 @@ testSlowPathDirectInvoke Test Got null pointer exception +Test diff --git a/test/127-secondarydex/src/Main.java b/test/127-secondarydex/src/Main.java index c921c5b0c8..0ede8ed2b2 100644 --- a/test/127-secondarydex/src/Main.java +++ b/test/127-secondarydex/src/Main.java @@ -24,6 +24,7 @@ import java.lang.reflect.Method; public class Main { public static void main(String[] args) { testSlowPathDirectInvoke(); + testString(); } public static void testSlowPathDirectInvoke() { @@ -40,4 +41,11 @@ public class Main { System.out.println("Got unexpected exception " + e); } } + + // For string change, test that String.<init> is compiled properly in + // secondary dex. 
See http://b/20870917 + public static void testString() { + Test t = new Test(); + System.out.println(t.toString()); + } } diff --git a/test/127-secondarydex/src/Test.java b/test/127-secondarydex/src/Test.java index 82cb901374..8547e791c2 100644 --- a/test/127-secondarydex/src/Test.java +++ b/test/127-secondarydex/src/Test.java @@ -22,4 +22,8 @@ public class Test extends Super { private void print() { System.out.println("Test"); } + + public String toString() { + return new String("Test"); + } } diff --git a/test/431-optimizing-arith-shifts/src/Main.java b/test/431-optimizing-arith-shifts/src/Main.java index d8667c63c5..86422bd8e7 100644 --- a/test/431-optimizing-arith-shifts/src/Main.java +++ b/test/431-optimizing-arith-shifts/src/Main.java @@ -52,7 +52,7 @@ public class Main { expectEquals(Integer.MIN_VALUE, $opt$Shl(1073741824, 1)); // overflow expectEquals(1073741824, $opt$Shl(268435456, 2)); - // othe nly 5 lower bits should be used for shifting (& 0x1f). + // Only the 5 lower bits should be used for shifting (& 0x1f). expectEquals(7, $opt$Shl(7, 32)); // 32 & 0x1f = 0 expectEquals(14, $opt$Shl(7, 33)); // 33 & 0x1f = 1 expectEquals(32, $opt$Shl(1, 101)); // 101 & 0x1f = 5 @@ -97,6 +97,13 @@ public class Main { expectEquals(Long.MIN_VALUE, $opt$Shl(7L, Long.MAX_VALUE)); expectEquals(7L, $opt$Shl(7L, Long.MIN_VALUE)); + + // Exercise some special cases handled by backends/simplifier. + expectEquals(24L, $opt$ShlConst1(12L)); + expectEquals(0x2345678900000000L, $opt$ShlConst32(0x123456789L)); + expectEquals(0x2490249000000000L, $opt$ShlConst33(0x12481248L)); + expectEquals(0x4920492000000000L, $opt$ShlConst34(0x12481248L)); + expectEquals(0x9240924000000000L, $opt$ShlConst35(0x12481248L)); } private static void shrInt() { @@ -277,7 +284,7 @@ public class Main { return a >>> 2L; } - static int $opt$ShlConst0(int a) { + static int $opt$ShlConst0(int a) { return a << 0; } @@ -301,5 +308,25 @@ public class Main { return a >>> 0L; } + static long $opt$ShlConst1(long a) { + return a << 1L; + } + + static long $opt$ShlConst32(long a) { + return a << 32L; + } + + static long $opt$ShlConst33(long a) { + return a << 33L; + } + + static long $opt$ShlConst34(long a) { + return a << 34L; + } + + static long $opt$ShlConst35(long a) { + return a << 35L; + } + } diff --git a/test/442-checker-constant-folding/src/Main.java b/test/442-checker-constant-folding/src/Main.java index 6b21fed66c..c89ab4dffe 100644 --- a/test/442-checker-constant-folding/src/Main.java +++ b/test/442-checker-constant-folding/src/Main.java @@ -16,6 +16,12 @@ public class Main { + public static void assertFalse(boolean condition) { + if (condition) { + throw new Error(); + } + } + public static void assertIntEquals(int expected, int result) { if (expected != result) { throw new Error("Expected: " + expected + ", found: " + result); @@ -407,6 +413,54 @@ public class Main { return arg ^ arg; } + // CHECK-START: boolean Main.CmpFloatGreaterThanNaN(float) constant_folding (before) + // CHECK-DAG: [[Arg:f\d+]] ParameterValue + // CHECK-DAG: [[ConstNan:f\d+]] FloatConstant nan + // CHECK-DAG: [[Const0:i\d+]] IntConstant 0 + // CHECK-DAG: IntConstant 1 + // CHECK-DAG: [[Cmp:i\d+]] Compare [ [[Arg]] [[ConstNan]] ] + // CHECK-DAG: [[Le:z\d+]] LessThanOrEqual [ [[Cmp]] [[Const0]] ] + // CHECK-DAG: If [ [[Le]] ] + + // CHECK-START: boolean Main.CmpFloatGreaterThanNaN(float) constant_folding (after) + // CHECK-DAG: ParameterValue + // CHECK-DAG: FloatConstant nan + // CHECK-DAG: IntConstant 0 + // CHECK-DAG: [[Const1:i\d+]] IntConstant 1 + // 
CHECK-DAG: If [ [[Const1]] ] + + // CHECK-START: boolean Main.CmpFloatGreaterThanNaN(float) constant_folding (after) + // CHECK-NOT: Compare + // CHECK-NOT: LessThanOrEqual + + public static boolean CmpFloatGreaterThanNaN(float arg) { + return arg > Float.NaN; + } + + // CHECK-START: boolean Main.CmpDoubleLessThanNaN(double) constant_folding (before) + // CHECK-DAG: [[Arg:d\d+]] ParameterValue + // CHECK-DAG: [[ConstNan:d\d+]] DoubleConstant nan + // CHECK-DAG: [[Const0:i\d+]] IntConstant 0 + // CHECK-DAG: IntConstant 1 + // CHECK-DAG: [[Cmp:i\d+]] Compare [ [[Arg]] [[ConstNan]] ] + // CHECK-DAG: [[Ge:z\d+]] GreaterThanOrEqual [ [[Cmp]] [[Const0]] ] + // CHECK-DAG: If [ [[Ge]] ] + + // CHECK-START: boolean Main.CmpDoubleLessThanNaN(double) constant_folding (after) + // CHECK-DAG: ParameterValue + // CHECK-DAG: DoubleConstant nan + // CHECK-DAG: IntConstant 0 + // CHECK-DAG: [[Const1:i\d+]] IntConstant 1 + // CHECK-DAG: If [ [[Const1]] ] + + // CHECK-START: boolean Main.CmpDoubleLessThanNaN(double) constant_folding (after) + // CHECK-NOT: Compare + // CHECK-NOT: GreaterThanOrEqual + + public static boolean CmpDoubleLessThanNaN(double arg) { + return arg < Double.NaN; + } + public static void main(String[] args) { assertIntEquals(IntNegation(), -42); assertIntEquals(IntAddition1(), 3); @@ -417,17 +471,19 @@ public class Main { assertIntEquals(StaticCondition(), 5); assertIntEquals(JumpsAndConditionals(true), 7); assertIntEquals(JumpsAndConditionals(false), 3); - int random = 123456; // Chosen randomly. - assertIntEquals(And0(random), 0); - assertLongEquals(Mul0(random), 0); - assertIntEquals(OrAllOnes(random), -1); - assertLongEquals(Rem0(random), 0); - assertIntEquals(Rem1(random), 0); - assertLongEquals(RemN1(random), 0); - assertIntEquals(Shl0(random), 0); - assertLongEquals(Shr0(random), 0); - assertLongEquals(SubSameLong(random), 0); - assertIntEquals(UShr0(random), 0); - assertIntEquals(XorSameInt(random), 0); + int arbitrary = 123456; // Value chosen arbitrarily. 
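As an aside on the $opt$ShlConst32/33/34/35 cases added to 431-optimizing-arith-shifts above: they exercise the Java shift-distance rules quoted in the fixed comment. For int shifts only the low 5 bits of the distance are used (& 0x1f), while long shifts use the low 6 bits (& 0x3f), so a distance of 33 behaves like 1 for int but is a real 33-bit shift for long. A minimal standalone sketch (class name and values are illustrative, not part of the patch):

  public class ShiftMaskDemo {
    public static void main(String[] args) {
      // int distances are masked with 0x1f: 33 & 0x1f == 1.
      System.out.println((7 << 33) == (7 << 1));   // true
      // long distances are masked with 0x3f: 33 stays 33.
      System.out.println((7L << 33) == (7L << 1)); // false
      // Same arithmetic as the test: 0x12481248L << 33 == 0x2490249000000000L.
      System.out.println(Long.toHexString(0x12481248L << 33));
    }
  }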
+ assertIntEquals(And0(arbitrary), 0); + assertLongEquals(Mul0(arbitrary), 0); + assertIntEquals(OrAllOnes(arbitrary), -1); + assertLongEquals(Rem0(arbitrary), 0); + assertIntEquals(Rem1(arbitrary), 0); + assertLongEquals(RemN1(arbitrary), 0); + assertIntEquals(Shl0(arbitrary), 0); + assertLongEquals(Shr0(arbitrary), 0); + assertLongEquals(SubSameLong(arbitrary), 0); + assertIntEquals(UShr0(arbitrary), 0); + assertIntEquals(XorSameInt(arbitrary), 0); + assertFalse(CmpFloatGreaterThanNaN(arbitrary)); + assertFalse(CmpDoubleLessThanNaN(arbitrary)); } } diff --git a/test/454-get-vreg/get_vreg_jni.cc b/test/454-get-vreg/get_vreg_jni.cc index 6b4bc11086..0ef2964e35 100644 --- a/test/454-get-vreg/get_vreg_jni.cc +++ b/test/454-get-vreg/get_vreg_jni.cc @@ -29,7 +29,9 @@ class TestVisitor : public StackVisitor { public: TestVisitor(Thread* thread, Context* context, mirror::Object* this_value) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - : StackVisitor(thread, context), this_value_(this_value), found_method_index_(0) {} + : StackVisitor(thread, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames), + this_value_(this_value), + found_method_index_(0) {} bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { mirror::ArtMethod* m = GetMethod(); diff --git a/test/455-set-vreg/set_vreg_jni.cc b/test/455-set-vreg/set_vreg_jni.cc index 0a83ac0738..dffbfa47d8 100644 --- a/test/455-set-vreg/set_vreg_jni.cc +++ b/test/455-set-vreg/set_vreg_jni.cc @@ -29,7 +29,8 @@ class TestVisitor : public StackVisitor { public: TestVisitor(Thread* thread, Context* context, mirror::Object* this_value) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - : StackVisitor(thread, context), this_value_(this_value) {} + : StackVisitor(thread, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames), + this_value_(this_value) {} bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { mirror::ArtMethod* m = GetMethod(); diff --git a/test/457-regs/regs_jni.cc b/test/457-regs/regs_jni.cc index 1b32348e25..193ab9dc4e 100644 --- a/test/457-regs/regs_jni.cc +++ b/test/457-regs/regs_jni.cc @@ -29,7 +29,7 @@ class TestVisitor : public StackVisitor { public: TestVisitor(Thread* thread, Context* context) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - : StackVisitor(thread, context) {} + : StackVisitor(thread, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames) {} bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { mirror::ArtMethod* m = GetMethod(); diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java index 5d5a6b3627..efb7b83e33 100644 --- a/test/458-checker-instruction-simplification/src/Main.java +++ b/test/458-checker-instruction-simplification/src/Main.java @@ -223,6 +223,24 @@ public class Main { return arg << 0; } + // CHECK-START: int Main.Shl1(int) instruction_simplifier (before) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-DAG: [[Const1:i\d+]] IntConstant 1 + // CHECK-DAG: [[Shl:i\d+]] Shl [ [[Arg]] [[Const1]] ] + // CHECK-DAG: Return [ [[Shl]] ] + + // CHECK-START: int Main.Shl1(int) instruction_simplifier (after) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-DAG: [[Add:i\d+]] Add [ [[Arg]] [[Arg]] ] + // CHECK-DAG: Return [ [[Add]] ] + + // CHECK-START: int Main.Shl1(int) instruction_simplifier (after) + // CHECK-NOT: Shl + + public static int Shl1(int arg) { + return arg << 1; + } + // CHECK-START: long Main.Shr0(long) instruction_simplifier (before) // CHECK-DAG: [[Arg:j\d+]] ParameterValue 
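The CmpFloatGreaterThanNaN and CmpDoubleLessThanNaN checks above rest on a language guarantee the constant folder can exploit: every ordered comparison (<, <=, >, >=) involving NaN evaluates to false, so the comparison result is known at compile time and the If ends up with a constant input (the IntConstant 1 in the CHECK lines). A minimal illustration of the underlying semantics (class name hypothetical, not part of the patch):

  public class NanCompareDemo {
    public static void main(String[] args) {
      float arg = 123456f;
      System.out.println(arg > Float.NaN);        // false, for any arg
      System.out.println(arg < Double.NaN);       // false, for any arg
      System.out.println(Float.NaN == Float.NaN); // false: NaN is unordered even against itself
      System.out.println(Float.isNaN(Float.NaN)); // true: the reliable NaN test
    }
  }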
// CHECK-DAG: [[Const0:i\d+]] IntConstant 0 @@ -1060,5 +1078,6 @@ public class Main { assertDoubleEquals(Div2(150.0), 75.0); assertFloatEquals(DivMP25(100.0f), -400.0f); assertDoubleEquals(DivMP25(150.0), -600.0); + assertLongEquals(Shl1(100), 200); } } diff --git a/test/461-get-reference-vreg/get_reference_vreg_jni.cc b/test/461-get-reference-vreg/get_reference_vreg_jni.cc index f0b78e1f5e..a8ef684e93 100644 --- a/test/461-get-reference-vreg/get_reference_vreg_jni.cc +++ b/test/461-get-reference-vreg/get_reference_vreg_jni.cc @@ -29,7 +29,9 @@ class TestVisitor : public StackVisitor { public: TestVisitor(Thread* thread, Context* context, mirror::Object* this_value) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - : StackVisitor(thread, context), this_value_(this_value), found_method_index_(0) {} + : StackVisitor(thread, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames), + this_value_(this_value), + found_method_index_(0) {} bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { mirror::ArtMethod* m = GetMethod(); diff --git a/test/466-get-live-vreg/get_live_vreg_jni.cc b/test/466-get-live-vreg/get_live_vreg_jni.cc index 6715ba17e6..4724e8ebe4 100644 --- a/test/466-get-live-vreg/get_live_vreg_jni.cc +++ b/test/466-get-live-vreg/get_live_vreg_jni.cc @@ -28,7 +28,7 @@ namespace { class TestVisitor : public StackVisitor { public: TestVisitor(Thread* thread, Context* context) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - : StackVisitor(thread, context) {} + : StackVisitor(thread, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames) {} bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { mirror::ArtMethod* m = GetMethod(); diff --git a/test/468-checker-bool-simplifier-regression/smali/TestCase.smali b/test/468-checker-bool-simplifier-regression/smali/TestCase.smali index f36304d333..6ff43910d5 100644 --- a/test/468-checker-bool-simplifier-regression/smali/TestCase.smali +++ b/test/468-checker-bool-simplifier-regression/smali/TestCase.smali @@ -18,6 +18,19 @@ .field public static value:Z +# CHECK-START: boolean TestCase.testCase() boolean_simplifier (before) +# CHECK-DAG: [[Const0:i\d+]] IntConstant 0 +# CHECK-DAG: [[Const1:i\d+]] IntConstant 1 +# CHECK-DAG: [[Value:z\d+]] StaticFieldGet +# CHECK-DAG: If [ [[Value]] ] +# CHECK-DAG: [[Phi:i\d+]] Phi [ [[Const1]] [[Const0]] ] +# CHECK-DAG: Return [ [[Phi]] ] + +# CHECK-START: boolean TestCase.testCase() boolean_simplifier (after) +# CHECK-DAG: [[Value:z\d+]] StaticFieldGet +# CHECK-DAG: [[Not:z\d+]] BooleanNot [ [[Value]] ] +# CHECK-DAG: Return [ [[Not]] ] + .method public static testCase()Z .registers 2 sget-boolean v0, LTestCase;->value:Z diff --git a/test/468-checker-bool-simplifier-regression/src/Main.java b/test/468-checker-bool-simplifier-regression/src/Main.java index d45f3bfa16..8fe05c7a8a 100644 --- a/test/468-checker-bool-simplifier-regression/src/Main.java +++ b/test/468-checker-bool-simplifier-regression/src/Main.java @@ -18,19 +18,6 @@ import java.lang.reflect.*; public class Main { - // CHECK-START: boolean TestCase.testCase() boolean_simplifier (before) - // CHECK-DAG: [[Const0:i\d+]] IntConstant 0 - // CHECK-DAG: [[Const1:i\d+]] IntConstant 1 - // CHECK-DAG: [[Value:z\d+]] StaticFieldGet - // CHECK-DAG: If [ [[Value]] ] - // CHECK-DAG: [[Phi:i\d+]] Phi [ [[Const1]] [[Const0]] ] - // CHECK-DAG: Return [ [[Phi]] ] - - // CHECK-START: boolean TestCase.testCase() boolean_simplifier (after) - // CHECK-DAG: [[Value:z\d+]] StaticFieldGet - // CHECK-DAG: [[Not:z\d+]] BooleanNot [ [[Value]] ] - // 
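The new Shl1 test above pins down an instruction_simplifier rewrite: a shift left by one (Shl [Arg, 1]) is replaced with an addition of the operand to itself (Add [Arg, Arg]), which backends can usually encode more cheaply. The two forms agree for every int, overflow cases included, since both wrap in two's complement. A quick sanity check of that equivalence (illustrative, not part of the patch):

  public class Shl1Demo {
    public static void main(String[] args) {
      int[] samples = { 100, -1, 0, 7, Integer.MAX_VALUE, Integer.MIN_VALUE };
      for (int x : samples) {
        // x << 1 and x + x produce identical bit patterns, overflow included.
        if ((x << 1) != (x + x)) throw new AssertionError("mismatch at " + x);
      }
      System.out.println("x << 1 == x + x for all samples");
    }
  }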
CHECK-DAG: Return [ [[Not]] ] - public static boolean runTest(boolean input) throws Exception { Class<?> c = Class.forName("TestCase"); Method m = c.getMethod("testCase"); diff --git a/test/485-checker-dce-loop-update/expected.txt b/test/485-checker-dce-loop-update/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/485-checker-dce-loop-update/expected.txt diff --git a/test/485-checker-dce-loop-update/info.txt b/test/485-checker-dce-loop-update/info.txt new file mode 100644 index 0000000000..fccf10cc8e --- /dev/null +++ b/test/485-checker-dce-loop-update/info.txt @@ -0,0 +1,2 @@ +Tests loop information update after DCE because block removal can disconnect loops, leaving other +live blocks outside the loop they had been members of.
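To make the smali tests that follow easier to read: each method builds a loop whose exit is guarded by a branch on the constant-true value returned by $inline$True(), so once the call is inlined the branch always exits and the block behind it is dead. When dead_code_elimination_final removes that dead back edge, blocks that used to sit inside the loop (exit logic, or even a whole inner loop) end up outside it, which is exactly the loop-information update being tested. A rough Java analogue of testSingleExit (hypothetical; the real tests are written in smali so the dead block survives until the final DCE pass):

  public class DceLoopDemo {
    static boolean $inline$True() { return true; }

    static int testSingleExit(int x, boolean y) {
      while (true) {
        if (!y) {             // cannot be determined statically
          x += 7;             // live block
          continue;
        }
        if ($inline$True()) { // inlines to true: always exits
          return x;
        }
        x += 5;               // dead block once the call is folded
      }
    }

    public static void main(String[] args) {
      // Only the y == true path terminates; the checker test inspects the CFG
      // rather than running this code.
      System.out.println(testSingleExit(1, true)); // prints 1
    }
  }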
\ No newline at end of file diff --git a/test/485-checker-dce-loop-update/smali/TestCase.smali b/test/485-checker-dce-loop-update/smali/TestCase.smali new file mode 100644 index 0000000000..3873ac50c7 --- /dev/null +++ b/test/485-checker-dce-loop-update/smali/TestCase.smali @@ -0,0 +1,275 @@ +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +.class public LTestCase; + +.super Ljava/lang/Object; + +.method public static $inline$True()Z + .registers 1 + const/4 v0, 1 + return v0 +.end method + + +# CHECK-START: int TestCase.testSingleExit(int, boolean) dead_code_elimination_final (before) +# CHECK-DAG: [[ArgX:i\d+]] ParameterValue +# CHECK-DAG: [[ArgY:z\d+]] ParameterValue +# CHECK-DAG: [[Cst1:i\d+]] IntConstant 1 +# CHECK-DAG: [[Cst5:i\d+]] IntConstant 5 +# CHECK-DAG: [[Cst7:i\d+]] IntConstant 7 +# CHECK-DAG: [[PhiX:i\d+]] Phi [ [[ArgX]] [[Add5:i\d+]] [[Add7:i\d+]] ] loop_header:[[HeaderY:B\d+]] +# CHECK-DAG: If [ [[ArgY]] ] loop_header:[[HeaderY]] +# CHECK-DAG: If [ [[Cst1]] ] loop_header:[[HeaderY]] +# CHECK-DAG: [[Add5]] Add [ [[PhiX]] [[Cst5]] ] loop_header:[[HeaderY]] +# CHECK-DAG: [[Add7]] Add [ [[PhiX]] [[Cst7]] ] loop_header:[[HeaderY]] +# CHECK-DAG: Return [ [[PhiX]] ] loop_header:null + +# CHECK-START: int TestCase.testSingleExit(int, boolean) dead_code_elimination_final (after) +# CHECK-DAG: [[ArgX:i\d+]] ParameterValue +# CHECK-DAG: [[ArgY:z\d+]] ParameterValue +# CHECK-DAG: [[Cst7:i\d+]] IntConstant 7 +# CHECK-DAG: [[PhiX:i\d+]] Phi [ [[ArgX]] [[AddX:i\d+]] ] loop_header:[[HeaderY:B\d+]] +# CHECK-DAG: If [ [[ArgY]] ] loop_header:[[HeaderY]] +# CHECK-DAG: [[AddX]] Add [ [[PhiX]] [[Cst7]] ] loop_header:[[HeaderY]] +# CHECK-DAG: Return [ [[PhiX]] ] loop_header:null + +.method public static testSingleExit(IZ)I + .registers 3 + + # p0 = int X + # p1 = boolean Y + # v0 = true + + invoke-static {}, LTestCase;->$inline$True()Z + move-result v0 + + :loop_start + if-eqz p1, :loop_body # cannot be determined statically + if-nez v0, :loop_end # will always exit + + # Dead block + add-int/lit8 p0, p0, 5 + goto :loop_start + + # Live block + :loop_body + add-int/lit8 p0, p0, 7 + goto :loop_start + + :loop_end + return p0 +.end method + + +# CHECK-START: int TestCase.testMultipleExits(int, boolean, boolean) dead_code_elimination_final (before) +# CHECK-DAG: [[ArgX:i\d+]] ParameterValue +# CHECK-DAG: [[ArgY:z\d+]] ParameterValue +# CHECK-DAG: [[ArgZ:z\d+]] ParameterValue +# CHECK-DAG: [[Cst1:i\d+]] IntConstant 1 +# CHECK-DAG: [[Cst5:i\d+]] IntConstant 5 +# CHECK-DAG: [[Cst7:i\d+]] IntConstant 7 +# CHECK-DAG: [[PhiX:i\d+]] Phi [ [[ArgX]] [[Add5:i\d+]] [[Add7:i\d+]] ] loop_header:[[HeaderY:B\d+]] +# CHECK-DAG: If [ [[ArgY]] ] loop_header:[[HeaderY]] +# CHECK-DAG: If [ [[ArgZ]] ] loop_header:[[HeaderY]] +# CHECK-DAG: If [ [[Cst1]] ] loop_header:[[HeaderY]] +# CHECK-DAG: [[Add5]] Add [ [[PhiX]] [[Cst5]] ] loop_header:[[HeaderY]] +# CHECK-DAG: [[Add7]] Add [ [[PhiX]] [[Cst7]] ] loop_header:[[HeaderY]] +# CHECK-DAG: Return [ [[PhiX]] ] 
loop_header:null + +# CHECK-START: int TestCase.testMultipleExits(int, boolean, boolean) dead_code_elimination_final (after) +# CHECK-DAG: [[ArgX:i\d+]] ParameterValue +# CHECK-DAG: [[ArgY:z\d+]] ParameterValue +# CHECK-DAG: [[ArgZ:z\d+]] ParameterValue +# CHECK-DAG: [[Cst7:i\d+]] IntConstant 7 +# CHECK-DAG: [[PhiX:i\d+]] Phi [ [[ArgX]] [[Add7:i\d+]] ] loop_header:[[HeaderY:B\d+]] +# CHECK-DAG: If [ [[ArgY]] ] loop_header:[[HeaderY]] +# CHECK-DAG: [[Add7]] Add [ [[PhiX]] [[Cst7]] ] loop_header:[[HeaderY]] +# CHECK-DAG: If [ [[ArgZ]] ] loop_header:null +# CHECK-DAG: Return [ [[PhiX]] ] loop_header:null + +.method public static testMultipleExits(IZZ)I + .registers 4 + + # p0 = int X + # p1 = boolean Y + # p2 = boolean Z + # v0 = true + + invoke-static {}, LTestCase;->$inline$True()Z + move-result v0 + + :loop_start + if-eqz p1, :loop_body # cannot be determined statically + if-nez p2, :loop_end # may exit + if-nez v0, :loop_end # will always exit + + # Dead block + add-int/lit8 p0, p0, 5 + goto :loop_start + + # Live block + :loop_body + add-int/lit8 p0, p0, 7 + goto :loop_start + + :loop_end + return p0 +.end method + + +# CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination_final (before) +# CHECK-DAG: [[ArgX:i\d+]] ParameterValue +# CHECK-DAG: [[ArgY:z\d+]] ParameterValue +# CHECK-DAG: [[ArgZ:z\d+]] ParameterValue +# CHECK-DAG: [[Cst1:i\d+]] IntConstant 1 +# CHECK-DAG: [[Cst5:i\d+]] IntConstant 5 +# CHECK-DAG: [[Cst7:i\d+]] IntConstant 7 +# CHECK-DAG: [[Cst9:i\d+]] IntConstant 9 +# CHECK-DAG: [[PhiX1:i\d+]] Phi [ [[ArgX]] [[Add5:i\d+]] [[Add7:i\d+]] ] loop_header:[[HeaderY:B\d+]] +# CHECK-DAG: If [ [[ArgY]] ] loop_header:[[HeaderY]] +# CHECK-DAG: If [ [[ArgZ]] ] loop_header:[[HeaderY]] +# CHECK-DAG: [[Mul9:i\d+]] Mul [ [[PhiX1]] [[Cst9]] ] loop_header:[[HeaderY]] +# CHECK-DAG: [[PhiX2:i\d+]] Phi [ [[Mul9]] [[PhiX1]] ] loop_header:[[HeaderY]] +# CHECK-DAG: If [ [[Cst1]] ] loop_header:[[HeaderY]] +# CHECK-DAG: [[Add5]] Add [ [[PhiX2]] [[Cst5]] ] loop_header:[[HeaderY]] +# CHECK-DAG: [[Add7]] Add [ [[PhiX1]] [[Cst7]] ] loop_header:[[HeaderY]] +# CHECK-DAG: Return [ [[PhiX2]] ] loop_header:null + +# CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination_final (after) +# CHECK-DAG: [[ArgX:i\d+]] ParameterValue +# CHECK-DAG: [[ArgY:z\d+]] ParameterValue +# CHECK-DAG: [[ArgZ:z\d+]] ParameterValue +# CHECK-DAG: [[Cst7:i\d+]] IntConstant 7 +# CHECK-DAG: [[Cst9:i\d+]] IntConstant 9 +# CHECK-DAG: [[PhiX1:i\d+]] Phi [ [[ArgX]] [[Add7:i\d+]] ] loop_header:[[HeaderY:B\d+]] +# CHECK-DAG: If [ [[ArgY]] ] loop_header:[[HeaderY]] +# CHECK-DAG: [[Add7]] Add [ [[PhiX1]] [[Cst7]] ] loop_header:[[HeaderY]] +# CHECK-DAG: If [ [[ArgZ]] ] loop_header:null +# CHECK-DAG: [[Mul9:i\d+]] Mul [ [[PhiX1]] [[Cst9]] ] loop_header:null +# CHECK-DAG: [[PhiX2:i\d+]] Phi [ [[Mul9]] [[PhiX1]] ] loop_header:null +# CHECK-DAG: Return [ [[PhiX2]] ] loop_header:null + +.method public static testExitPredecessors(IZZ)I + .registers 4 + + # p0 = int X + # p1 = boolean Y + # p2 = boolean Z + # v0 = true + + invoke-static {}, LTestCase;->$inline$True()Z + move-result v0 + + :loop_start + if-eqz p1, :loop_body # cannot be determined statically + + # Additional logic which will end up outside the loop + if-eqz p2, :skip_if + mul-int/lit8 p0, p0, 9 + :skip_if + + if-nez v0, :loop_end # will always take the branch + + # Dead block + add-int/lit8 p0, p0, 5 + goto :loop_start + + # Live block + :loop_body + add-int/lit8 p0, p0, 7 + goto :loop_start + + :loop_end + 
return p0 +.end method + + +# CHECK-START: int TestCase.testInnerLoop(int, boolean, boolean) dead_code_elimination_final (before) +# CHECK-DAG: [[ArgX:i\d+]] ParameterValue +# CHECK-DAG: [[ArgY:z\d+]] ParameterValue +# CHECK-DAG: [[ArgZ:z\d+]] ParameterValue +# CHECK-DAG: [[Cst0:i\d+]] IntConstant 0 +# CHECK-DAG: [[Cst1:i\d+]] IntConstant 1 +# CHECK-DAG: [[Cst5:i\d+]] IntConstant 5 +# CHECK-DAG: [[Cst7:i\d+]] IntConstant 7 +# +# CHECK-DAG: [[PhiX:i\d+]] Phi [ [[ArgX]] [[Add5:i\d+]] [[Add7:i\d+]] ] loop_header:[[HeaderY:B\d+]] +# CHECK-DAG: [[PhiZ1:i\d+]] Phi [ [[ArgZ]] [[XorZ:i\d+]] [[PhiZ1]] ] loop_header:[[HeaderY]] +# CHECK-DAG: If [ [[ArgY]] ] loop_header:[[HeaderY]] +# +# ### Inner loop ### +# CHECK-DAG: [[PhiZ2:i\d+]] Phi [ [[PhiZ1]] [[XorZ]] ] loop_header:[[HeaderZ:B\d+]] +# CHECK-DAG: [[XorZ]] Xor [ [[PhiZ2]] [[Cst1]] ] loop_header:[[HeaderZ]] +# CHECK-DAG: [[CondZ:z\d+]] Equal [ [[XorZ]] [[Cst0]] ] loop_header:[[HeaderZ]] +# CHECK-DAG: If [ [[CondZ]] ] loop_header:[[HeaderZ]] +# +# CHECK-DAG: [[Add5]] Add [ [[PhiX]] [[Cst5]] ] loop_header:[[HeaderY]] +# CHECK-DAG: [[Add7]] Add [ [[PhiX]] [[Cst7]] ] loop_header:[[HeaderY]] +# CHECK-DAG: Return [ [[PhiX]] ] loop_header:null + +# CHECK-START: int TestCase.testInnerLoop(int, boolean, boolean) dead_code_elimination_final (after) +# CHECK-DAG: [[ArgX:i\d+]] ParameterValue +# CHECK-DAG: [[ArgY:z\d+]] ParameterValue +# CHECK-DAG: [[ArgZ:z\d+]] ParameterValue +# CHECK-DAG: [[Cst0:i\d+]] IntConstant 0 +# CHECK-DAG: [[Cst1:i\d+]] IntConstant 1 +# CHECK-DAG: [[Cst7:i\d+]] IntConstant 7 +# +# CHECK-DAG: [[PhiX:i\d+]] Phi [ [[ArgX]] [[Add7:i\d+]] ] loop_header:[[HeaderY:B\d+]] +# CHECK-DAG: [[PhiZ1:i\d+]] Phi [ [[ArgZ]] [[PhiZ1]] ] loop_header:[[HeaderY]] +# CHECK-DAG: If [ [[ArgY]] ] loop_header:[[HeaderY]] +# CHECK-DAG: [[Add7]] Add [ [[PhiX]] [[Cst7]] ] loop_header:[[HeaderY]] +# +# ### Inner loop ### +# CHECK-DAG: [[PhiZ2:i\d+]] Phi [ [[PhiZ1]] [[XorZ:i\d+]] ] loop_header:[[HeaderZ:B\d+]] +# CHECK-DAG: [[XorZ]] Xor [ [[PhiZ2]] [[Cst1]] ] loop_header:[[HeaderZ]] +# CHECK-DAG: [[CondZ:z\d+]] Equal [ [[XorZ]] [[Cst0]] ] loop_header:[[HeaderZ]] +# CHECK-DAG: If [ [[CondZ]] ] loop_header:[[HeaderZ]] +# +# CHECK-DAG: Return [ [[PhiX]] ] loop_header:null + +.method public static testInnerLoop(IZZ)I + .registers 4 + + # p0 = int X + # p1 = boolean Y + # p2 = boolean Z + # v0 = true + + invoke-static {}, LTestCase;->$inline$True()Z + move-result v0 + + :loop_start + if-eqz p1, :loop_body # cannot be determined statically + + # Inner loop which will end up outside its parent + :inner_loop_start + xor-int/lit8 p2, p2, 1 + if-eqz p2, :inner_loop_start + + if-nez v0, :loop_end # will always take the branch + + # Dead block + add-int/lit8 p0, p0, 5 + goto :loop_start + + # Live block + :loop_body + add-int/lit8 p0, p0, 7 + goto :loop_start + + :loop_end + return p0 +.end method diff --git a/test/485-checker-dce-loop-update/src/Main.java b/test/485-checker-dce-loop-update/src/Main.java new file mode 100644 index 0000000000..6bfe08b0d3 --- /dev/null +++ b/test/485-checker-dce-loop-update/src/Main.java @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.lang.reflect.Method; + +public class Main { + + // Workaround for b/18051191. + class InnerClass {} + + public static void main(String[] args) throws Exception { + return; + } +} diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk index 515b8af8ff..07e76205a4 100644 --- a/test/Android.run-test.mk +++ b/test/Android.run-test.mk @@ -385,7 +385,6 @@ TEST_ART_BROKEN_OPTIMIZING_ARM64_RUN_TESTS := # Known broken tests for the optimizing compiler. TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS := -TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS += 472-unreachable-if-regression # b/19988134 ifneq (,$(filter optimizing,$(COMPILER_TYPES))) ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ @@ -429,8 +428,7 @@ endif TEST_ART_BROKEN_OPTIMIZING_DEBUGGABLE_RUN_TESTS := # Tests that should fail in the read barrier configuration. -TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS := \ - 098-ddmc # b/20720510 +TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS := ifeq ($(ART_USE_READ_BARRIER),true) ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ diff --git a/test/Instrumentation/Instrumentation.java b/test/Instrumentation/Instrumentation.java new file mode 100644 index 0000000000..09d434213b --- /dev/null +++ b/test/Instrumentation/Instrumentation.java @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Instrumentation { + // Direct method + private void instanceMethod() { + System.out.println("instanceMethod"); + } +} diff --git a/test/run-test b/test/run-test index 2873a35c83..239681ff4e 100755 --- a/test/run-test +++ b/test/run-test @@ -39,7 +39,7 @@ if [ -z "$TMPDIR" ]; then else tmp_dir="${TMPDIR}/$USER/${test_dir}" fi -checker="${progdir}/../tools/checker.py" +checker="${progdir}/../tools/checker/checker.py" export JAVA="java" export JAVAC="javac -g" @@ -95,6 +95,7 @@ ANDROID_DATA=$ANDROID_DATA \ PATH=$ANDROID_ROOT/bin:$PATH \ $invoke_with $ANDROID_ROOT/bin/$DALVIKVM $lib \ -XXlib:$LIBART \ + -Xnorelocate \ -Ximage:$ANDROID_ROOT/framework/core.art \ -Xcompiler-option --include-debug-symbols \ "$@" diff --git a/tools/checker.py b/tools/checker.py deleted file mode 100755 index 0bce236223..0000000000 --- a/tools/checker.py +++ /dev/null @@ -1,777 +0,0 @@ -#!/usr/bin/env python2 -# -# Copyright (C) 2014 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -# Checker is a testing tool which compiles a given test file and compares the -# state of the control-flow graph before and after each optimization pass -# against a set of assertions specified alongside the tests. -# -# Tests are written in Java, turned into DEX and compiled with the Optimizing -# compiler. "Check lines" are assertions formatted as comments of the Java file. -# They begin with prefix 'CHECK' followed by a pattern that the engine attempts -# to match in the compiler-generated output. -# -# Assertions are tested in groups which correspond to the individual compiler -# passes. Each group of check lines therefore must start with a 'CHECK-START' -# header which specifies the output group it should be tested against. The group -# name must exactly match one of the groups recognized in the output (they can -# be listed with the '--list-groups' command-line flag). -# -# Matching of check lines is carried out in the order of appearance in the -# source file. There are three types of check lines: -# - CHECK: Must match an output line which appears in the output group -# later than lines matched against any preceeding checks. Output -# lines must therefore match the check lines in the same order. -# These are referred to as "in-order" checks in the code. -# - CHECK-DAG: Must match an output line which appears in the output group -# later than lines matched against any preceeding in-order checks. -# In other words, the order of output lines does not matter -# between consecutive DAG checks. -# - CHECK-NOT: Must not match any output line which appears in the output group -# later than lines matched against any preceeding checks and -# earlier than lines matched against any subsequent checks. -# Surrounding non-negative checks (or boundaries of the group) -# therefore create a scope within which the assertion is verified. -# -# Check-line patterns are treated as plain text rather than regular expressions -# but are whitespace agnostic. -# -# Actual regex patterns can be inserted enclosed in '{{' and '}}' brackets. If -# curly brackets need to be used inside the body of the regex, they need to be -# enclosed in round brackets. For example, the pattern '{{foo{2}}}' will parse -# the invalid regex 'foo{2', but '{{(fo{2})}}' will match 'foo'. -# -# Regex patterns can be named and referenced later. A new variable is defined -# with '[[name:regex]]' and can be referenced with '[[name]]'. Variables are -# only valid within the scope of the defining group. Within a group they cannot -# be redefined or used undefined. -# -# Example: -# The following assertions can be placed in a Java source file: -# -# // CHECK-START: int MyClass.MyMethod() constant_folding (after) -# // CHECK: [[ID:i[0-9]+]] IntConstant {{11|22}} -# // CHECK: Return [ [[ID]] ] -# -# The engine will attempt to match the check lines against the output of the -# group named on the first line. Together they verify that the CFG after -# constant folding returns an integer constant with value either 11 or 22. 
-# - -from __future__ import print_function -import argparse -import os -import re -import shutil -import sys -import tempfile - -class Logger(object): - - class Level(object): - NoOutput, Error, Info = range(3) - - class Color(object): - Default, Blue, Gray, Purple, Red = range(5) - - @staticmethod - def terminalCode(color, out=sys.stdout): - if not out.isatty(): - return '' - elif color == Logger.Color.Blue: - return '\033[94m' - elif color == Logger.Color.Gray: - return '\033[37m' - elif color == Logger.Color.Purple: - return '\033[95m' - elif color == Logger.Color.Red: - return '\033[91m' - else: - return '\033[0m' - - Verbosity = Level.Info - - @staticmethod - def log(text, level=Level.Info, color=Color.Default, newLine=True, out=sys.stdout): - if level <= Logger.Verbosity: - text = Logger.Color.terminalCode(color, out) + text + \ - Logger.Color.terminalCode(Logger.Color.Default, out) - if newLine: - print(text, file=out) - else: - print(text, end="", file=out) - out.flush() - - @staticmethod - def fail(msg, file=None, line=-1): - location = "" - if file: - location += file + ":" - if line > 0: - location += str(line) + ":" - if location: - location += " " - - Logger.log(location, Logger.Level.Error, color=Logger.Color.Gray, newLine=False, out=sys.stderr) - Logger.log("error: ", Logger.Level.Error, color=Logger.Color.Red, newLine=False, out=sys.stderr) - Logger.log(msg, Logger.Level.Error, out=sys.stderr) - sys.exit(msg) - - @staticmethod - def startTest(name): - Logger.log("TEST ", color=Logger.Color.Purple, newLine=False) - Logger.log(name + "... ", newLine=False) - - @staticmethod - def testPassed(): - Logger.log("PASS", color=Logger.Color.Blue) - - @staticmethod - def testFailed(msg, file=None, line=-1): - Logger.log("FAIL", color=Logger.Color.Red) - Logger.fail(msg, file, line) - -class CommonEqualityMixin: - """Mixin for class equality as equality of the fields.""" - def __eq__(self, other): - return (isinstance(other, self.__class__) - and self.__dict__ == other.__dict__) - - def __ne__(self, other): - return not self.__eq__(other) - - def __repr__(self): - return "<%s: %s>" % (type(self).__name__, str(self.__dict__)) - - -class CheckElement(CommonEqualityMixin): - """Single element of the check line.""" - - class Variant(object): - """Supported language constructs.""" - Text, Pattern, VarRef, VarDef, Separator = range(5) - - rStartOptional = r"(" - rEndOptional = r")?" 
- - rName = r"([a-zA-Z][a-zA-Z0-9]*)" - rRegex = r"(.+?)" - rPatternStartSym = r"(\{\{)" - rPatternEndSym = r"(\}\})" - rVariableStartSym = r"(\[\[)" - rVariableEndSym = r"(\]\])" - rVariableSeparator = r"(:)" - - regexPattern = rPatternStartSym + rRegex + rPatternEndSym - regexVariable = rVariableStartSym + \ - rName + \ - (rStartOptional + rVariableSeparator + rRegex + rEndOptional) + \ - rVariableEndSym - - def __init__(self, variant, name, pattern): - self.variant = variant - self.name = name - self.pattern = pattern - - @staticmethod - def newSeparator(): - return CheckElement(CheckElement.Variant.Separator, None, None) - - @staticmethod - def parseText(text): - return CheckElement(CheckElement.Variant.Text, None, re.escape(text)) - - @staticmethod - def parsePattern(patternElem): - return CheckElement(CheckElement.Variant.Pattern, None, patternElem[2:-2]) - - @staticmethod - def parseVariable(varElem): - colonPos = varElem.find(":") - if colonPos == -1: - # Variable reference - name = varElem[2:-2] - return CheckElement(CheckElement.Variant.VarRef, name, None) - else: - # Variable definition - name = varElem[2:colonPos] - body = varElem[colonPos+1:-2] - return CheckElement(CheckElement.Variant.VarDef, name, body) - -class CheckLine(CommonEqualityMixin): - """Representation of a single assertion in the check file formed of one or - more regex elements. Matching against an output line is successful only - if all regex elements can be matched in the given order.""" - - class Variant(object): - """Supported types of assertions.""" - InOrder, DAG, Not = range(3) - - def __init__(self, content, variant=Variant.InOrder, fileName=None, lineNo=-1): - self.fileName = fileName - self.lineNo = lineNo - self.content = content.strip() - - self.variant = variant - self.lineParts = self.__parse(self.content) - if not self.lineParts: - Logger.fail("Empty check line", self.fileName, self.lineNo) - - if self.variant == CheckLine.Variant.Not: - for elem in self.lineParts: - if elem.variant == CheckElement.Variant.VarDef: - Logger.fail("CHECK-NOT lines cannot define variables", self.fileName, self.lineNo) - - def __eq__(self, other): - return (isinstance(other, self.__class__) and - self.variant == other.variant and - self.lineParts == other.lineParts) - - # Returns True if the given Match object was at the beginning of the line. - def __isMatchAtStart(self, match): - return (match is not None) and (match.start() == 0) - - # Takes in a list of Match objects and returns the minimal start point among - # them. If there aren't any successful matches it returns the length of - # the searched string. - def __firstMatch(self, matches, string): - starts = map(lambda m: len(string) if m is None else m.start(), matches) - return min(starts) - - # This method parses the content of a check line stripped of the initial - # comment symbol and the CHECK keyword. - def __parse(self, line): - lineParts = [] - # Loop as long as there is something to parse. - while line: - # Search for the nearest occurrence of the special markers. - matchWhitespace = re.search(r"\s+", line) - matchPattern = re.search(CheckElement.regexPattern, line) - matchVariable = re.search(CheckElement.regexVariable, line) - - # If one of the above was identified at the current position, extract them - # from the line, parse them and add to the list of line parts. - if self.__isMatchAtStart(matchWhitespace): - # A whitespace in the check line creates a new separator of line parts. 
- # This allows for ignored output between the previous and next parts. - line = line[matchWhitespace.end():] - lineParts.append(CheckElement.newSeparator()) - elif self.__isMatchAtStart(matchPattern): - pattern = line[0:matchPattern.end()] - line = line[matchPattern.end():] - lineParts.append(CheckElement.parsePattern(pattern)) - elif self.__isMatchAtStart(matchVariable): - var = line[0:matchVariable.end()] - line = line[matchVariable.end():] - lineParts.append(CheckElement.parseVariable(var)) - else: - # If we're not currently looking at a special marker, this is a plain - # text match all the way until the first special marker (or the end - # of the line). - firstMatch = self.__firstMatch([ matchWhitespace, matchPattern, matchVariable ], line) - text = line[0:firstMatch] - line = line[firstMatch:] - lineParts.append(CheckElement.parseText(text)) - return lineParts - - # Returns the regex pattern to be matched in the output line. Variable - # references are substituted with their current values provided in the - # 'varState' argument. - # An exception is raised if a referenced variable is undefined. - def __generatePattern(self, linePart, varState): - if linePart.variant == CheckElement.Variant.VarRef: - try: - return re.escape(varState[linePart.name]) - except KeyError: - Logger.testFailed("Use of undefined variable \"" + linePart.name + "\"", - self.fileName, self.lineNo) - else: - return linePart.pattern - - def __isSeparated(self, outputLine, matchStart): - return (matchStart == 0) or (outputLine[matchStart - 1:matchStart].isspace()) - - # Attempts to match the check line against a line from the output file with - # the given initial variable values. It returns the new variable state if - # successful and None otherwise. - def match(self, outputLine, initialVarState): - # Do the full matching on a shadow copy of the variable state. If the - # matching fails half-way, we will not need to revert the state. - varState = dict(initialVarState) - - matchStart = 0 - isAfterSeparator = True - - # Now try to parse all of the parts of the check line in the right order. - # Variable values are updated on-the-fly, meaning that a variable can - # be referenced immediately after its definition. - for part in self.lineParts: - if part.variant == CheckElement.Variant.Separator: - isAfterSeparator = True - continue - - # Find the earliest match for this line part. - pattern = self.__generatePattern(part, varState) - while True: - match = re.search(pattern, outputLine[matchStart:]) - if (match is None) or (not isAfterSeparator and not self.__isMatchAtStart(match)): - return None - matchEnd = matchStart + match.end() - matchStart += match.start() - - # Check if this is a valid match if we expect a whitespace separator - # before the matched text. Otherwise loop and look for another match. - if not isAfterSeparator or self.__isSeparated(outputLine, matchStart): - break - else: - matchStart += 1 - - if part.variant == CheckElement.Variant.VarDef: - if part.name in varState: - Logger.testFailed("Multiple definitions of variable \"" + part.name + "\"", - self.fileName, self.lineNo) - varState[part.name] = outputLine[matchStart:matchEnd] - - matchStart = matchEnd - isAfterSeparator = False - - # All parts were successfully matched. Return the new variable state. 
- return varState - - -class CheckGroup(CommonEqualityMixin): - """Represents a named collection of check lines which are to be matched - against an output group of the same name.""" - - def __init__(self, name, lines, fileName=None, lineNo=-1): - self.fileName = fileName - self.lineNo = lineNo - - if not name: - Logger.fail("Check group does not have a name", self.fileName, self.lineNo) - if not lines: - Logger.fail("Check group does not have a body", self.fileName, self.lineNo) - - self.name = name - self.lines = lines - - def __eq__(self, other): - return (isinstance(other, self.__class__) and - self.name == other.name and - self.lines == other.lines) - - def __headAndTail(self, list): - return list[0], list[1:] - - # Splits a list of check lines at index 'i' such that lines[i] is the first - # element whose variant is not equal to the given parameter. - def __splitByVariant(self, lines, variant): - i = 0 - while i < len(lines) and lines[i].variant == variant: - i += 1 - return lines[:i], lines[i:] - - # Extracts the first sequence of check lines which are independent of each - # other's match location, i.e. either consecutive DAG lines or a single - # InOrder line. Any Not lines preceeding this sequence are also extracted. - def __nextIndependentChecks(self, checkLines): - notChecks, checkLines = self.__splitByVariant(checkLines, CheckLine.Variant.Not) - if not checkLines: - return notChecks, [], [] - - head, tail = self.__headAndTail(checkLines) - if head.variant == CheckLine.Variant.InOrder: - return notChecks, [head], tail - else: - assert head.variant == CheckLine.Variant.DAG - independentChecks, checkLines = self.__splitByVariant(checkLines, CheckLine.Variant.DAG) - return notChecks, independentChecks, checkLines - - # If successful, returns the line number of the first output line matching the - # check line and the updated variable state. Otherwise returns -1 and None, - # respectively. The 'lineFilter' parameter can be used to supply a list of - # line numbers (counting from 1) which should be skipped. - def __findFirstMatch(self, checkLine, outputLines, startLineNo, lineFilter, varState): - matchLineNo = startLineNo - for outputLine in outputLines: - if matchLineNo not in lineFilter: - newVarState = checkLine.match(outputLine, varState) - if newVarState is not None: - return matchLineNo, newVarState - matchLineNo += 1 - return -1, None - - # Matches the given positive check lines against the output in order of - # appearance. Variable state is propagated but the scope of the search remains - # the same for all checks. Each output line can only be matched once. - # If all check lines are matched, the resulting variable state is returned - # together with the remaining output. The function also returns output lines - # which appear before either of the matched lines so they can be tested - # against Not checks. - def __matchIndependentChecks(self, checkLines, outputLines, startLineNo, varState): - # If no checks are provided, skip over the entire output. - if not checkLines: - return outputLines, [], startLineNo + len(outputLines), varState - - # Keep track of which lines have been matched. - matchedLines = [] - - # Find first unused output line which matches each check line. 
- for checkLine in checkLines: - matchLineNo, varState = \ - self.__findFirstMatch(checkLine, outputLines, startLineNo, matchedLines, varState) - if varState is None: - Logger.testFailed("Could not match check line \"" + checkLine.content + "\" " + - "starting from output line " + str(startLineNo), - self.fileName, checkLine.lineNo) - matchedLines.append(matchLineNo) - - # Return new variable state and the output lines which lie outside the - # match locations of this independent group. - minMatchLineNo = min(matchedLines) - maxMatchLineNo = max(matchedLines) - preceedingLines = outputLines[:minMatchLineNo - startLineNo] - remainingLines = outputLines[maxMatchLineNo - startLineNo + 1:] - return preceedingLines, remainingLines, maxMatchLineNo + 1, varState - - # Makes sure that the given check lines do not match any of the given output - # lines. Variable state does not change. - def __matchNotLines(self, checkLines, outputLines, startLineNo, varState): - for checkLine in checkLines: - assert checkLine.variant == CheckLine.Variant.Not - matchLineNo, matchVarState = \ - self.__findFirstMatch(checkLine, outputLines, startLineNo, [], varState) - if matchVarState is not None: - Logger.testFailed("CHECK-NOT line \"" + checkLine.content + "\" matches output line " + \ - str(matchLineNo), self.fileName, checkLine.lineNo) - - # Matches the check lines in this group against an output group. It is - # responsible for running the checks in the right order and scope, and - # for propagating the variable state between the check lines. - def match(self, outputGroup): - varState = {} - checkLines = self.lines - outputLines = outputGroup.body - startLineNo = outputGroup.lineNo - - while checkLines: - # Extract the next sequence of location-independent checks to be matched. - notChecks, independentChecks, checkLines = self.__nextIndependentChecks(checkLines) - - # Match the independent checks. - notOutput, outputLines, newStartLineNo, newVarState = \ - self.__matchIndependentChecks(independentChecks, outputLines, startLineNo, varState) - - # Run the Not checks against the output lines which lie between the last - # two independent groups or the bounds of the output. - self.__matchNotLines(notChecks, notOutput, startLineNo, varState) - - # Update variable state. - startLineNo = newStartLineNo - varState = newVarState - -class OutputGroup(CommonEqualityMixin): - """Represents a named part of the test output against which a check group of - the same name is to be matched.""" - - def __init__(self, name, body, fileName=None, lineNo=-1): - if not name: - Logger.fail("Output group does not have a name", fileName, lineNo) - if not body: - Logger.fail("Output group does not have a body", fileName, lineNo) - - self.name = name - self.body = body - self.lineNo = lineNo - - def __eq__(self, other): - return (isinstance(other, self.__class__) and - self.name == other.name and - self.body == other.body) - - -class FileSplitMixin(object): - """Mixin for representing text files which need to be split into smaller - chunks before being parsed.""" - - def _parseStream(self, stream): - lineNo = 0 - allGroups = [] - currentGroup = None - - for line in stream: - lineNo += 1 - line = line.strip() - if not line: - continue - - # Let the child class process the line and return information about it. - # The _processLine method can modify the content of the line (or delete it - # entirely) and specify whether it starts a new group. 
- processedLine, newGroupName = self._processLine(line, lineNo) - if newGroupName is not None: - currentGroup = (newGroupName, [], lineNo) - allGroups.append(currentGroup) - if processedLine is not None: - if currentGroup is not None: - currentGroup[1].append(processedLine) - else: - self._exceptionLineOutsideGroup(line, lineNo) - - # Finally, take the generated line groups and let the child class process - # each one before storing the final outcome. - return list(map(lambda group: self._processGroup(group[0], group[1], group[2]), allGroups)) - - -class CheckFile(FileSplitMixin): - """Collection of check groups extracted from the input test file.""" - - def __init__(self, prefix, checkStream, fileName=None): - self.fileName = fileName - self.prefix = prefix - self.groups = self._parseStream(checkStream) - - # Attempts to parse a check line. The regex searches for a comment symbol - # followed by the CHECK keyword, given attribute and a colon at the very - # beginning of the line. Whitespaces are ignored. - def _extractLine(self, prefix, line): - rIgnoreWhitespace = r"\s*" - rCommentSymbols = [r"//", r"#"] - regexPrefix = rIgnoreWhitespace + \ - r"(" + r"|".join(rCommentSymbols) + r")" + \ - rIgnoreWhitespace + \ - prefix + r":" - - # The 'match' function succeeds only if the pattern is matched at the - # beginning of the line. - match = re.match(regexPrefix, line) - if match is not None: - return line[match.end():].strip() - else: - return None - - # This function is invoked on each line of the check file and returns a pair - # which instructs the parser how the line should be handled. If the line is to - # be included in the current check group, it is returned in the first value. - # If the line starts a new check group, the name of the group is returned in - # the second value. - def _processLine(self, line, lineNo): - # Lines beginning with 'CHECK-START' start a new check group. - startLine = self._extractLine(self.prefix + "-START", line) - if startLine is not None: - return None, startLine - - # Lines starting only with 'CHECK' are matched in order. - plainLine = self._extractLine(self.prefix, line) - if plainLine is not None: - return (plainLine, CheckLine.Variant.InOrder, lineNo), None - - # 'CHECK-DAG' lines are no-order assertions. - dagLine = self._extractLine(self.prefix + "-DAG", line) - if dagLine is not None: - return (dagLine, CheckLine.Variant.DAG, lineNo), None - - # 'CHECK-NOT' lines are no-order negative assertions. - notLine = self._extractLine(self.prefix + "-NOT", line) - if notLine is not None: - return (notLine, CheckLine.Variant.Not, lineNo), None - - # Other lines are ignored. - return None, None - - def _exceptionLineOutsideGroup(self, line, lineNo): - Logger.fail("Check line not inside a group", self.fileName, lineNo) - - # Constructs a check group from the parser-collected check lines. - def _processGroup(self, name, lines, lineNo): - checkLines = list(map(lambda line: CheckLine(line[0], line[1], self.fileName, line[2]), lines)) - return CheckGroup(name, checkLines, self.fileName, lineNo) - - def match(self, outputFile): - for checkGroup in self.groups: - # TODO: Currently does not handle multiple occurrences of the same group - # name, e.g. when a pass is run multiple times. It will always try to - # match a check group against the first output group of the same name. 
- outputGroup = outputFile.findGroup(checkGroup.name) - if outputGroup is None: - Logger.fail("Group \"" + checkGroup.name + "\" not found in the output", - self.fileName, checkGroup.lineNo) - Logger.startTest(checkGroup.name) - checkGroup.match(outputGroup) - Logger.testPassed() - - -class OutputFile(FileSplitMixin): - """Representation of the output generated by the test and split into groups - within which the checks are performed. - - C1visualizer format is parsed with a state machine which differentiates - between the 'compilation' and 'cfg' blocks. The former marks the beginning - of a method. It is parsed for the method's name but otherwise ignored. Each - subsequent CFG block represents one stage of the compilation pipeline and - is parsed into an output group named "<method name> <pass name>". - """ - - class ParsingState: - OutsideBlock, InsideCompilationBlock, StartingCfgBlock, InsideCfgBlock = range(4) - - def __init__(self, outputStream, fileName=None): - self.fileName = fileName - - # Initialize the state machine - self.lastMethodName = None - self.state = OutputFile.ParsingState.OutsideBlock - self.groups = self._parseStream(outputStream) - - # This function is invoked on each line of the output file and returns a pair - # which instructs the parser how the line should be handled. If the line is to - # be included in the current group, it is returned in the first value. If the - # line starts a new output group, the name of the group is returned in the - # second value. - def _processLine(self, line, lineNo): - if self.state == OutputFile.ParsingState.StartingCfgBlock: - # Previous line started a new 'cfg' block which means that this one must - # contain the name of the pass (this is enforced by C1visualizer). - if re.match("name\s+\"[^\"]+\"", line): - # Extract the pass name, prepend it with the name of the method and - # return as the beginning of a new group. - self.state = OutputFile.ParsingState.InsideCfgBlock - return (None, self.lastMethodName + " " + line.split("\"")[1]) - else: - Logger.fail("Expected output group name", self.fileName, lineNo) - - elif self.state == OutputFile.ParsingState.InsideCfgBlock: - if line == "end_cfg": - self.state = OutputFile.ParsingState.OutsideBlock - return (None, None) - else: - return (line, None) - - elif self.state == OutputFile.ParsingState.InsideCompilationBlock: - # Search for the method's name. Format: method "<name>" - if re.match("method\s+\"[^\"]*\"", line): - methodName = line.split("\"")[1].strip() - if not methodName: - Logger.fail("Empty method name in output", self.fileName, lineNo) - self.lastMethodName = methodName - elif line == "end_compilation": - self.state = OutputFile.ParsingState.OutsideBlock - return (None, None) - - else: - assert self.state == OutputFile.ParsingState.OutsideBlock - if line == "begin_cfg": - # The line starts a new group but we'll wait until the next line from - # which we can extract the name of the pass. - if self.lastMethodName is None: - Logger.fail("Expected method header", self.fileName, lineNo) - self.state = OutputFile.ParsingState.StartingCfgBlock - return (None, None) - elif line == "begin_compilation": - self.state = OutputFile.ParsingState.InsideCompilationBlock - return (None, None) - else: - Logger.fail("Output line not inside a group", self.fileName, lineNo) - - # Constructs an output group from the parser-collected output lines. 
- def _processGroup(self, name, lines, lineNo): - return OutputGroup(name, lines, self.fileName, lineNo + 1) - - def findGroup(self, name): - for group in self.groups: - if group.name == name: - return group - return None - - -def ParseArguments(): - parser = argparse.ArgumentParser() - parser.add_argument("tested_file", - help="text file the checks should be verified against") - parser.add_argument("source_path", nargs="?", - help="path to file/folder with checking annotations") - parser.add_argument("--check-prefix", dest="check_prefix", default="CHECK", metavar="PREFIX", - help="prefix of checks in the test files (default: CHECK)") - parser.add_argument("--list-groups", dest="list_groups", action="store_true", - help="print a list of all groups found in the tested file") - parser.add_argument("--dump-group", dest="dump_group", metavar="GROUP", - help="print the contents of an output group") - parser.add_argument("-q", "--quiet", action="store_true", - help="print only errors") - return parser.parse_args() - - -def ListGroups(outputFilename): - outputFile = OutputFile(open(outputFilename, "r")) - for group in outputFile.groups: - Logger.log(group.name) - - -def DumpGroup(outputFilename, groupName): - outputFile = OutputFile(open(outputFilename, "r")) - group = outputFile.findGroup(groupName) - if group: - lineNo = group.lineNo - maxLineNo = lineNo + len(group.body) - lenLineNo = len(str(maxLineNo)) + 2 - for line in group.body: - Logger.log((str(lineNo) + ":").ljust(lenLineNo) + line) - lineNo += 1 - else: - Logger.fail("Group \"" + groupName + "\" not found in the output") - - -# Returns a list of files to scan for check annotations in the given path. Path -# to a file is returned as a single-element list, directories are recursively -# traversed and all '.java' files returned. -def FindCheckFiles(path): - if not path: - Logger.fail("No source path provided") - elif os.path.isfile(path): - return [ path ] - elif os.path.isdir(path): - foundFiles = [] - for root, dirs, files in os.walk(path): - for file in files: - if os.path.splitext(file)[1] == ".java": - foundFiles.append(os.path.join(root, file)) - return foundFiles - else: - Logger.fail("Source path \"" + path + "\" not found") - - -def RunChecks(checkPrefix, checkPath, outputFilename): - outputBaseName = os.path.basename(outputFilename) - outputFile = OutputFile(open(outputFilename, "r"), outputBaseName) - - for checkFilename in FindCheckFiles(checkPath): - checkBaseName = os.path.basename(checkFilename) - checkFile = CheckFile(checkPrefix, open(checkFilename, "r"), checkBaseName) - checkFile.match(outputFile) - - -if __name__ == "__main__": - args = ParseArguments() - - if args.quiet: - Logger.Verbosity = Logger.Level.Error - - if args.list_groups: - ListGroups(args.tested_file) - elif args.dump_group: - DumpGroup(args.tested_file, args.dump_group) - else: - RunChecks(args.check_prefix, args.source_path, args.tested_file) diff --git a/tools/checker/README b/tools/checker/README new file mode 100644 index 0000000000..9b23ae9299 --- /dev/null +++ b/tools/checker/README @@ -0,0 +1,54 @@ +Checker is a testing tool which compiles a given test file and compares the +state of the control-flow graph before and after each optimization pass +against a set of assertions specified alongside the tests. + +Tests are written in Java, turned into DEX and compiled with the Optimizing +compiler. "Check lines" are assertions formatted as comments of the Java file. 
+They begin with prefix 'CHECK' followed by a pattern that the engine attempts +to match in the compiler-generated output. + +Assertions are tested in groups which correspond to the individual compiler +passes. Each group of check lines therefore must start with a 'CHECK-START' +header which specifies the output group it should be tested against. The group +name must exactly match one of the groups recognized in the output (they can +be listed with the '--list-passes' command-line flag). + +Matching of check lines is carried out in the order of appearance in the +source file. There are three types of check lines: + - CHECK: Must match an output line which appears in the output group + later than lines matched against any preceding checks. Output + lines must therefore match the check lines in the same order. + These are referred to as "in-order" checks in the code. + - CHECK-DAG: Must match an output line which appears in the output group + later than lines matched against any preceding in-order checks. + In other words, the order of output lines does not matter + between consecutive DAG checks. + - CHECK-NOT: Must not match any output line which appears in the output group + later than lines matched against any preceding checks and + earlier than lines matched against any subsequent checks. + Surrounding non-negative checks (or boundaries of the group) + therefore create a scope within which the assertion is verified. + +Check-line patterns are treated as plain text rather than regular expressions +but are whitespace agnostic. + +Actual regex patterns can be inserted enclosed in '{{' and '}}' brackets. If +curly brackets need to be used inside the body of the regex, they need to be +enclosed in round brackets. For example, the pattern '{{foo{2}}}' will parse +the invalid regex 'foo{2', but '{{(fo{2})}}' will match 'foo'. + +Regex patterns can be named and referenced later. A new variable is defined +with '[[name:regex]]' and can be referenced with '[[name]]'. Variables are +only valid within the scope of the defining group. Within a group they cannot +be redefined or used undefined. + +Example: + The following assertions can be placed in a Java source file: + + // CHECK-START: int MyClass.MyMethod() constant_folding (after) + // CHECK: [[ID:i\d+]] IntConstant {{11|22}} + // CHECK: Return [ [[ID]] ] + + The engine will attempt to match the check lines against the output of the + group named on the first line. Together they verify that the CFG after + constant folding returns an integer constant with value either 11 or 22. diff --git a/tools/checker/checker.py b/tools/checker/checker.py new file mode 100755 index 0000000000..ed630e3d12 --- /dev/null +++ b/tools/checker/checker.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python2 +# +# Copyright (C) 2014 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +import argparse +import os + +from common.logger import Logger +from file_format.c1visualizer.parser import ParseC1visualizerStream +from file_format.checker.parser import ParseCheckerStream +from match.file import MatchFiles + +def ParseArguments(): + parser = argparse.ArgumentParser() + parser.add_argument("tested_file", + help="text file the checks should be verified against") + parser.add_argument("source_path", nargs="?", + help="path to file/folder with checking annotations") + parser.add_argument("--check-prefix", dest="check_prefix", default="CHECK", metavar="PREFIX", + help="prefix of checks in the test files (default: CHECK)") + parser.add_argument("--list-passes", dest="list_passes", action="store_true", + help="print a list of all passes found in the tested file") + parser.add_argument("--dump-pass", dest="dump_pass", metavar="PASS", + help="print a compiler pass dump") + parser.add_argument("-q", "--quiet", action="store_true", + help="print only errors") + return parser.parse_args() + + +def ListPasses(outputFilename): + c1File = ParseC1visualizerStream(os.path.basename(outputFilename), open(outputFilename, "r")) + for compiler_pass in c1File.passes: + Logger.log(compiler_pass.name) + + +def DumpPass(outputFilename, passName): + c1File = ParseC1visualizerStream(os.path.basename(outputFilename), open(outputFilename, "r")) + compiler_pass = c1File.findPass(passName) + if compiler_pass: + maxLineNo = compiler_pass.startLineNo + len(compiler_pass.body) + lenLineNo = len(str(maxLineNo)) + 2 + curLineNo = compiler_pass.startLineNo + for line in compiler_pass.body: + Logger.log((str(curLineNo) + ":").ljust(lenLineNo) + line) + curLineNo += 1 + else: + Logger.fail("Pass \"" + passName + "\" not found in the output") + + +def FindCheckerFiles(path): + """ Returns a list of files to scan for check annotations in the given path. + Path to a file is returned as a single-element list, directories are + recursively traversed and all '.java' files returned. + """ + if not path: + Logger.fail("No source path provided") + elif os.path.isfile(path): + return [ path ] + elif os.path.isdir(path): + foundFiles = [] + for root, dirs, files in os.walk(path): + for file in files: + extension = os.path.splitext(file)[1] + if extension in [".java", ".smali"]: + foundFiles.append(os.path.join(root, file)) + return foundFiles + else: + Logger.fail("Source path \"" + path + "\" not found") + + +def RunTests(checkPrefix, checkPath, outputFilename): + c1File = ParseC1visualizerStream(os.path.basename(outputFilename), open(outputFilename, "r")) + for checkFilename in FindCheckerFiles(checkPath): + checkerFile = ParseCheckerStream(os.path.basename(checkFilename), + checkPrefix, + open(checkFilename, "r")) + MatchFiles(checkerFile, c1File) + + +if __name__ == "__main__": + args = ParseArguments() + + if args.quiet: + Logger.Verbosity = Logger.Level.Error + + if args.list_passes: + ListPasses(args.tested_file) + elif args.dump_pass: + DumpPass(args.tested_file, args.dump_pass) + else: + RunTests(args.check_prefix, args.source_path, args.tested_file) diff --git a/tools/checker/common/__init__.py b/tools/checker/common/__init__.py new file mode 100644 index 0000000000..d0a140be2b --- /dev/null +++ b/tools/checker/common/__init__.py @@ -0,0 +1,13 @@ +# Copyright (C) 2014 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tools/checker/common/logger.py b/tools/checker/common/logger.py new file mode 100644 index 0000000000..28bb458da7 --- /dev/null +++ b/tools/checker/common/logger.py @@ -0,0 +1,81 @@ +# Copyright (C) 2014 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import sys + +class Logger(object): + + class Level(object): + NoOutput, Error, Info = range(3) + + class Color(object): + Default, Blue, Gray, Purple, Red = range(5) + + @staticmethod + def terminalCode(color, out=sys.stdout): + if not out.isatty(): + return '' + elif color == Logger.Color.Blue: + return '\033[94m' + elif color == Logger.Color.Gray: + return '\033[37m' + elif color == Logger.Color.Purple: + return '\033[95m' + elif color == Logger.Color.Red: + return '\033[91m' + else: + return '\033[0m' + + Verbosity = Level.Info + + @staticmethod + def log(text, level=Level.Info, color=Color.Default, newLine=True, out=sys.stdout): + if level <= Logger.Verbosity: + text = Logger.Color.terminalCode(color, out) + text + \ + Logger.Color.terminalCode(Logger.Color.Default, out) + if newLine: + print(text, file=out) + else: + print(text, end="", file=out) + out.flush() + + @staticmethod + def fail(msg, file=None, line=-1): + location = "" + if file: + location += file + ":" + if line > 0: + location += str(line) + ":" + if location: + location += " " + + Logger.log(location, Logger.Level.Error, color=Logger.Color.Gray, newLine=False, out=sys.stderr) + Logger.log("error: ", Logger.Level.Error, color=Logger.Color.Red, newLine=False, out=sys.stderr) + Logger.log(msg, Logger.Level.Error, out=sys.stderr) + sys.exit(msg) + + @staticmethod + def startTest(name): + Logger.log("TEST ", color=Logger.Color.Purple, newLine=False) + Logger.log(name + "... ", newLine=False) + + @staticmethod + def testPassed(): + Logger.log("PASS", color=Logger.Color.Blue) + + @staticmethod + def testFailed(msg, file=None, line=-1): + Logger.log("FAIL", color=Logger.Color.Red) + Logger.fail(msg, file, line) diff --git a/tools/checker/common/mixins.py b/tools/checker/common/mixins.py new file mode 100644 index 0000000000..819de240a7 --- /dev/null +++ b/tools/checker/common/mixins.py @@ -0,0 +1,26 @@ +# Copyright (C) 2014 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+class EqualityMixin:
+  """ Object equality via equality of dictionaries. """
+
+  def __eq__(self, other):
+    return isinstance(other, self.__class__) \
+       and self.__dict__ == other.__dict__
+
+class PrintableMixin:
+  """ Provides a __repr__ of the object as a name-dictionary pair. """
+
+  def __repr__(self):
+    return "<%s: %s>" % (type(self).__name__, str(self.__dict__))
diff --git a/tools/checker/common/testing.py b/tools/checker/common/testing.py
new file mode 100644
index 0000000000..1299c07d5f
--- /dev/null
+++ b/tools/checker/common/testing.py
@@ -0,0 +1,22 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+def ToUnicode(string):
+  """ Converts a string into Unicode.
+
+      This is a thin wrapper around the built-in `unicode`. It asserts that
+      the input is not `None`, because `unicode` would turn that into the
+      literal string "None".
+  """
+  assert string is not None
+  return unicode(string)
diff --git a/tools/checker/file_format/__init__.py b/tools/checker/file_format/__init__.py
new file mode 100644
index 0000000000..d0a140be2b
--- /dev/null
+++ b/tools/checker/file_format/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tools/checker/file_format/c1visualizer/__init__.py b/tools/checker/file_format/c1visualizer/__init__.py
new file mode 100644
index 0000000000..d0a140be2b
--- /dev/null
+++ b/tools/checker/file_format/c1visualizer/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
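A quick illustration of what the two mixins above provide, as a minimal
sketch (the Point class is invented for this example; in Python 2 the key
order inside the printed dictionary is unspecified):

    from common.mixins import EqualityMixin, PrintableMixin

    class Point(EqualityMixin, PrintableMixin):
      def __init__(self, x, y):
        self.x = x
        self.y = y

    assert Point(1, 2) == Point(1, 2)        # same class, same __dict__
    assert not (Point(1, 2) == Point(1, 3))  # same class, different __dict__
    print(repr(Point(1, 2)))                 # e.g. <Point: {'x': 1, 'y': 2}>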
diff --git a/tools/checker/file_format/c1visualizer/parser.py b/tools/checker/file_format/c1visualizer/parser.py new file mode 100644 index 0000000000..335a195883 --- /dev/null +++ b/tools/checker/file_format/c1visualizer/parser.py @@ -0,0 +1,87 @@ +# Copyright (C) 2014 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from common.logger import Logger +from file_format.common import SplitStream +from file_format.c1visualizer.struct import C1visualizerFile, C1visualizerPass + +import re + +class C1ParserState: + OutsideBlock, InsideCompilationBlock, StartingCfgBlock, InsideCfgBlock = range(4) + + def __init__(self): + self.currentState = C1ParserState.OutsideBlock + self.lastMethodName = None + +def __parseC1Line(line, lineNo, state, fileName): + """ This function is invoked on each line of the output file and returns + a pair which instructs the parser how the line should be handled. If the + line is to be included in the current group, it is returned in the first + value. If the line starts a new output group, the name of the group is + returned in the second value. + """ + if state.currentState == C1ParserState.StartingCfgBlock: + # Previous line started a new 'cfg' block which means that this one must + # contain the name of the pass (this is enforced by C1visualizer). + if re.match("name\s+\"[^\"]+\"", line): + # Extract the pass name, prepend it with the name of the method and + # return as the beginning of a new group. + state.currentState = C1ParserState.InsideCfgBlock + return (None, state.lastMethodName + " " + line.split("\"")[1]) + else: + Logger.fail("Expected output group name", fileName, lineNo) + + elif state.currentState == C1ParserState.InsideCfgBlock: + if line == "end_cfg": + state.currentState = C1ParserState.OutsideBlock + return (None, None) + else: + return (line, None) + + elif state.currentState == C1ParserState.InsideCompilationBlock: + # Search for the method's name. Format: method "<name>" + if re.match("method\s+\"[^\"]*\"", line): + methodName = line.split("\"")[1].strip() + if not methodName: + Logger.fail("Empty method name in output", fileName, lineNo) + state.lastMethodName = methodName + elif line == "end_compilation": + state.currentState = C1ParserState.OutsideBlock + return (None, None) + + else: + assert state.currentState == C1ParserState.OutsideBlock + if line == "begin_cfg": + # The line starts a new group but we'll wait until the next line from + # which we can extract the name of the pass. 
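+      # (For example, a 'begin_cfg' followed by 'name "pass1"' after the
+      # compilation block of method "MyMethod" starts the output group
+      # "MyMethod pass1".)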
+ if state.lastMethodName is None: + Logger.fail("Expected method header", fileName, lineNo) + state.currentState = C1ParserState.StartingCfgBlock + return (None, None) + elif line == "begin_compilation": + state.currentState = C1ParserState.InsideCompilationBlock + return (None, None) + else: + Logger.fail("C1visualizer line not inside a group", fileName, lineNo) + +def ParseC1visualizerStream(fileName, stream): + c1File = C1visualizerFile(fileName) + state = C1ParserState() + fnProcessLine = lambda line, lineNo: __parseC1Line(line, lineNo, state, fileName) + fnLineOutsideChunk = lambda line, lineNo: \ + Logger.fail("C1visualizer line not inside a group", fileName, lineNo) + for passName, passLines, startLineNo in SplitStream(stream, fnProcessLine, fnLineOutsideChunk): + C1visualizerPass(c1File, passName, passLines, startLineNo + 1) + return c1File diff --git a/tools/checker/file_format/c1visualizer/struct.py b/tools/checker/file_format/c1visualizer/struct.py new file mode 100644 index 0000000000..991564eff4 --- /dev/null +++ b/tools/checker/file_format/c1visualizer/struct.py @@ -0,0 +1,60 @@ +# Copyright (C) 2014 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from common.logger import Logger +from common.mixins import PrintableMixin + +class C1visualizerFile(PrintableMixin): + + def __init__(self, fileName): + self.fileName = fileName + self.passes = [] + + def addPass(self, new_pass): + self.passes.append(new_pass) + + def findPass(self, name): + for entry in self.passes: + if entry.name == name: + return entry + return None + + def __eq__(self, other): + return isinstance(other, self.__class__) \ + and self.passes == other.passes + + +class C1visualizerPass(PrintableMixin): + + def __init__(self, parent, name, body, startLineNo): + self.parent = parent + self.name = name + self.body = body + self.startLineNo = startLineNo + + if not self.name: + Logger.fail("C1visualizer pass does not have a name", self.fileName, self.startLineNo) + if not self.body: + Logger.fail("C1visualizer pass does not have a body", self.fileName, self.startLineNo) + + self.parent.addPass(self) + + @property + def fileName(self): + return self.parent.fileName + + def __eq__(self, other): + return isinstance(other, self.__class__) \ + and self.name == other.name \ + and self.body == other.body diff --git a/tools/checker/file_format/c1visualizer/test.py b/tools/checker/file_format/c1visualizer/test.py new file mode 100644 index 0000000000..812a4cf9ce --- /dev/null +++ b/tools/checker/file_format/c1visualizer/test.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python2 +# +# Copyright (C) 2014 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from common.testing import ToUnicode
+from file_format.c1visualizer.parser import ParseC1visualizerStream
+from file_format.c1visualizer.struct import C1visualizerFile, C1visualizerPass
+
+import io
+import unittest
+
+class C1visualizerParser_Test(unittest.TestCase):
+
+  def createFile(self, passList):
+    """ Creates an instance of C1visualizerFile from the provided info.
+
+        Data format: [ ( <pass-name>, [ <body-line>, ... ] ), ... ]
+    """
+    c1File = C1visualizerFile("<c1_file>")
+    for passEntry in passList:
+      passName = passEntry[0]
+      passBody = passEntry[1]
+      c1Pass = C1visualizerPass(c1File, passName, passBody, 0)
+    return c1File
+
+  def assertParsesTo(self, c1Text, expectedData):
+    expectedFile = self.createFile(expectedData)
+    actualFile = ParseC1visualizerStream("<c1_file>", io.StringIO(ToUnicode(c1Text)))
+    return self.assertEqual(expectedFile, actualFile)
+
+  def test_EmptyFile(self):
+    self.assertParsesTo("", [])
+
+  def test_SingleGroup(self):
+    self.assertParsesTo(
+      """
+        begin_compilation
+          method "MyMethod"
+        end_compilation
+        begin_cfg
+          name "pass1"
+          foo
+          bar
+        end_cfg
+      """,
+      [ ( "MyMethod pass1", [ "foo", "bar" ] ) ])
+
+  def test_MultipleGroups(self):
+    self.assertParsesTo(
+      """
+        begin_compilation
+          name "xyz1"
+          method "MyMethod1"
+          date 1234
+        end_compilation
+        begin_cfg
+          name "pass1"
+          foo
+          bar
+        end_cfg
+        begin_cfg
+          name "pass2"
+          abc
+          def
+        end_cfg
+      """,
+      [ ( "MyMethod1 pass1", [ "foo", "bar" ] ),
+        ( "MyMethod1 pass2", [ "abc", "def" ] ) ])
+    self.assertParsesTo(
+      """
+        begin_compilation
+          name "xyz1"
+          method "MyMethod1"
+          date 1234
+        end_compilation
+        begin_cfg
+          name "pass1"
+          foo
+          bar
+        end_cfg
+        begin_compilation
+          name "xyz2"
+          method "MyMethod2"
+          date 5678
+        end_compilation
+        begin_cfg
+          name "pass2"
+          abc
+          def
+        end_cfg
+      """,
+      [ ( "MyMethod1 pass1", [ "foo", "bar" ] ),
+        ( "MyMethod2 pass2", [ "abc", "def" ] ) ])
diff --git a/tools/checker/file_format/checker/__init__.py b/tools/checker/file_format/checker/__init__.py
new file mode 100644
index 0000000000..d0a140be2b
--- /dev/null
+++ b/tools/checker/file_format/checker/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
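For reference, a minimal sketch of driving the C1visualizer parser by hand,
mirroring the tests above (the stream contents are made up; by the parser
rules, the single pass is expected to be named "MyMethod pass1" with body
["foo"]):

    import io
    from common.testing import ToUnicode
    from file_format.c1visualizer.parser import ParseC1visualizerStream

    c1Text = ToUnicode("""
      begin_compilation
        method "MyMethod"
      end_compilation
      begin_cfg
        name "pass1"
        foo
      end_cfg
    """)
    c1File = ParseC1visualizerStream("<example>", io.StringIO(c1Text))
    for c1Pass in c1File.passes:
      print(c1Pass.name)  # expected: MyMethod pass1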
diff --git a/tools/checker/file_format/checker/parser.py b/tools/checker/file_format/checker/parser.py
new file mode 100644
index 0000000000..d7a38dab4c
--- /dev/null
+++ b/tools/checker/file_format/checker/parser.py
@@ -0,0 +1,142 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from common.logger import Logger
+from file_format.common import SplitStream
+from file_format.checker.struct import CheckerFile, TestCase, TestAssertion, RegexExpression
+
+import re
+
+def __extractLine(prefix, line):
+  """ Attempts to parse a check line. The regex searches for a comment symbol
+      followed by the given check prefix and a colon at the very beginning of
+      the line. Whitespaces are ignored.
+  """
+  rIgnoreWhitespace = r"\s*"
+  rCommentSymbols = [r"//", r"#"]
+  regexPrefix = rIgnoreWhitespace + \
+                r"(" + r"|".join(rCommentSymbols) + r")" + \
+                rIgnoreWhitespace + \
+                prefix + r":"
+
+  # The 'match' function succeeds only if the pattern is matched at the
+  # beginning of the line.
+  match = re.match(regexPrefix, line)
+  if match is not None:
+    return line[match.end():].strip()
+  else:
+    return None
+
+def __processLine(line, lineNo, prefix):
+  """ This function is invoked on each line of the check file and returns a pair
+      which instructs the parser how the line should be handled. If the line is
+      to be included in the current check group, it is returned in the first
+      value. If the line starts a new check group, the name of the group is
+      returned in the second value.
+  """
+  # Lines beginning with 'CHECK-START' start a new test case.
+  startLine = __extractLine(prefix + "-START", line)
+  if startLine is not None:
+    return None, startLine
+
+  # Lines starting only with 'CHECK' are matched in order.
+  plainLine = __extractLine(prefix, line)
+  if plainLine is not None:
+    return (plainLine, TestAssertion.Variant.InOrder, lineNo), None
+
+  # 'CHECK-DAG' lines are no-order assertions.
+  dagLine = __extractLine(prefix + "-DAG", line)
+  if dagLine is not None:
+    return (dagLine, TestAssertion.Variant.DAG, lineNo), None
+
+  # 'CHECK-NOT' lines are no-order negative assertions.
+  notLine = __extractLine(prefix + "-NOT", line)
+  if notLine is not None:
+    return (notLine, TestAssertion.Variant.Not, lineNo), None
+
+  # Other lines are ignored.
+  return None, None
+
+def __isMatchAtStart(match):
+  """ Tests if the given Match occurred at the beginning of the line. """
+  return (match is not None) and (match.start() == 0)
+
+def __firstMatch(matches, string):
+  """ Takes in a list of Match objects and returns the minimal start point among
+      them. If there aren't any successful matches, it returns the length of
+      the searched string.
+  """
+  starts = map(lambda m: len(string) if m is None else m.start(), matches)
+  return min(starts)
+
+def ParseCheckerAssertion(parent, line, variant, lineNo):
+  """ This method parses the content of a check line stripped of the initial
+      comment symbol and the CHECK keyword.
+  """
+  assertion = TestAssertion(parent, variant, line, lineNo)
+  # Loop as long as there is something to parse.
+  while line:
+    # Search for the nearest occurrence of the special markers.
+    matchWhitespace = re.search(r"\s+", line)
+    matchPattern = re.search(RegexExpression.Regex.regexPattern, line)
+    matchVariableReference = re.search(RegexExpression.Regex.regexVariableReference, line)
+    matchVariableDefinition = re.search(RegexExpression.Regex.regexVariableDefinition, line)
+
+    # If one of the above was identified at the current position, extract them
+    # from the line, parse them and add to the list of line parts.
+    if __isMatchAtStart(matchWhitespace):
+      # A whitespace in the check line creates a new separator of line parts.
+      # This allows for ignored output between the previous and next parts.
+      line = line[matchWhitespace.end():]
+      assertion.addExpression(RegexExpression.createSeparator())
+    elif __isMatchAtStart(matchPattern):
+      pattern = line[0:matchPattern.end()]
+      pattern = pattern[2:-2]
+      line = line[matchPattern.end():]
+      assertion.addExpression(RegexExpression.createPattern(pattern))
+    elif __isMatchAtStart(matchVariableReference):
+      var = line[0:matchVariableReference.end()]
+      line = line[matchVariableReference.end():]
+      name = var[2:-2]
+      assertion.addExpression(RegexExpression.createVariableReference(name))
+    elif __isMatchAtStart(matchVariableDefinition):
+      var = line[0:matchVariableDefinition.end()]
+      line = line[matchVariableDefinition.end():]
+      colonPos = var.find(":")
+      name = var[2:colonPos]
+      body = var[colonPos+1:-2]
+      assertion.addExpression(RegexExpression.createVariableDefinition(name, body))
+    else:
+      # If we're not currently looking at a special marker, this is a plain
+      # text match all the way until the first special marker (or the end
+      # of the line).
+      firstMatch = __firstMatch([ matchWhitespace,
+                                  matchPattern,
+                                  matchVariableReference,
+                                  matchVariableDefinition ],
+                                line)
+      text = line[0:firstMatch]
+      line = line[firstMatch:]
+      assertion.addExpression(RegexExpression.createText(text))
+  return assertion
+
+def ParseCheckerStream(fileName, prefix, stream):
+  checkerFile = CheckerFile(fileName)
+  fnProcessLine = lambda line, lineNo: __processLine(line, lineNo, prefix)
+  fnLineOutsideChunk = lambda line, lineNo: \
+    Logger.fail("Checker line not inside a group", fileName, lineNo)
+  for caseName, caseLines, startLineNo in SplitStream(stream, fnProcessLine, fnLineOutsideChunk):
+    testCase = TestCase(checkerFile, caseName, startLineNo)
+    for caseLine in caseLines:
+      ParseCheckerAssertion(testCase, caseLine[0], caseLine[1], caseLine[2])
+  return checkerFile
diff --git a/tools/checker/file_format/checker/struct.py b/tools/checker/file_format/checker/struct.py
new file mode 100644
index 0000000000..3354cb6f70
--- /dev/null
+++ b/tools/checker/file_format/checker/struct.py
@@ -0,0 +1,156 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
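+
+# Object model of a parsed checker file, as defined below:
+#   CheckerFile -> TestCase -> TestAssertion -> RegexExpression
+# TestCase and TestAssertion register themselves with their parent
+# on construction.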
+ +from common.logger import Logger +from common.mixins import EqualityMixin, PrintableMixin + +import re + +class CheckerFile(PrintableMixin): + + def __init__(self, fileName): + self.fileName = fileName + self.testCases = [] + + def addTestCase(self, new_test_case): + self.testCases.append(new_test_case) + + def __eq__(self, other): + return isinstance(other, self.__class__) \ + and self.testCases == other.testCases + + +class TestCase(PrintableMixin): + + def __init__(self, parent, name, startLineNo): + assert isinstance(parent, CheckerFile) + + self.parent = parent + self.name = name + self.assertions = [] + self.startLineNo = startLineNo + + if not self.name: + Logger.fail("Test case does not have a name", self.parent.fileName, self.startLineNo) + + self.parent.addTestCase(self) + + @property + def fileName(self): + return self.parent.fileName + + def addAssertion(self, new_assertion): + self.assertions.append(new_assertion) + + def __eq__(self, other): + return isinstance(other, self.__class__) \ + and self.name == other.name \ + and self.assertions == other.assertions + + +class TestAssertion(PrintableMixin): + + class Variant(object): + """Supported types of assertions.""" + InOrder, DAG, Not = range(3) + + def __init__(self, parent, variant, originalText, lineNo): + assert isinstance(parent, TestCase) + + self.parent = parent + self.variant = variant + self.expressions = [] + self.lineNo = lineNo + self.originalText = originalText + + self.parent.addAssertion(self) + + @property + def fileName(self): + return self.parent.fileName + + def addExpression(self, new_expression): + assert isinstance(new_expression, RegexExpression) + if self.variant == TestAssertion.Variant.Not: + if new_expression.variant == RegexExpression.Variant.VarDef: + Logger.fail("CHECK-NOT lines cannot define variables", self.fileName, self.lineNo) + self.expressions.append(new_expression) + + def toRegex(self): + """ Returns a regex pattern for this entire assertion. Only used in tests. 
""" + regex = "" + for expression in self.expressions: + if expression.variant == RegexExpression.Variant.Separator: + regex = regex + ", " + else: + regex = regex + "(" + expression.pattern + ")" + return regex + + def __eq__(self, other): + return isinstance(other, self.__class__) \ + and self.variant == other.variant \ + and self.expressions == other.expressions + + +class RegexExpression(EqualityMixin, PrintableMixin): + + class Variant(object): + """Supported language constructs.""" + Text, Pattern, VarRef, VarDef, Separator = range(5) + + class Regex(object): + rName = r"([a-zA-Z][a-zA-Z0-9]*)" + rRegex = r"(.+?)" + rPatternStartSym = r"(\{\{)" + rPatternEndSym = r"(\}\})" + rVariableStartSym = r"(\[\[)" + rVariableEndSym = r"(\]\])" + rVariableSeparator = r"(:)" + + regexPattern = rPatternStartSym + rRegex + rPatternEndSym + regexVariableReference = rVariableStartSym + rName + rVariableEndSym + regexVariableDefinition = rVariableStartSym + rName + rVariableSeparator + rRegex + rVariableEndSym + + def __init__(self, variant, name, pattern): + self.variant = variant + self.name = name + self.pattern = pattern + + def __eq__(self, other): + return isinstance(other, self.__class__) \ + and self.variant == other.variant \ + and self.name == other.name \ + and self.pattern == other.pattern + + @staticmethod + def createSeparator(): + return RegexExpression(RegexExpression.Variant.Separator, None, None) + + @staticmethod + def createText(text): + return RegexExpression(RegexExpression.Variant.Text, None, re.escape(text)) + + @staticmethod + def createPattern(pattern): + return RegexExpression(RegexExpression.Variant.Pattern, None, pattern) + + @staticmethod + def createVariableReference(name): + assert re.match(RegexExpression.Regex.rName, name) + return RegexExpression(RegexExpression.Variant.VarRef, name, None) + + @staticmethod + def createVariableDefinition(name, pattern): + assert re.match(RegexExpression.Regex.rName, name) + return RegexExpression(RegexExpression.Variant.VarDef, name, pattern) diff --git a/tools/checker/file_format/checker/test.py b/tools/checker/file_format/checker/test.py new file mode 100644 index 0000000000..167c8880e9 --- /dev/null +++ b/tools/checker/file_format/checker/test.py @@ -0,0 +1,238 @@ +#!/usr/bin/env python2 +# +# Copyright (C) 2014 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from common.testing import ToUnicode +from file_format.checker.parser import ParseCheckerStream +from file_format.checker.struct import CheckerFile, TestCase, TestAssertion, RegexExpression + +import io +import unittest + +CheckerException = SystemExit + +class CheckerParser_PrefixTest(unittest.TestCase): + + def tryParse(self, string): + checkerText = u"// CHECK-START: pass\n" + ToUnicode(string) + checkFile = ParseCheckerStream("<test-file>", "CHECK", io.StringIO(checkerText)) + self.assertEqual(len(checkFile.testCases), 1) + testCase = checkFile.testCases[0] + return len(testCase.assertions) != 0 + + def test_InvalidFormat(self): + self.assertFalse(self.tryParse("CHECK")) + self.assertFalse(self.tryParse(":CHECK")) + self.assertFalse(self.tryParse("CHECK:")) + self.assertFalse(self.tryParse("//CHECK")) + self.assertFalse(self.tryParse("#CHECK")) + + self.assertTrue(self.tryParse("//CHECK:foo")) + self.assertTrue(self.tryParse("#CHECK:bar")) + + def test_InvalidLabel(self): + self.assertFalse(self.tryParse("//ACHECK:foo")) + self.assertFalse(self.tryParse("#ACHECK:foo")) + + def test_NotFirstOnTheLine(self): + self.assertFalse(self.tryParse("A// CHECK: foo")) + self.assertFalse(self.tryParse("A # CHECK: foo")) + self.assertFalse(self.tryParse("// // CHECK: foo")) + self.assertFalse(self.tryParse("# # CHECK: foo")) + + def test_WhitespaceAgnostic(self): + self.assertTrue(self.tryParse(" //CHECK: foo")) + self.assertTrue(self.tryParse("// CHECK: foo")) + self.assertTrue(self.tryParse(" //CHECK: foo")) + self.assertTrue(self.tryParse("// CHECK: foo")) + + +class CheckerParser_RegexExpressionTest(unittest.TestCase): + + def parseAssertion(self, string, variant=""): + checkerText = u"// CHECK-START: pass\n// CHECK" + ToUnicode(variant) + u": " + ToUnicode(string) + checkerFile = ParseCheckerStream("<test-file>", "CHECK", io.StringIO(checkerText)) + self.assertEqual(len(checkerFile.testCases), 1) + testCase = checkerFile.testCases[0] + self.assertEqual(len(testCase.assertions), 1) + return testCase.assertions[0] + + def parseExpression(self, string): + line = self.parseAssertion(string) + self.assertEqual(1, len(line.expressions)) + return line.expressions[0] + + def assertEqualsRegex(self, string, expected): + self.assertEqual(expected, self.parseAssertion(string).toRegex()) + + def assertEqualsText(self, string, text): + self.assertEqual(self.parseExpression(string), RegexExpression.createText(text)) + + def assertEqualsPattern(self, string, pattern): + self.assertEqual(self.parseExpression(string), RegexExpression.createPattern(pattern)) + + def assertEqualsVarRef(self, string, name): + self.assertEqual(self.parseExpression(string), RegexExpression.createVariableReference(name)) + + def assertEqualsVarDef(self, string, name, pattern): + self.assertEqual(self.parseExpression(string), + RegexExpression.createVariableDefinition(name, pattern)) + + def assertVariantNotEqual(self, string, variant): + self.assertNotEqual(variant, self.parseExpression(string).variant) + + # Test that individual parts of the line are recognized + + def test_TextOnly(self): + self.assertEqualsText("foo", "foo") + self.assertEqualsText(" foo ", "foo") + self.assertEqualsRegex("f$o^o", "(f\$o\^o)") + + def test_PatternOnly(self): + self.assertEqualsPattern("{{a?b.c}}", "a?b.c") + + def test_VarRefOnly(self): + self.assertEqualsVarRef("[[ABC]]", "ABC") + + def test_VarDefOnly(self): + self.assertEqualsVarDef("[[ABC:a?b.c]]", "ABC", "a?b.c") + + def test_TextWithWhitespace(self): + self.assertEqualsRegex("foo bar", 
"(foo), (bar)") + self.assertEqualsRegex("foo bar", "(foo), (bar)") + + def test_TextWithRegex(self): + self.assertEqualsRegex("foo{{abc}}bar", "(foo)(abc)(bar)") + + def test_TextWithVar(self): + self.assertEqualsRegex("foo[[ABC:abc]]bar", "(foo)(abc)(bar)") + + def test_PlainWithRegexAndWhitespaces(self): + self.assertEqualsRegex("foo {{abc}}bar", "(foo), (abc)(bar)") + self.assertEqualsRegex("foo{{abc}} bar", "(foo)(abc), (bar)") + self.assertEqualsRegex("foo {{abc}} bar", "(foo), (abc), (bar)") + + def test_PlainWithVarAndWhitespaces(self): + self.assertEqualsRegex("foo [[ABC:abc]]bar", "(foo), (abc)(bar)") + self.assertEqualsRegex("foo[[ABC:abc]] bar", "(foo)(abc), (bar)") + self.assertEqualsRegex("foo [[ABC:abc]] bar", "(foo), (abc), (bar)") + + def test_AllKinds(self): + self.assertEqualsRegex("foo [[ABC:abc]]{{def}}bar", "(foo), (abc)(def)(bar)") + self.assertEqualsRegex("foo[[ABC:abc]] {{def}}bar", "(foo)(abc), (def)(bar)") + self.assertEqualsRegex("foo [[ABC:abc]] {{def}} bar", "(foo), (abc), (def), (bar)") + + # # Test that variables and patterns are parsed correctly + + def test_ValidPattern(self): + self.assertEqualsPattern("{{abc}}", "abc") + self.assertEqualsPattern("{{a[b]c}}", "a[b]c") + self.assertEqualsPattern("{{(a{bc})}}", "(a{bc})") + + def test_ValidRef(self): + self.assertEqualsVarRef("[[ABC]]", "ABC") + self.assertEqualsVarRef("[[A1BC2]]", "A1BC2") + + def test_ValidDef(self): + self.assertEqualsVarDef("[[ABC:abc]]", "ABC", "abc") + self.assertEqualsVarDef("[[ABC:ab:c]]", "ABC", "ab:c") + self.assertEqualsVarDef("[[ABC:a[b]c]]", "ABC", "a[b]c") + self.assertEqualsVarDef("[[ABC:(a[bc])]]", "ABC", "(a[bc])") + + def test_Empty(self): + self.assertVariantNotEqual("{{}}", RegexExpression.Variant.Pattern) + self.assertVariantNotEqual("[[]]", RegexExpression.Variant.VarRef) + self.assertVariantNotEqual("[[:]]", RegexExpression.Variant.VarDef) + + def test_InvalidVarName(self): + self.assertVariantNotEqual("[[0ABC]]", RegexExpression.Variant.VarRef) + self.assertVariantNotEqual("[[AB=C]]", RegexExpression.Variant.VarRef) + self.assertVariantNotEqual("[[ABC=]]", RegexExpression.Variant.VarRef) + self.assertVariantNotEqual("[[0ABC:abc]]", RegexExpression.Variant.VarDef) + self.assertVariantNotEqual("[[AB=C:abc]]", RegexExpression.Variant.VarDef) + self.assertVariantNotEqual("[[ABC=:abc]]", RegexExpression.Variant.VarDef) + + def test_BodyMatchNotGreedy(self): + self.assertEqualsRegex("{{abc}}{{def}}", "(abc)(def)") + self.assertEqualsRegex("[[ABC:abc]][[DEF:def]]", "(abc)(def)") + + def test_NoVarDefsInNotChecks(self): + with self.assertRaises(CheckerException): + self.parseAssertion("[[ABC:abc]]", "-NOT") + + +class CheckerParser_FileLayoutTest(unittest.TestCase): + + # Creates an instance of CheckerFile from provided info. + # Data format: [ ( <case-name>, [ ( <text>, <assert-variant> ), ... ] ), ... 
+  def createFile(self, caseList):
+    testFile = CheckerFile("<test_file>")
+    for caseEntry in caseList:
+      caseName = caseEntry[0]
+      testCase = TestCase(testFile, caseName, 0)
+      assertionList = caseEntry[1]
+      for assertionEntry in assertionList:
+        content = assertionEntry[0]
+        variant = assertionEntry[1]
+        assertion = TestAssertion(testCase, variant, content, 0)
+        assertion.addExpression(RegexExpression.createText(content))
+    return testFile
+
+  def assertParsesTo(self, checkerText, expectedData):
+    expectedFile = self.createFile(expectedData)
+    actualFile = ParseCheckerStream("<test_file>", "CHECK", io.StringIO(ToUnicode(checkerText)))
+    return self.assertEqual(expectedFile, actualFile)
+
+  def test_EmptyFile(self):
+    self.assertParsesTo("", [])
+
+  def test_SingleGroup(self):
+    self.assertParsesTo(
+      """
+        // CHECK-START: Example Group
+        // CHECK: foo
+        // CHECK: bar
+      """,
+      [ ( "Example Group", [ ("foo", TestAssertion.Variant.InOrder),
+                             ("bar", TestAssertion.Variant.InOrder) ] ) ])
+
+  def test_MultipleGroups(self):
+    self.assertParsesTo(
+      """
+        // CHECK-START: Example Group1
+        // CHECK: foo
+        // CHECK: bar
+        // CHECK-START: Example Group2
+        // CHECK: abc
+        // CHECK: def
+      """,
+      [ ( "Example Group1", [ ("foo", TestAssertion.Variant.InOrder),
+                              ("bar", TestAssertion.Variant.InOrder) ] ),
+        ( "Example Group2", [ ("abc", TestAssertion.Variant.InOrder),
+                              ("def", TestAssertion.Variant.InOrder) ] ) ])
+
+  def test_AssertionVariants(self):
+    self.assertParsesTo(
+      """
+        // CHECK-START: Example Group
+        // CHECK: foo
+        // CHECK-NOT: bar
+        // CHECK-DAG: abc
+        // CHECK-DAG: def
+      """,
+      [ ( "Example Group", [ ("foo", TestAssertion.Variant.InOrder),
+                             ("bar", TestAssertion.Variant.Not),
+                             ("abc", TestAssertion.Variant.DAG),
+                             ("def", TestAssertion.Variant.DAG) ] ) ])
diff --git a/tools/checker/file_format/common.py b/tools/checker/file_format/common.py
new file mode 100644
index 0000000000..f91fdeb9cc
--- /dev/null
+++ b/tools/checker/file_format/common.py
@@ -0,0 +1,48 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+def SplitStream(stream, fnProcessLine, fnLineOutsideChunk):
+  """ Reads the given input stream and splits it into chunks based on
+      information extracted from individual lines.
+
+  Arguments:
+   - fnProcessLine: Called on each line with the text and line number. Must
+     return a pair: the data extracted from the line, and the name of a new
+     chunk if this line starts one (either value can be None).
+   - fnLineOutsideChunk: Called on an attempt to attach data prior to creating
+     a chunk.
+  """
+  lineNo = 0
+  allChunks = []
+  currentChunk = None
+
+  for line in stream:
+    lineNo += 1
+    line = line.strip()
+    if not line:
+      continue
+
+    # Let the callback process the line and return information about it.
+    # fnProcessLine can modify the content of the line (or drop it entirely)
+    # and specify whether it starts a new chunk.
+    processedLine, newChunkName = fnProcessLine(line, lineNo)
+    if newChunkName is not None:
+      currentChunk = (newChunkName, [], lineNo)
+      allChunks.append(currentChunk)
+    if processedLine is not None:
+      if currentChunk is not None:
+        currentChunk[1].append(processedLine)
+      else:
+        fnLineOutsideChunk(line, lineNo)
+  return allChunks
diff --git a/tools/checker/match/__init__.py b/tools/checker/match/__init__.py
new file mode 100644
index 0000000000..d0a140be2b
--- /dev/null
+++ b/tools/checker/match/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tools/checker/match/file.py b/tools/checker/match/file.py
new file mode 100644
index 0000000000..d9da690e28
--- /dev/null
+++ b/tools/checker/match/file.py
@@ -0,0 +1,147 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from common.logger import Logger
+from file_format.c1visualizer.struct import C1visualizerFile, C1visualizerPass
+from file_format.checker.struct import CheckerFile, TestCase, TestAssertion
+from match.line import MatchLines
+
+def __headAndTail(lst):
+  return lst[0], lst[1:]
+
+def __splitByVariant(lines, variant):
+  """ Splits a list of check lines at index 'i' such that lines[i] is the first
+      element whose variant is not equal to the given parameter.
+  """
+  i = 0
+  while i < len(lines) and lines[i].variant == variant:
+    i += 1
+  return lines[:i], lines[i:]
+
+def __nextIndependentChecks(checkLines):
+  """ Extracts the first sequence of check lines which are independent of each
+      other's match location, i.e. either consecutive DAG lines or a single
+      InOrder line. Any Not lines preceding this sequence are also extracted.
+  """
+  notChecks, checkLines = __splitByVariant(checkLines, TestAssertion.Variant.Not)
+  if not checkLines:
+    return notChecks, [], []
+
+  head, tail = __headAndTail(checkLines)
+  if head.variant == TestAssertion.Variant.InOrder:
+    return notChecks, [head], tail
+  else:
+    assert head.variant == TestAssertion.Variant.DAG
+    independentChecks, checkLines = __splitByVariant(checkLines, TestAssertion.Variant.DAG)
+    return notChecks, independentChecks, checkLines
+
+def __findFirstMatch(checkLine, outputLines, startLineNo, lineFilter, varState):
+  """ If successful, returns the line number of the first output line matching
+      the check line and the updated variable state. Otherwise returns -1 and
+      None, respectively.
+      The 'lineFilter' parameter can be used to supply a list of line numbers
+      (counting from 1) which should be skipped.
+  """
+  matchLineNo = startLineNo
+  for outputLine in outputLines:
+    if matchLineNo not in lineFilter:
+      newVarState = MatchLines(checkLine, outputLine, varState)
+      if newVarState is not None:
+        return matchLineNo, newVarState
+    matchLineNo += 1
+  return -1, None
+
+def __matchIndependentChecks(checkLines, outputLines, startLineNo, varState):
+  """ Matches the given positive check lines against the output in order of
+      appearance. Variable state is propagated but the scope of the search
+      remains the same for all checks. Each output line can only be matched
+      once. If all check lines are matched, the resulting variable state is
+      returned together with the remaining output. The function also returns
+      output lines which appear before any of the matched lines so they can
+      be tested against Not checks.
+  """
+  # If no checks are provided, skip over the entire output.
+  if not checkLines:
+    return outputLines, [], startLineNo + len(outputLines), varState
+
+  # Keep track of which lines have been matched.
+  matchedLines = []
+
+  # Find the first unused output line which matches each check line.
+  for checkLine in checkLines:
+    matchLineNo, varState = \
+      __findFirstMatch(checkLine, outputLines, startLineNo, matchedLines, varState)
+    if varState is None:
+      Logger.testFailed("Could not match check line \"" + checkLine.originalText + "\" " +
+                        "starting from output line " + str(startLineNo),
+                        checkLine.fileName, checkLine.lineNo)
+    matchedLines.append(matchLineNo)
+
+  # Return new variable state and the output lines which lie outside the
+  # match locations of this independent group.
+  minMatchLineNo = min(matchedLines)
+  maxMatchLineNo = max(matchedLines)
+  precedingLines = outputLines[:minMatchLineNo - startLineNo]
+  remainingLines = outputLines[maxMatchLineNo - startLineNo + 1:]
+  return precedingLines, remainingLines, maxMatchLineNo + 1, varState
+
+def __matchNotLines(checkLines, outputLines, startLineNo, varState):
+  """ Makes sure that the given check lines do not match any of the given output
+      lines. Variable state does not change.
+  """
+  for checkLine in checkLines:
+    assert checkLine.variant == TestAssertion.Variant.Not
+    matchLineNo, matchVarState = \
+      __findFirstMatch(checkLine, outputLines, startLineNo, [], varState)
+    if matchVarState is not None:
+      Logger.testFailed("CHECK-NOT line \"" + checkLine.originalText + "\" matches output line " + \
+                        str(matchLineNo), checkLine.fileName, checkLine.lineNo)
+
+def __matchGroups(checkGroup, outputGroup):
+  """ Matches the check lines in this group against an output group. It is
+      responsible for running the checks in the right order and scope, and
+      for propagating the variable state between the check lines.
+  """
+  varState = {}
+  checkLines = checkGroup.assertions
+  outputLines = outputGroup.body
+  startLineNo = outputGroup.startLineNo
+
+  while checkLines:
+    # Extract the next sequence of location-independent checks to be matched.
+    notChecks, independentChecks, checkLines = __nextIndependentChecks(checkLines)
+
+    # Match the independent checks.
+    notOutput, outputLines, newStartLineNo, newVarState = \
+      __matchIndependentChecks(independentChecks, outputLines, startLineNo, varState)
+
+    # Run the Not checks against the output lines which lie between the last
+    # two independent groups or the bounds of the output.
+    __matchNotLines(notChecks, notOutput, startLineNo, varState)
+
+    # Update variable state.
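+    # (Not checks never modify the variable state, so only the state returned
+    # by the independent checks above needs to be carried over.)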
+    startLineNo = newStartLineNo
+    varState = newVarState
+
+def MatchFiles(checkerFile, c1File):
+  for testCase in checkerFile.testCases:
+    # TODO: Currently does not handle multiple occurrences of the same group
+    # name, e.g. when a pass is run multiple times. It will always try to
+    # match a check group against the first output group of the same name.
+    c1Pass = c1File.findPass(testCase.name)
+    if c1Pass is None:
+      Logger.fail("Test case \"" + testCase.name + "\" not found in the C1visualizer output",
+                  testCase.fileName, testCase.startLineNo)
+    Logger.startTest(testCase.name)
+    __matchGroups(testCase, c1Pass)
+    Logger.testPassed()
diff --git a/tools/checker/match/line.py b/tools/checker/match/line.py
new file mode 100644
index 0000000000..f0253c351b
--- /dev/null
+++ b/tools/checker/match/line.py
@@ -0,0 +1,89 @@
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from common.logger import Logger
+from file_format.checker.struct import TestAssertion, RegexExpression
+
+import re
+
+def __isMatchAtStart(match):
+  """ Tests if the given Match occurred at the beginning of the line. """
+  return (match is not None) and (match.start() == 0)
+
+def __generatePattern(checkLine, linePart, varState):
+  """ Returns the regex pattern to be matched in the output line. Variable
+      references are substituted with their current values provided in the
+      'varState' argument.
+
+      An exception is raised if a referenced variable is undefined.
+  """
+  if linePart.variant == RegexExpression.Variant.VarRef:
+    try:
+      return re.escape(varState[linePart.name])
+    except KeyError:
+      Logger.testFailed("Use of undefined variable \"" + linePart.name + "\"",
+                        checkLine.fileName, checkLine.lineNo)
+  else:
+    return linePart.pattern
+
+def __isSeparated(outputLine, matchStart):
+  return (matchStart == 0) or (outputLine[matchStart - 1:matchStart].isspace())
+
+def MatchLines(checkLine, outputLine, initialVarState):
+  """ Attempts to match the check line against a line from the output file with
+      the given initial variable values. It returns the new variable state if
+      successful and None otherwise.
+  """
+  # Do the full matching on a shadow copy of the variable state. If the
+  # matching fails half-way, we will not need to revert the state.
+  varState = dict(initialVarState)
+
+  matchStart = 0
+  isAfterSeparator = True
+
+  # Now try to parse all of the parts of the check line in the right order.
+  # Variable values are updated on-the-fly, meaning that a variable can
+  # be referenced immediately after its definition.
+  for part in checkLine.expressions:
+    if part.variant == RegexExpression.Variant.Separator:
+      isAfterSeparator = True
+      continue
+
+    # Find the earliest match for this line part.
+ pattern = __generatePattern(checkLine, part, varState) + while True: + match = re.search(pattern, outputLine[matchStart:]) + if (match is None) or (not isAfterSeparator and not __isMatchAtStart(match)): + return None + matchEnd = matchStart + match.end() + matchStart += match.start() + + # Check if this is a valid match if we expect a whitespace separator + # before the matched text. Otherwise loop and look for another match. + if not isAfterSeparator or __isSeparated(outputLine, matchStart): + break + else: + matchStart += 1 + + if part.variant == RegexExpression.Variant.VarDef: + if part.name in varState: + Logger.testFailed("Multiple definitions of variable \"" + part.name + "\"", + checkLine.fileName, checkLine.lineNo) + varState[part.name] = outputLine[matchStart:matchEnd] + + matchStart = matchEnd + isAfterSeparator = False + + # All parts were successfully matched. Return the new variable state. + return varState diff --git a/tools/checker/match/test.py b/tools/checker/match/test.py new file mode 100644 index 0000000000..62e8e000e8 --- /dev/null +++ b/tools/checker/match/test.py @@ -0,0 +1,326 @@ +# Copyright (C) 2014 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from common.testing import ToUnicode +from file_format.c1visualizer.parser import ParseC1visualizerStream +from file_format.c1visualizer.struct import C1visualizerFile, C1visualizerPass +from file_format.checker.parser import ParseCheckerStream, ParseCheckerAssertion +from file_format.checker.struct import CheckerFile, TestCase, TestAssertion, RegexExpression +from match.file import MatchFiles +from match.line import MatchLines + +import io +import unittest + +CheckerException = SystemExit + +class MatchLines_Test(unittest.TestCase): + + def createTestAssertion(self, checkerString): + checkerFile = CheckerFile("<checker-file>") + testCase = TestCase(checkerFile, "TestMethod TestPass", 0) + return ParseCheckerAssertion(testCase, checkerString, TestAssertion.Variant.InOrder, 0) + + def tryMatch(self, checkerString, c1String, varState={}): + return MatchLines(self.createTestAssertion(checkerString), ToUnicode(c1String), varState) + + def matches(self, checkerString, c1String, varState={}): + return self.tryMatch(checkerString, c1String, varState) is not None + + def test_TextAndWhitespace(self): + self.assertTrue(self.matches("foo", "foo")) + self.assertTrue(self.matches("foo", " foo ")) + self.assertTrue(self.matches("foo", "foo bar")) + self.assertFalse(self.matches("foo", "XfooX")) + self.assertFalse(self.matches("foo", "zoo")) + + self.assertTrue(self.matches("foo bar", "foo bar")) + self.assertTrue(self.matches("foo bar", "abc foo bar def")) + self.assertTrue(self.matches("foo bar", "foo foo bar bar")) + + self.assertTrue(self.matches("foo bar", "foo X bar")) + self.assertFalse(self.matches("foo bar", "foo Xbar")) + + def test_Pattern(self): + self.assertTrue(self.matches("foo{{A|B}}bar", "fooAbar")) + self.assertTrue(self.matches("foo{{A|B}}bar", "fooBbar")) + 
self.assertFalse(self.matches("foo{{A|B}}bar", "fooCbar")) + + def test_VariableReference(self): + self.assertTrue(self.matches("foo[[X]]bar", "foobar", {"X": ""})) + self.assertTrue(self.matches("foo[[X]]bar", "fooAbar", {"X": "A"})) + self.assertTrue(self.matches("foo[[X]]bar", "fooBbar", {"X": "B"})) + self.assertFalse(self.matches("foo[[X]]bar", "foobar", {"X": "A"})) + self.assertFalse(self.matches("foo[[X]]bar", "foo bar", {"X": "A"})) + with self.assertRaises(CheckerException): + self.assertTrue(self.matches("foo[[X]]bar", "foobar", {})) + + def test_VariableDefinition(self): + self.assertTrue(self.matches("foo[[X:A|B]]bar", "fooAbar")) + self.assertTrue(self.matches("foo[[X:A|B]]bar", "fooBbar")) + self.assertFalse(self.matches("foo[[X:A|B]]bar", "fooCbar")) + + env = self.tryMatch("foo[[X:A.*B]]bar", "fooABbar", {}) + self.assertEqual(env, {"X": "AB"}) + env = self.tryMatch("foo[[X:A.*B]]bar", "fooAxxBbar", {}) + self.assertEqual(env, {"X": "AxxB"}) + + self.assertTrue(self.matches("foo[[X:A|B]]bar[[X]]baz", "fooAbarAbaz")) + self.assertTrue(self.matches("foo[[X:A|B]]bar[[X]]baz", "fooBbarBbaz")) + self.assertFalse(self.matches("foo[[X:A|B]]bar[[X]]baz", "fooAbarBbaz")) + + def test_NoVariableRedefinition(self): + with self.assertRaises(CheckerException): + self.matches("[[X:...]][[X]][[X:...]][[X]]", "foofoobarbar") + + def test_EnvNotChangedOnPartialMatch(self): + env = {"Y": "foo"} + self.assertFalse(self.matches("[[X:A]]bar", "Abaz", env)) + self.assertFalse("X" in env.keys()) + + def test_VariableContentEscaped(self): + self.assertTrue(self.matches("[[X:..]]foo[[X]]", ".*foo.*")) + self.assertFalse(self.matches("[[X:..]]foo[[X]]", ".*fooAAAA")) + + +class MatchFiles_Test(unittest.TestCase): + + def matches(self, checkerString, c1String): + checkerString = \ + """ + // CHECK-START: MyMethod MyPass + """ + checkerString + c1String = \ + """ + begin_compilation + name "MyMethod" + method "MyMethod" + date 1234 + end_compilation + begin_cfg + name "MyPass" + """ + c1String + \ + """ + end_cfg + """ + checkerFile = ParseCheckerStream("<test-file>", "CHECK", io.StringIO(ToUnicode(checkerString))) + c1File = ParseC1visualizerStream("<c1-file>", io.StringIO(ToUnicode(c1String))) + try: + MatchFiles(checkerFile, c1File) + return True + except CheckerException: + return False + + def test_Text(self): + self.assertTrue(self.matches( "// CHECK: foo bar", "foo bar")) + self.assertFalse(self.matches("// CHECK: foo bar", "abc def")) + + def test_Pattern(self): + self.assertTrue(self.matches( "// CHECK: abc {{de.}}", "abc de#")) + self.assertFalse(self.matches("// CHECK: abc {{de.}}", "abc d#f")) + + def test_Variables(self): + self.assertTrue(self.matches( + """ + // CHECK: foo[[X:.]]bar + // CHECK: abc[[X]]def + """, + """ + foo bar + abc def + """)) + self.assertTrue(self.matches( + """ + // CHECK: foo[[X:([0-9]+)]]bar + // CHECK: abc[[X]]def + // CHECK: ### [[X]] ### + """, + """ + foo1234bar + abc1234def + ### 1234 ### + """)) + self.assertFalse(self.matches( + """ + // CHECK: foo[[X:([0-9]+)]]bar + // CHECK: abc[[X]]def + """, + """ + foo1234bar + abc1235def + """)) + + def test_InOrderAssertions(self): + self.assertTrue(self.matches( + """ + // CHECK: foo + // CHECK: bar + """, + """ + foo + bar + """)) + self.assertFalse(self.matches( + """ + // CHECK: foo + // CHECK: bar + """, + """ + bar + foo + """)) + + def test_DagAssertions(self): + self.assertTrue(self.matches( + """ + // CHECK-DAG: foo + // CHECK-DAG: bar + """, + """ + foo + bar + """)) + self.assertTrue(self.matches( + """ 
+ // CHECK-DAG: foo + // CHECK-DAG: bar + """, + """ + bar + foo + """)) + + def test_DagAssertionsScope(self): + self.assertTrue(self.matches( + """ + // CHECK: foo + // CHECK-DAG: abc + // CHECK-DAG: def + // CHECK: bar + """, + """ + foo + def + abc + bar + """)) + self.assertFalse(self.matches( + """ + // CHECK: foo + // CHECK-DAG: abc + // CHECK-DAG: def + // CHECK: bar + """, + """ + foo + abc + bar + def + """)) + self.assertFalse(self.matches( + """ + // CHECK: foo + // CHECK-DAG: abc + // CHECK-DAG: def + // CHECK: bar + """, + """ + foo + def + bar + abc + """)) + + def test_NotAssertions(self): + self.assertTrue(self.matches( + """ + // CHECK-NOT: foo + """, + """ + abc + def + """)) + self.assertFalse(self.matches( + """ + // CHECK-NOT: foo + """, + """ + abc foo + def + """)) + self.assertFalse(self.matches( + """ + // CHECK-NOT: foo + // CHECK-NOT: bar + """, + """ + abc + def bar + """)) + + def test_NotAssertionsScope(self): + self.assertTrue(self.matches( + """ + // CHECK: abc + // CHECK-NOT: foo + // CHECK: def + """, + """ + abc + def + """)) + self.assertTrue(self.matches( + """ + // CHECK: abc + // CHECK-NOT: foo + // CHECK: def + """, + """ + abc + def + foo + """)) + self.assertFalse(self.matches( + """ + // CHECK: abc + // CHECK-NOT: foo + // CHECK: def + """, + """ + abc + foo + def + """)) + + def test_LineOnlyMatchesOnce(self): + self.assertTrue(self.matches( + """ + // CHECK-DAG: foo + // CHECK-DAG: foo + """, + """ + foo + abc + foo + """)) + self.assertFalse(self.matches( + """ + // CHECK-DAG: foo + // CHECK-DAG: foo + """, + """ + foo + abc + bar + """)) diff --git a/tools/checker/run_unit_tests.py b/tools/checker/run_unit_tests.py new file mode 100755 index 0000000000..01708dbd27 --- /dev/null +++ b/tools/checker/run_unit_tests.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python2 +# +# Copyright (C) 2014 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from common.logger import Logger +from file_format.c1visualizer.test import C1visualizerParser_Test +from file_format.checker.test import CheckerParser_PrefixTest, \ + CheckerParser_RegexExpressionTest, \ + CheckerParser_FileLayoutTest +from match.test import MatchLines_Test, \ + MatchFiles_Test + +import unittest + +if __name__ == '__main__': + Logger.Verbosity = Logger.Level.NoOutput + unittest.main(verbosity=2) diff --git a/tools/checker_test.py b/tools/checker_test.py deleted file mode 100755 index 667ca90079..0000000000 --- a/tools/checker_test.py +++ /dev/null @@ -1,474 +0,0 @@ -#!/usr/bin/env python2 -# -# Copyright (C) 2014 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
+if __name__ == '__main__':
+  Logger.Verbosity = Logger.Level.NoOutput
+  unittest.main(verbosity=2)
diff --git a/tools/checker_test.py b/tools/checker_test.py
deleted file mode 100755
index 667ca90079..0000000000
--- a/tools/checker_test.py
+++ /dev/null
@@ -1,474 +0,0 @@
-#!/usr/bin/env python2
-#
-# Copyright (C) 2014 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# This is a test file which exercises all features supported by the domain-
-# specific markup language implemented by Checker.
-
-import checker
-import io
-import unittest
-
-# The parent type of exception expected to be thrown by Checker during tests.
-# It must be specific enough to not cover exceptions thrown due to actual flaws
-# in Checker.
-CheckerException = SystemExit
-
-
-class TestCheckFile_PrefixExtraction(unittest.TestCase):
-  def __tryParse(self, string):
-    checkFile = checker.CheckFile(None, [])
-    return checkFile._extractLine("CHECK", string)
-
-  def test_InvalidFormat(self):
-    self.assertIsNone(self.__tryParse("CHECK"))
-    self.assertIsNone(self.__tryParse(":CHECK"))
-    self.assertIsNone(self.__tryParse("CHECK:"))
-    self.assertIsNone(self.__tryParse("//CHECK"))
-    self.assertIsNone(self.__tryParse("#CHECK"))
-
-    self.assertIsNotNone(self.__tryParse("//CHECK:foo"))
-    self.assertIsNotNone(self.__tryParse("#CHECK:bar"))
-
-  def test_InvalidLabel(self):
-    self.assertIsNone(self.__tryParse("//ACHECK:foo"))
-    self.assertIsNone(self.__tryParse("#ACHECK:foo"))
-
-  def test_NotFirstOnTheLine(self):
-    self.assertIsNone(self.__tryParse("A// CHECK: foo"))
-    self.assertIsNone(self.__tryParse("A # CHECK: foo"))
-    self.assertIsNone(self.__tryParse("// // CHECK: foo"))
-    self.assertIsNone(self.__tryParse("# # CHECK: foo"))
-
-  def test_WhitespaceAgnostic(self):
-    self.assertIsNotNone(self.__tryParse("  //CHECK: foo"))
-    self.assertIsNotNone(self.__tryParse("//  CHECK: foo"))
-    self.assertIsNotNone(self.__tryParse("    //CHECK: foo"))
-    self.assertIsNotNone(self.__tryParse("//    CHECK: foo"))
-
-
-class TestCheckLine_Parse(unittest.TestCase):
-  def __getPartPattern(self, linePart):
-    if linePart.variant == checker.CheckElement.Variant.Separator:
-      return "\s+"
-    else:
-      return linePart.pattern
-
-  def __getRegex(self, checkLine):
-    return "".join(map(lambda x: "(" + self.__getPartPattern(x) + ")", checkLine.lineParts))
-
-  def __tryParse(self, string):
-    return checker.CheckLine(string)
-
-  def __parsesTo(self, string, expected):
-    self.assertEqual(expected, self.__getRegex(self.__tryParse(string)))
-
-  def __tryParseNot(self, string):
-    return checker.CheckLine(string, checker.CheckLine.Variant.Not)
-
-  def __parsesPattern(self, string, pattern):
-    line = self.__tryParse(string)
-    self.assertEqual(1, len(line.lineParts))
-    self.assertEqual(checker.CheckElement.Variant.Pattern, line.lineParts[0].variant)
-    self.assertEqual(pattern, line.lineParts[0].pattern)
-
-  def __parsesVarRef(self, string, name):
-    line = self.__tryParse(string)
-    self.assertEqual(1, len(line.lineParts))
-    self.assertEqual(checker.CheckElement.Variant.VarRef, line.lineParts[0].variant)
-    self.assertEqual(name, line.lineParts[0].name)
-
-  def __parsesVarDef(self, string, name, body):
-    line = self.__tryParse(string)
-    self.assertEqual(1, len(line.lineParts))
-    self.assertEqual(checker.CheckElement.Variant.VarDef, line.lineParts[0].variant)
-    self.assertEqual(name, line.lineParts[0].name)
-    self.assertEqual(body, line.lineParts[0].pattern)
-
-  def __doesNotParse(self, string, partType):
-    line = self.__tryParse(string)
-    self.assertEqual(1, len(line.lineParts))
-    self.assertNotEqual(partType, line.lineParts[0].variant)
-
-  # Test that individual parts of the line are recognized
-
-  def test_TextOnly(self):
-    self.__parsesTo("foo", "(foo)")
-    self.__parsesTo(" foo ", "(foo)")
-    self.__parsesTo("f$o^o", "(f\$o\^o)")
-
-  def test_TextWithWhitespace(self):
-    self.__parsesTo("foo bar", "(foo)(\s+)(bar)")
-    self.__parsesTo("foo   bar", "(foo)(\s+)(bar)")
-
-  def test_RegexOnly(self):
-    self.__parsesPattern("{{a?b.c}}", "a?b.c")
-
-  def test_VarRefOnly(self):
-    self.__parsesVarRef("[[ABC]]", "ABC")
-
-  def test_VarDefOnly(self):
-    self.__parsesVarDef("[[ABC:a?b.c]]", "ABC", "a?b.c")
-
-  def test_TextWithRegex(self):
-    self.__parsesTo("foo{{abc}}bar", "(foo)(abc)(bar)")
-
-  def test_TextWithVar(self):
-    self.__parsesTo("foo[[ABC:abc]]bar", "(foo)(abc)(bar)")
-
-  def test_PlainWithRegexAndWhitespaces(self):
-    self.__parsesTo("foo {{abc}}bar", "(foo)(\s+)(abc)(bar)")
-    self.__parsesTo("foo{{abc}} bar", "(foo)(abc)(\s+)(bar)")
-    self.__parsesTo("foo {{abc}} bar", "(foo)(\s+)(abc)(\s+)(bar)")
-
-  def test_PlainWithVarAndWhitespaces(self):
-    self.__parsesTo("foo [[ABC:abc]]bar", "(foo)(\s+)(abc)(bar)")
-    self.__parsesTo("foo[[ABC:abc]] bar", "(foo)(abc)(\s+)(bar)")
-    self.__parsesTo("foo [[ABC:abc]] bar", "(foo)(\s+)(abc)(\s+)(bar)")
-
-  def test_AllKinds(self):
-    self.__parsesTo("foo [[ABC:abc]]{{def}}bar", "(foo)(\s+)(abc)(def)(bar)")
-    self.__parsesTo("foo[[ABC:abc]] {{def}}bar", "(foo)(abc)(\s+)(def)(bar)")
-    self.__parsesTo("foo [[ABC:abc]] {{def}} bar", "(foo)(\s+)(abc)(\s+)(def)(\s+)(bar)")
-
-  # Test that variables and patterns are parsed correctly
-
-  def test_ValidPattern(self):
-    self.__parsesPattern("{{abc}}", "abc")
-    self.__parsesPattern("{{a[b]c}}", "a[b]c")
-    self.__parsesPattern("{{(a{bc})}}", "(a{bc})")
-
-  def test_ValidRef(self):
-    self.__parsesVarRef("[[ABC]]", "ABC")
-    self.__parsesVarRef("[[A1BC2]]", "A1BC2")
-
-  def test_ValidDef(self):
-    self.__parsesVarDef("[[ABC:abc]]", "ABC", "abc")
-    self.__parsesVarDef("[[ABC:ab:c]]", "ABC", "ab:c")
-    self.__parsesVarDef("[[ABC:a[b]c]]", "ABC", "a[b]c")
-    self.__parsesVarDef("[[ABC:(a[bc])]]", "ABC", "(a[bc])")
-
-  def test_Empty(self):
-    self.__doesNotParse("{{}}", checker.CheckElement.Variant.Pattern)
-    self.__doesNotParse("[[]]", checker.CheckElement.Variant.VarRef)
-    self.__doesNotParse("[[:]]", checker.CheckElement.Variant.VarDef)
-
-  def test_InvalidVarName(self):
-    self.__doesNotParse("[[0ABC]]", checker.CheckElement.Variant.VarRef)
-    self.__doesNotParse("[[AB=C]]", checker.CheckElement.Variant.VarRef)
-    self.__doesNotParse("[[ABC=]]", checker.CheckElement.Variant.VarRef)
-    self.__doesNotParse("[[0ABC:abc]]", checker.CheckElement.Variant.VarDef)
-    self.__doesNotParse("[[AB=C:abc]]", checker.CheckElement.Variant.VarDef)
-    self.__doesNotParse("[[ABC=:abc]]", checker.CheckElement.Variant.VarDef)
-
-  def test_BodyMatchNotGreedy(self):
-    self.__parsesTo("{{abc}}{{def}}", "(abc)(def)")
-    self.__parsesTo("[[ABC:abc]][[DEF:def]]", "(abc)(def)")
-
-  def test_NoVarDefsInNotChecks(self):
-    with self.assertRaises(CheckerException):
-      self.__tryParseNot("[[ABC:abc]]")
-
-class TestCheckLine_Match(unittest.TestCase):
-  def __matchSingle(self, checkString, outputString, varState={}):
-    checkLine = checker.CheckLine(checkString)
-    newVarState = checkLine.match(outputString, varState)
-    self.assertIsNotNone(newVarState)
-    return newVarState
-
-  def __notMatchSingle(self, checkString, outputString, varState={}):
-    checkLine = checker.CheckLine(checkString)
-    self.assertIsNone(checkLine.match(outputString, varState))
-
-  def test_TextAndWhitespace(self):
-    self.__matchSingle("foo", "foo")
-    self.__matchSingle("foo", " foo ")
-    self.__matchSingle("foo", "foo bar")
-    self.__notMatchSingle("foo", "XfooX")
-    self.__notMatchSingle("foo", "zoo")
-
-    self.__matchSingle("foo bar", "foo bar")
-    self.__matchSingle("foo bar", "abc foo bar def")
-    self.__matchSingle("foo bar", "foo foo bar bar")
-
-    self.__matchSingle("foo bar", "foo X bar")
-    self.__notMatchSingle("foo bar", "foo Xbar")
-
-  def test_Pattern(self):
-    self.__matchSingle("foo{{A|B}}bar", "fooAbar")
-    self.__matchSingle("foo{{A|B}}bar", "fooBbar")
-    self.__notMatchSingle("foo{{A|B}}bar", "fooCbar")
-
-  def test_VariableReference(self):
-    self.__matchSingle("foo[[X]]bar", "foobar", {"X": ""})
-    self.__matchSingle("foo[[X]]bar", "fooAbar", {"X": "A"})
-    self.__matchSingle("foo[[X]]bar", "fooBbar", {"X": "B"})
-    self.__notMatchSingle("foo[[X]]bar", "foobar", {"X": "A"})
-    self.__notMatchSingle("foo[[X]]bar", "foo bar", {"X": "A"})
-    with self.assertRaises(CheckerException):
-      self.__matchSingle("foo[[X]]bar", "foobar", {})
-
-  def test_VariableDefinition(self):
-    self.__matchSingle("foo[[X:A|B]]bar", "fooAbar")
-    self.__matchSingle("foo[[X:A|B]]bar", "fooBbar")
-    self.__notMatchSingle("foo[[X:A|B]]bar", "fooCbar")
-
-    env = self.__matchSingle("foo[[X:A.*B]]bar", "fooABbar", {})
-    self.assertEqual(env, {"X": "AB"})
-    env = self.__matchSingle("foo[[X:A.*B]]bar", "fooAxxBbar", {})
-    self.assertEqual(env, {"X": "AxxB"})
-
-    self.__matchSingle("foo[[X:A|B]]bar[[X]]baz", "fooAbarAbaz")
-    self.__matchSingle("foo[[X:A|B]]bar[[X]]baz", "fooBbarBbaz")
-    self.__notMatchSingle("foo[[X:A|B]]bar[[X]]baz", "fooAbarBbaz")
-
-  def test_NoVariableRedefinition(self):
-    with self.assertRaises(CheckerException):
-      self.__matchSingle("[[X:...]][[X]][[X:...]][[X]]", "foofoobarbar")
-
-  def test_EnvNotChangedOnPartialMatch(self):
-    env = {"Y": "foo"}
-    self.__notMatchSingle("[[X:A]]bar", "Abaz", env)
-    self.assertFalse("X" in env.keys())
-
-  def test_VariableContentEscaped(self):
-    self.__matchSingle("[[X:..]]foo[[X]]", ".*foo.*")
-    self.__notMatchSingle("[[X:..]]foo[[X]]", ".*fooAAAA")
-
-
-CheckVariant = checker.CheckLine.Variant
-
-def prepareSingleCheck(line):
-  if isinstance(line, str):
-    return checker.CheckLine(line)
-  else:
-    return checker.CheckLine(line[0], line[1])
-
-def prepareChecks(lines):
-  if isinstance(lines, str):
-    lines = lines.splitlines()
-  return list(map(lambda line: prepareSingleCheck(line), lines))
-
-
-class TestCheckGroup_Match(unittest.TestCase):
-  def __matchMulti(self, checkLines, outputString):
-    checkGroup = checker.CheckGroup("MyGroup", prepareChecks(checkLines))
-    outputGroup = checker.OutputGroup("MyGroup", outputString.splitlines())
-    return checkGroup.match(outputGroup)
-
-  def __notMatchMulti(self, checkString, outputString):
-    with self.assertRaises(CheckerException):
-      self.__matchMulti(checkString, outputString)
-
-  def test_TextAndPattern(self):
-    self.__matchMulti("""foo bar
-                         abc {{def}}""",
-                      """foo bar
-                         abc def""");
-    self.__matchMulti("""foo bar
-                         abc {{de.}}""",
-                      """=======
-                         foo bar
-                         =======
-                         abc de#
-                         =======""");
-    self.__notMatchMulti("""//XYZ: foo bar
-                            //XYZ: abc {{def}}""",
-                         """=======
-                            foo bar
-                            =======
-                            abc de#
-                            =======""");
-
-  def test_Variables(self):
-    self.__matchMulti("""foo[[X:.]]bar
-                         abc[[X]]def""",
-                      """foo bar
-                         abc def""");
-    self.__matchMulti("""foo[[X:([0-9]+)]]bar
-                         abc[[X]]def
-                         ### [[X]] ###""",
"""foo1234bar - abc1234def - ### 1234 ###"""); - - def test_Ordering(self): - self.__matchMulti([("foo", CheckVariant.InOrder), - ("bar", CheckVariant.InOrder)], - """foo - bar""") - self.__notMatchMulti([("foo", CheckVariant.InOrder), - ("bar", CheckVariant.InOrder)], - """bar - foo""") - self.__matchMulti([("abc", CheckVariant.DAG), - ("def", CheckVariant.DAG)], - """abc - def""") - self.__matchMulti([("abc", CheckVariant.DAG), - ("def", CheckVariant.DAG)], - """def - abc""") - self.__matchMulti([("foo", CheckVariant.InOrder), - ("abc", CheckVariant.DAG), - ("def", CheckVariant.DAG), - ("bar", CheckVariant.InOrder)], - """foo - def - abc - bar""") - self.__notMatchMulti([("foo", CheckVariant.InOrder), - ("abc", CheckVariant.DAG), - ("def", CheckVariant.DAG), - ("bar", CheckVariant.InOrder)], - """foo - abc - bar""") - self.__notMatchMulti([("foo", CheckVariant.InOrder), - ("abc", CheckVariant.DAG), - ("def", CheckVariant.DAG), - ("bar", CheckVariant.InOrder)], - """foo - def - bar""") - - def test_NotAssertions(self): - self.__matchMulti([("foo", CheckVariant.Not)], - """abc - def""") - self.__notMatchMulti([("foo", CheckVariant.Not)], - """abc foo - def""") - self.__notMatchMulti([("foo", CheckVariant.Not), - ("bar", CheckVariant.Not)], - """abc - def bar""") - - def test_LineOnlyMatchesOnce(self): - self.__matchMulti([("foo", CheckVariant.DAG), - ("foo", CheckVariant.DAG)], - """foo - foo""") - self.__notMatchMulti([("foo", CheckVariant.DAG), - ("foo", CheckVariant.DAG)], - """foo - bar""") - -class TestOutputFile_Parse(unittest.TestCase): - def __parsesTo(self, string, expected): - if isinstance(string, str): - string = unicode(string) - outputStream = io.StringIO(string) - return self.assertEqual(checker.OutputFile(outputStream).groups, expected) - - def test_NoInput(self): - self.__parsesTo(None, []) - self.__parsesTo("", []) - - def test_SingleGroup(self): - self.__parsesTo("""begin_compilation - method "MyMethod" - end_compilation - begin_cfg - name "pass1" - foo - bar - end_cfg""", - [ checker.OutputGroup("MyMethod pass1", [ "foo", "bar" ]) ]) - - def test_MultipleGroups(self): - self.__parsesTo("""begin_compilation - name "xyz1" - method "MyMethod1" - date 1234 - end_compilation - begin_cfg - name "pass1" - foo - bar - end_cfg - begin_cfg - name "pass2" - abc - def - end_cfg""", - [ checker.OutputGroup("MyMethod1 pass1", [ "foo", "bar" ]), - checker.OutputGroup("MyMethod1 pass2", [ "abc", "def" ]) ]) - - self.__parsesTo("""begin_compilation - name "xyz1" - method "MyMethod1" - date 1234 - end_compilation - begin_cfg - name "pass1" - foo - bar - end_cfg - begin_compilation - name "xyz2" - method "MyMethod2" - date 5678 - end_compilation - begin_cfg - name "pass2" - abc - def - end_cfg""", - [ checker.OutputGroup("MyMethod1 pass1", [ "foo", "bar" ]), - checker.OutputGroup("MyMethod2 pass2", [ "abc", "def" ]) ]) - -class TestCheckFile_Parse(unittest.TestCase): - def __parsesTo(self, string, expected): - if isinstance(string, str): - string = unicode(string) - checkStream = io.StringIO(string) - return self.assertEqual(checker.CheckFile("CHECK", checkStream).groups, expected) - - def test_NoInput(self): - self.__parsesTo(None, []) - self.__parsesTo("", []) - - def test_SingleGroup(self): - self.__parsesTo("""// CHECK-START: Example Group - // CHECK: foo - // CHECK: bar""", - [ checker.CheckGroup("Example Group", prepareChecks([ "foo", "bar" ])) ]) - - def test_MultipleGroups(self): - self.__parsesTo("""// CHECK-START: Example Group1 - // CHECK: foo - // CHECK: bar - // CHECK-START: 
-                       // CHECK: abc
-                       // CHECK: def""",
-                    [ checker.CheckGroup("Example Group1", prepareChecks([ "foo", "bar" ])),
-                      checker.CheckGroup("Example Group2", prepareChecks([ "abc", "def" ])) ])
-
-  def test_CheckVariants(self):
-    self.__parsesTo("""// CHECK-START: Example Group
-                       // CHECK: foo
-                       // CHECK-NOT: bar
-                       // CHECK-DAG: abc
-                       // CHECK-DAG: def""",
-                    [ checker.CheckGroup("Example Group",
-                                         prepareChecks([ ("foo", CheckVariant.InOrder),
-                                                         ("bar", CheckVariant.Not),
-                                                         ("abc", CheckVariant.DAG),
-                                                         ("def", CheckVariant.DAG) ])) ])
-
-if __name__ == '__main__':
-  checker.Logger.Verbosity = checker.Logger.Level.NoOutput
-  unittest.main()
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index a3870362c2..8ce19ddb93 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -109,12 +109,6 @@
   bug: 19165288
 },
 {
-  description: "Bug in libcore",
-  result: EXEC_FAILED,
-  names: ["libcore.javax.crypto.ECDHKeyAgreementTest#testInit_withUnsupportedPrivateKeyType"],
-  bug: 19730263
-},
-{
   description: "Needs to be run as root",
   result: EXEC_FAILED,
   modes: [host],
@@ -130,5 +124,11 @@
   modes: [device],
   result: EXEC_FAILED,
   names: ["org.apache.harmony.tests.java.lang.ProcessManagerTest#testEnvironment"]
+},
+{
+  description: "Crypto failures",
+  result: EXEC_FAILED,
+  names: ["libcore.javax.crypto.CipherTest#testCipher_ShortBlock_Failure",
+          "libcore.javax.crypto.CipherTest#testCipher_Success"]
 }
 ]
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index 1dd443b2bc..17c4556e40 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -21,11 +21,10 @@ fi
 
 # Jar containing all the tests.
 test_jar=out/host/linux-x86/framework/apache-harmony-jdwp-tests-hostdex.jar
-junit_jar=out/host/linux-x86/framework/junit.jar
 
 if [ ! -f $test_jar -o ! -f $junit_jar ]; then
   echo "Before running, you must build jdwp tests and vogar:" \
-       "make junit apache-harmony-jdwp-tests-hostdex vogar vogar.jar"
+       "make apache-harmony-jdwp-tests-hostdex vogar vogar.jar"
   exit 1
 fi
@@ -80,7 +79,6 @@ vogar $vm_command \
   --vm-arg -Djpda.settings.transportAddress=127.0.0.1:55107 \
   --vm-arg -Djpda.settings.debuggeeJavaPath="\"$art_debugee $image $debuggee_args\"" \
   --classpath $test_jar \
-  --classpath $junit_jar \
   --vm-arg -Xcompiler-option --vm-arg --compiler-backend=Optimizing \
   --vm-arg -Xcompiler-option --vm-arg --debuggable \
   org.apache.harmony.jpda.tests.share.AllTests