Diffstat (limited to 'compiler')
138 files changed, 7897 insertions, 2993 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk
index b87201ad8d..beb34dce37 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -21,6 +21,7 @@ include art/build/Android.common_build.mk
 LIBART_COMPILER_SRC_FILES := \
   compiled_method.cc \
   dex/global_value_numbering.cc \
+  dex/gvn_dead_code_elimination.cc \
   dex/local_value_numbering.cc \
   dex/quick/arm/assemble_arm.cc \
   dex/quick/arm/call_arm.cc \
@@ -100,6 +101,7 @@ LIBART_COMPILER_SRC_FILES := \
   optimizing/inliner.cc \
   optimizing/instruction_simplifier.cc \
   optimizing/intrinsics.cc \
+  optimizing/intrinsics_arm.cc \
   optimizing/intrinsics_arm64.cc \
   optimizing/intrinsics_x86_64.cc \
   optimizing/licm.cc \
@@ -117,7 +119,6 @@ LIBART_COMPILER_SRC_FILES := \
   optimizing/primitive_type_propagation.cc \
   optimizing/reference_type_propagation.cc \
   trampolines/trampoline_compiler.cc \
-  utils/arena_allocator.cc \
   utils/arena_bit_vector.cc \
   utils/arm/assembler_arm.cc \
   utils/arm/assembler_arm32.cc \
@@ -135,7 +136,6 @@ LIBART_COMPILER_SRC_FILES := \
   utils/x86/managed_register_x86.cc \
   utils/x86_64/assembler_x86_64.cc \
   utils/x86_64/managed_register_x86_64.cc \
-  utils/scoped_arena_allocator.cc \
   utils/swap_space.cc \
   buffered_output_stream.cc \
   compiler.cc \
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h
index 7685200261..93d83c6fd4 100644
--- a/compiler/dex/bb_optimizations.h
+++ b/compiler/dex/bb_optimizations.h
@@ -240,6 +240,41 @@ class GlobalValueNumberingPass : public PassME {
 };
 
 /**
+ * @class DeadCodeEliminationPass
+ * @brief Performs the GVN-based dead code elimination pass.
+ */
+class DeadCodeEliminationPass : public PassME {
+ public:
+  DeadCodeEliminationPass() : PassME("DCE", kPreOrderDFSTraversal, "4_post_dce_cfg") {
+  }
+
+  bool Gate(const PassDataHolder* data) const OVERRIDE {
+    DCHECK(data != nullptr);
+    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(c_unit != nullptr);
+    return c_unit->mir_graph->EliminateDeadCodeGate();
+  }
+
+  bool Worker(PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    PassMEDataHolder* pass_me_data_holder = down_cast<PassMEDataHolder*>(data);
+    CompilationUnit* c_unit = pass_me_data_holder->c_unit;
+    DCHECK(c_unit != nullptr);
+    BasicBlock* bb = pass_me_data_holder->bb;
+    DCHECK(bb != nullptr);
+    return c_unit->mir_graph->EliminateDeadCode(bb);
+  }
+
+  void End(PassDataHolder* data) const OVERRIDE {
+    DCHECK(data != nullptr);
+    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
+    DCHECK(c_unit != nullptr);
+    c_unit->mir_graph->EliminateDeadCodeEnd();
+    down_cast<PassMEDataHolder*>(data)->dirty = !c_unit->mir_graph->MirSsaRepUpToDate();
+  }
+};
+
+/**
  * @class BBCombine
  * @brief Perform the basic block combination pass.
*/ diff --git a/compiler/dex/compiler_ir.h b/compiler/dex/compiler_ir.h index 0c46d4347d..dceea240fa 100644 --- a/compiler/dex/compiler_ir.h +++ b/compiler/dex/compiler_ir.h @@ -21,11 +21,11 @@ #include <string> #include <vector> +#include "base/arena_allocator.h" +#include "base/scoped_arena_allocator.h" #include "base/timing_logger.h" #include "invoke_type.h" #include "safe_map.h" -#include "utils/arena_allocator.h" -#include "utils/scoped_arena_allocator.h" namespace art { diff --git a/compiler/dex/dex_flags.h b/compiler/dex/dex_flags.h index eaf272bb55..e8eb40ccd2 100644 --- a/compiler/dex/dex_flags.h +++ b/compiler/dex/dex_flags.h @@ -27,6 +27,7 @@ enum OptControlVector { kNullCheckElimination, kClassInitCheckElimination, kGlobalValueNumbering, + kGvnDeadCodeElimination, kLocalValueNumbering, kPromoteRegs, kTrackLiveTemps, diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc index f7968c225a..7e916bee4a 100644 --- a/compiler/dex/dex_to_dex_compiler.cc +++ b/compiler/dex/dex_to_dex_compiler.cc @@ -238,7 +238,7 @@ void DexCompiler::CompileInstanceFieldAccess(Instruction* inst, bool is_volatile; bool fast_path = driver_.ComputeInstanceFieldInfo(field_idx, &unit_, is_put, &field_offset, &is_volatile); - if (fast_path && !is_volatile && IsUint(16, field_offset.Int32Value())) { + if (fast_path && !is_volatile && IsUint<16>(field_offset.Int32Value())) { VLOG(compiler) << "Quickening " << Instruction::Name(inst->Opcode()) << " to " << Instruction::Name(new_opcode) << " by replacing field index " << field_idx @@ -274,7 +274,7 @@ void DexCompiler::CompileInvokeVirtual(Instruction* inst, &target_method, &vtable_idx, &direct_code, &direct_method); if (fast_path && original_invoke_type == invoke_type) { - if (vtable_idx >= 0 && IsUint(16, vtable_idx)) { + if (vtable_idx >= 0 && IsUint<16>(vtable_idx)) { VLOG(compiler) << "Quickening " << Instruction::Name(inst->Opcode()) << "(" << PrettyMethod(method_idx, GetDexFile(), true) << ")" << " to " << Instruction::Name(new_opcode) diff --git a/compiler/dex/global_value_numbering.cc b/compiler/dex/global_value_numbering.cc index a8fd8122ff..ab3c946897 100644 --- a/compiler/dex/global_value_numbering.cc +++ b/compiler/dex/global_value_numbering.cc @@ -28,7 +28,7 @@ GlobalValueNumbering::GlobalValueNumbering(CompilationUnit* cu, ScopedArenaAlloc allocator_(allocator), bbs_processed_(0u), max_bbs_to_process_(kMaxBbsToProcessMultiplyFactor * mir_graph_->GetNumReachableBlocks()), - last_value_(0u), + last_value_(kNullValue), modifications_allowed_(true), mode_(mode), global_value_map_(std::less<uint64_t>(), allocator->Adapter()), @@ -128,7 +128,11 @@ bool GlobalValueNumbering::FinishBasicBlock(BasicBlock* bb) { merge_lvns_.clear(); bool change = (lvns_[bb->id] == nullptr) || !lvns_[bb->id]->Equals(*work_lvn_); - if (change) { + if (mode_ == kModeGvn) { + // In GVN mode, keep the latest LVN even if Equals() indicates no change. This is + // to keep the correct values of fields that do not contribute to Equals() as long + // as they depend only on predecessor LVNs' fields that do contribute to Equals(). + // Currently, that's LVN::merge_map_ used by LVN::GetStartingVregValueNumberImpl(). std::unique_ptr<const LocalValueNumbering> old_lvn(lvns_[bb->id]); lvns_[bb->id] = work_lvn_.release(); } else { @@ -178,7 +182,7 @@ bool GlobalValueNumbering::NullCheckedInAllPredecessors( } // IF_EQZ/IF_NEZ checks some sreg, see if that sreg contains the value_name. 
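The new DeadCodeEliminationPass above hooks into the MIR pass machinery through three callbacks: Gate() is consulted once per method, Worker() runs once per basic block, and End() runs once afterwards, marking the pass data dirty when the SSA representation needs rebuilding. A rough, self-contained sketch of that contract; the types and the driver below are toy stand-ins, not the real PassME/PassDriver API:

#include <iostream>
#include <vector>

// Toy stand-ins for the MIR pass machinery (hypothetical names, for illustration only).
struct Block { int id; };
struct Method {
  std::vector<Block> blocks;
  bool ssa_up_to_date;
};

class Pass {
 public:
  virtual ~Pass() {}
  virtual bool Gate(const Method& method) const = 0;   // Run this pass for the method at all?
  virtual bool Worker(Method& method, Block& bb) = 0;  // Process one basic block.
  virtual void End(Method& method) = 0;                // Finalize; may invalidate SSA.
};

class ToyDeadCodeEliminationPass : public Pass {
 public:
  bool Gate(const Method& method) const override { return !method.blocks.empty(); }
  bool Worker(Method& method, Block& bb) override {
    (void)method;
    std::cout << "DCE worker on block " << bb.id << "\n";
    return false;  // "Changed" flag, analogous to EliminateDeadCode(bb).
  }
  void End(Method& method) override {
    method.ssa_up_to_date = false;  // Analogous to setting 'dirty' when MirSsaRepUpToDate() fails.
  }
};

void RunPass(Pass& pass, Method& method) {
  if (!pass.Gate(method)) return;                   // Gate() checked once per method.
  for (Block& bb : method.blocks) pass.Worker(method, bb);
  pass.End(method);                                 // End() runs once after all blocks.
}

int main() {
  Method method{{{0}, {1}, {2}}, true};
  ToyDeadCodeEliminationPass dce;
  RunPass(dce, method);
  std::cout << "ssa_up_to_date=" << method.ssa_up_to_date << "\n";
}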
int s_reg = pred_bb->last_mir_insn->ssa_rep->uses[0]; - if (!pred_lvn->IsSregValue(s_reg, value_name)) { + if (pred_lvn->GetSregValue(s_reg) != value_name) { return false; } } diff --git a/compiler/dex/global_value_numbering.h b/compiler/dex/global_value_numbering.h index cdafc68070..6fa658c0cc 100644 --- a/compiler/dex/global_value_numbering.h +++ b/compiler/dex/global_value_numbering.h @@ -17,12 +17,12 @@ #ifndef ART_COMPILER_DEX_GLOBAL_VALUE_NUMBERING_H_ #define ART_COMPILER_DEX_GLOBAL_VALUE_NUMBERING_H_ +#include "base/arena_object.h" #include "base/logging.h" #include "base/macros.h" #include "mir_graph.h" #include "compiler_ir.h" #include "dex_flags.h" -#include "utils/arena_object.h" namespace art { @@ -31,6 +31,9 @@ class MirFieldInfo; class GlobalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> { public: + static constexpr uint16_t kNoValue = 0xffffu; + static constexpr uint16_t kNullValue = 1u; + enum Mode { kModeGvn, kModeGvnPostProcessing, @@ -51,6 +54,14 @@ class GlobalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> { GlobalValueNumbering(CompilationUnit* cu, ScopedArenaAllocator* allocator, Mode mode); ~GlobalValueNumbering(); + CompilationUnit* GetCompilationUnit() const { + return cu_; + } + + MIRGraph* GetMirGraph() const { + return mir_graph_; + } + // Prepare LVN for the basic block. LocalValueNumbering* PrepareBasicBlock(BasicBlock* bb, ScopedArenaAllocator* allocator = nullptr); @@ -70,9 +81,10 @@ class GlobalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> { return modifications_allowed_ && Good(); } - private: - static constexpr uint16_t kNoValue = 0xffffu; + // Retrieve the LVN with GVN results for a given BasicBlock. + const LocalValueNumbering* GetLvn(BasicBlockId bb_id) const; + private: // Allocate a new value name. uint16_t NewValueName(); @@ -88,7 +100,7 @@ class GlobalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> { uint16_t LookupValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) { uint16_t res; uint64_t key = BuildKey(op, operand1, operand2, modifier); - ValueMap::iterator lb = global_value_map_.lower_bound(key); + auto lb = global_value_map_.lower_bound(key); if (lb != global_value_map_.end() && lb->first == key) { res = lb->second; } else { @@ -99,10 +111,10 @@ class GlobalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> { } // Look up a value in the global value map, don't add a new entry if there was none before. - uint16_t FindValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) { + uint16_t FindValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) const { uint16_t res; uint64_t key = BuildKey(op, operand1, operand2, modifier); - ValueMap::iterator lb = global_value_map_.lower_bound(key); + auto lb = global_value_map_.lower_bound(key); if (lb != global_value_map_.end() && lb->first == key) { res = lb->second; } else { @@ -111,18 +123,6 @@ class GlobalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> { return res; } - // Check if the exact value is stored in the global value map. - bool HasValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier, - uint16_t value) const { - DCHECK(value != 0u || !Good()); - DCHECK_LE(value, last_value_); - // This is equivalent to value == LookupValue(op, operand1, operand2, modifier) - // except that it doesn't add an entry to the global value map if it's not there. 
- uint64_t key = BuildKey(op, operand1, operand2, modifier); - ValueMap::const_iterator it = global_value_map_.find(key); - return (it != global_value_map_.end() && it->second == value); - } - // Get an instance field id. uint16_t GetIFieldId(MIR* mir) { return GetMirGraph()->GetGvnIFieldId(mir); @@ -200,14 +200,6 @@ class GlobalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> { bool DivZeroCheckedInAllPredecessors(const ScopedArenaVector<uint16_t>& merge_names) const; - CompilationUnit* GetCompilationUnit() const { - return cu_; - } - - MIRGraph* GetMirGraph() const { - return mir_graph_; - } - ScopedArenaAllocator* Allocator() const { return allocator_; } @@ -255,6 +247,13 @@ class GlobalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> { }; std::ostream& operator<<(std::ostream& os, const GlobalValueNumbering::Mode& rhs); +inline const LocalValueNumbering* GlobalValueNumbering::GetLvn(BasicBlockId bb_id) const { + DCHECK_EQ(mode_, kModeGvnPostProcessing); + DCHECK_LT(bb_id, lvns_.size()); + DCHECK(lvns_[bb_id] != nullptr); + return lvns_[bb_id]; +} + inline void GlobalValueNumbering::StartPostProcessing() { DCHECK(Good()); DCHECK_EQ(mode_, kModeGvn); @@ -271,8 +270,7 @@ template <typename Container> // Container of MirIFieldLoweringInfo or MirSFiel uint16_t* GlobalValueNumbering::PrepareGvnFieldIds(ScopedArenaAllocator* allocator, const Container& field_infos) { size_t size = field_infos.size(); - uint16_t* field_ids = reinterpret_cast<uint16_t*>(allocator->Alloc(size * sizeof(uint16_t), - kArenaAllocMisc)); + uint16_t* field_ids = allocator->AllocArray<uint16_t>(size, kArenaAllocMisc); for (size_t i = 0u; i != size; ++i) { size_t idx = i; const MirFieldInfo& cur_info = field_infos[i]; diff --git a/compiler/dex/global_value_numbering_test.cc b/compiler/dex/global_value_numbering_test.cc index f71b7ae359..54e34eaa81 100644 --- a/compiler/dex/global_value_numbering_test.cc +++ b/compiler/dex/global_value_numbering_test.cc @@ -134,8 +134,8 @@ class GlobalValueNumberingTest : public testing::Test { { bb, opcode, 0u, 0u, 2, { src, src + 1 }, 2, { reg, reg + 1 } } #define DEF_PHI2(bb, reg, src1, src2) \ { bb, static_cast<Instruction::Code>(kMirOpPhi), 0, 0u, 2u, { src1, src2 }, 1, { reg } } -#define DEF_DIV_REM(bb, opcode, result, dividend, divisor) \ - { bb, opcode, 0u, 0u, 2, { dividend, divisor }, 1, { result } } +#define DEF_BINOP(bb, opcode, result, src1, src2) \ + { bb, opcode, 0u, 0u, 2, { src1, src2 }, 1, { result } } void DoPrepareIFields(const IFieldDef* defs, size_t count) { cu_.mir_graph->ifield_lowering_infos_.clear(); @@ -229,7 +229,7 @@ class GlobalValueNumberingTest : public testing::Test { void DoPrepareMIRs(const MIRDef* defs, size_t count) { mir_count_ = count; - mirs_ = reinterpret_cast<MIR*>(cu_.arena.Alloc(sizeof(MIR) * count, kArenaAllocMIR)); + mirs_ = cu_.arena.AllocArray<MIR>(count, kArenaAllocMIR); ssa_reps_.resize(count); for (size_t i = 0u; i != count; ++i) { const MIRDef* def = &defs[i]; @@ -251,8 +251,8 @@ class GlobalValueNumberingTest : public testing::Test { ASSERT_EQ(cu_.mir_graph->sfield_lowering_infos_[def->field_info].MemAccessType(), SGetOrSPutMemAccessType(def->opcode)); } else if (def->opcode == static_cast<Instruction::Code>(kMirOpPhi)) { - mir->meta.phi_incoming = static_cast<BasicBlockId*>( - allocator_->Alloc(def->num_uses * sizeof(BasicBlockId), kArenaAllocDFInfo)); + mir->meta.phi_incoming = + allocator_->AllocArray<BasicBlockId>(def->num_uses, kArenaAllocDFInfo); ASSERT_EQ(def->num_uses, bb->predecessors.size()); 
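LookupValue() and FindValue() above differ only in whether a miss allocates a new value name; both key the global map on a 64-bit key derived from four 16-bit components. A minimal standalone model of that scheme; the exact packing done by the real BuildKey() is an assumption here:

#include <cstdint>
#include <iostream>
#include <map>

// Toy value-name table in the spirit of LookupValue()/FindValue().
class ToyValueNames {
 public:
  enum : uint16_t { kNoValue = 0xffffu, kNullValue = 1u };

  uint16_t LookupValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) {
    uint64_t key = BuildKey(op, operand1, operand2, modifier);
    auto lb = map_.lower_bound(key);
    if (lb != map_.end() && lb->first == key) {
      return lb->second;               // Known expression: reuse its value name.
    }
    uint16_t res = ++last_value_;      // Unknown: allocate a fresh value name.
    map_.insert(lb, {key, res});       // Hinted insert, as in the original.
    return res;
  }

  uint16_t FindValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) const {
    auto it = map_.find(BuildKey(op, operand1, operand2, modifier));
    return (it != map_.end()) ? it->second : static_cast<uint16_t>(kNoValue);  // Never allocates.
  }

 private:
  static uint64_t BuildKey(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) {
    // Assumed packing: four 16-bit fields into one 64-bit key.
    return (uint64_t{op} << 48) | (uint64_t{operand1} << 32) | (uint64_t{operand2} << 16) | modifier;
  }

  std::map<uint64_t, uint16_t> map_;
  uint16_t last_value_ = kNullValue;   // Mirrors last_value_(kNullValue) in the patch.
};

int main() {
  ToyValueNames names;
  uint16_t v = names.LookupValue(1, 2, 3, 0);
  std::cout << v << " " << names.FindValue(1, 2, 3, 0) << " " << names.FindValue(7, 7, 7, 7) << "\n";
}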
std::copy(bb->predecessors.begin(), bb->predecessors.end(), mir->meta.phi_incoming); } @@ -267,7 +267,6 @@ class GlobalValueNumberingTest : public testing::Test { mir->offset = i; // LVN uses offset only for debug output mir->optimization_flags = 0u; } - mirs_[count - 1u].next = nullptr; DexFile::CodeItem* code_item = static_cast<DexFile::CodeItem*>( cu_.arena.Alloc(sizeof(DexFile::CodeItem), kArenaAllocMisc)); code_item->insns_size_in_code_units_ = 2u * count; @@ -279,6 +278,20 @@ class GlobalValueNumberingTest : public testing::Test { DoPrepareMIRs(defs, count); } + void DoPrepareVregToSsaMapExit(BasicBlockId bb_id, const int32_t* map, size_t count) { + BasicBlock* bb = cu_.mir_graph->GetBasicBlock(bb_id); + ASSERT_TRUE(bb != nullptr); + ASSERT_TRUE(bb->data_flow_info != nullptr); + bb->data_flow_info->vreg_to_ssa_map_exit = + cu_.arena.AllocArray<int32_t>(count, kArenaAllocDFInfo); + std::copy_n(map, count, bb->data_flow_info->vreg_to_ssa_map_exit); + } + + template <size_t count> + void PrepareVregToSsaMapExit(BasicBlockId bb_id, const int32_t (&map)[count]) { + DoPrepareVregToSsaMapExit(bb_id, map, count); + } + void PerformGVN() { DoPerformGVN<LoopRepeatingTopologicalSortIterator>(); } @@ -294,9 +307,9 @@ class GlobalValueNumberingTest : public testing::Test { cu_.mir_graph->ComputeDominators(); cu_.mir_graph->ComputeTopologicalSortOrder(); cu_.mir_graph->SSATransformationEnd(); - cu_.mir_graph->temp_.gvn.ifield_ids_ = GlobalValueNumbering::PrepareGvnFieldIds( + cu_.mir_graph->temp_.gvn.ifield_ids = GlobalValueNumbering::PrepareGvnFieldIds( allocator_.get(), cu_.mir_graph->ifield_lowering_infos_); - cu_.mir_graph->temp_.gvn.sfield_ids_ = GlobalValueNumbering::PrepareGvnFieldIds( + cu_.mir_graph->temp_.gvn.sfield_ids = GlobalValueNumbering::PrepareGvnFieldIds( allocator_.get(), cu_.mir_graph->sfield_lowering_infos_); ASSERT_TRUE(gvn_ == nullptr); gvn_.reset(new (allocator_.get()) GlobalValueNumbering(&cu_, allocator_.get(), @@ -348,6 +361,10 @@ class GlobalValueNumberingTest : public testing::Test { cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena)); cu_.access_flags = kAccStatic; // Don't let "this" interfere with this test. allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack)); + // By default, the zero-initialized reg_location_[.] with ref == false tells LVN that + // 0 constants are integral, not references. Nothing else is used by LVN/GVN. + cu_.mir_graph->reg_location_ = + cu_.arena.AllocArray<RegLocation>(kMaxSsaRegs, kArenaAllocRegAlloc); // Bind all possible sregs to live vregs for test purposes. 
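The PrepareVregToSsaMapExit() helper above takes a reference to a sized array so the element count is deduced at compile time instead of being passed by hand. A tiny standalone illustration of that idiom, with a made-up print helper:

#include <cstddef>
#include <cstdint>
#include <iostream>

// The array length is deduced from the reference type, as in the test helper.
template <size_t count>
void PrintMap(const char* name, const int32_t (&map)[count]) {
  std::cout << name << ": " << count << " vreg(s):";
  for (size_t i = 0; i != count; ++i) std::cout << " " << map[i];
  std::cout << "\n";
}

int main() {
  const int32_t prologue_vreg_to_ssa_map_exit[] = { 0 };
  const int32_t loop_vreg_to_ssa_map_exit[] = { 3 };
  PrintMap("prologue", prologue_vreg_to_ssa_map_exit);  // count deduced as 1
  PrintMap("loop", loop_vreg_to_ssa_map_exit);          // count deduced as 1
}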
live_in_v_->SetInitialBits(kMaxSsaRegs); cu_.mir_graph->ssa_base_vregs_.reserve(kMaxSsaRegs); @@ -1570,6 +1587,40 @@ TEST_F(GlobalValueNumberingTestLoop, Phi) { EXPECT_NE(value_names_[4], value_names_[3]); } +TEST_F(GlobalValueNumberingTestLoop, IFieldLoopVariable) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_CONST(3, Instruction::CONST, 0u, 0), + DEF_IPUT(3, Instruction::IPUT, 0u, 100u, 0u), + DEF_IGET(4, Instruction::IGET, 2u, 100u, 0u), + DEF_BINOP(4, Instruction::ADD_INT, 3u, 2u, 101u), + DEF_IPUT(4, Instruction::IPUT, 3u, 100u, 0u), + }; + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN(); + ASSERT_EQ(arraysize(mirs), value_names_.size()); + EXPECT_NE(value_names_[2], value_names_[0]); + EXPECT_NE(value_names_[3], value_names_[0]); + EXPECT_NE(value_names_[3], value_names_[2]); + + + // Set up vreg_to_ssa_map_exit for prologue and loop and set post-processing mode + // as needed for GetStartingVregValueNumber(). + const int32_t prologue_vreg_to_ssa_map_exit[] = { 0 }; + const int32_t loop_vreg_to_ssa_map_exit[] = { 3 }; + PrepareVregToSsaMapExit(3, prologue_vreg_to_ssa_map_exit); + PrepareVregToSsaMapExit(4, loop_vreg_to_ssa_map_exit); + gvn_->StartPostProcessing(); + + // Check that vreg 0 has the same value number as the result of IGET 2u. + const LocalValueNumbering* loop = gvn_->GetLvn(4); + EXPECT_EQ(value_names_[2], loop->GetStartingVregValueNumber(0)); +} + TEST_F(GlobalValueNumberingTestCatch, IFields) { static const IFieldDef ifields[] = { { 0u, 1u, 0u, false, kDexMemAccessWord }, @@ -2225,18 +2276,18 @@ TEST_F(GlobalValueNumberingTest, NormalPathToCatchEntry) { TEST_F(GlobalValueNumberingTestDiamond, DivZeroCheckDiamond) { static const MIRDef mirs[] = { - DEF_DIV_REM(3u, Instruction::DIV_INT, 1u, 20u, 21u), - DEF_DIV_REM(3u, Instruction::DIV_INT, 2u, 24u, 21u), - DEF_DIV_REM(3u, Instruction::DIV_INT, 3u, 20u, 23u), - DEF_DIV_REM(4u, Instruction::DIV_INT, 4u, 24u, 22u), - DEF_DIV_REM(4u, Instruction::DIV_INT, 9u, 24u, 25u), - DEF_DIV_REM(5u, Instruction::DIV_INT, 5u, 24u, 21u), - DEF_DIV_REM(5u, Instruction::DIV_INT, 10u, 24u, 26u), + DEF_BINOP(3u, Instruction::DIV_INT, 1u, 20u, 21u), + DEF_BINOP(3u, Instruction::DIV_INT, 2u, 24u, 21u), + DEF_BINOP(3u, Instruction::DIV_INT, 3u, 20u, 23u), + DEF_BINOP(4u, Instruction::DIV_INT, 4u, 24u, 22u), + DEF_BINOP(4u, Instruction::DIV_INT, 9u, 24u, 25u), + DEF_BINOP(5u, Instruction::DIV_INT, 5u, 24u, 21u), + DEF_BINOP(5u, Instruction::DIV_INT, 10u, 24u, 26u), DEF_PHI2(6u, 27u, 25u, 26u), - DEF_DIV_REM(6u, Instruction::DIV_INT, 12u, 20u, 27u), - DEF_DIV_REM(6u, Instruction::DIV_INT, 6u, 24u, 21u), - DEF_DIV_REM(6u, Instruction::DIV_INT, 7u, 20u, 23u), - DEF_DIV_REM(6u, Instruction::DIV_INT, 8u, 20u, 22u), + DEF_BINOP(6u, Instruction::DIV_INT, 12u, 20u, 27u), + DEF_BINOP(6u, Instruction::DIV_INT, 6u, 24u, 21u), + DEF_BINOP(6u, Instruction::DIV_INT, 7u, 20u, 23u), + DEF_BINOP(6u, Instruction::DIV_INT, 8u, 20u, 22u), }; static const bool expected_ignore_div_zero_check[] = { diff --git a/compiler/dex/gvn_dead_code_elimination.cc b/compiler/dex/gvn_dead_code_elimination.cc new file mode 100644 index 0000000000..2e7f0328d2 --- /dev/null +++ b/compiler/dex/gvn_dead_code_elimination.cc @@ -0,0 +1,1391 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <sstream> + +#include "gvn_dead_code_elimination.h" + +#include "base/bit_vector-inl.h" +#include "base/macros.h" +#include "compiler_enums.h" +#include "dataflow_iterator-inl.h" +#include "dex_instruction.h" +#include "dex/mir_graph.h" +#include "local_value_numbering.h" +#include "utils/arena_bit_vector.h" + +namespace art { + +constexpr uint16_t GvnDeadCodeElimination::kNoValue; +constexpr uint16_t GvnDeadCodeElimination::kNPos; + +inline uint16_t GvnDeadCodeElimination::MIRData::PrevChange(int v_reg) const { + DCHECK(has_def); + DCHECK(v_reg == vreg_def || v_reg == vreg_def + 1); + return (v_reg == vreg_def) ? prev_value.change : prev_value_high.change; +} + +inline void GvnDeadCodeElimination::MIRData::SetPrevChange(int v_reg, uint16_t change) { + DCHECK(has_def); + DCHECK(v_reg == vreg_def || v_reg == vreg_def + 1); + if (v_reg == vreg_def) { + prev_value.change = change; + } else { + prev_value_high.change = change; + } +} + +inline void GvnDeadCodeElimination::MIRData::RemovePrevChange(int v_reg, MIRData* prev_data) { + DCHECK_NE(PrevChange(v_reg), kNPos); + DCHECK(v_reg == prev_data->vreg_def || v_reg == prev_data->vreg_def + 1); + if (vreg_def == v_reg) { + if (prev_data->vreg_def == v_reg) { + prev_value = prev_data->prev_value; + low_def_over_high_word = prev_data->low_def_over_high_word; + } else { + prev_value = prev_data->prev_value_high; + low_def_over_high_word = + prev_data->prev_value_high.value != kNPos && !prev_data->high_def_over_low_word; + } + } else { + if (prev_data->vreg_def == v_reg) { + prev_value_high = prev_data->prev_value; + high_def_over_low_word = + prev_data->prev_value.value != kNPos && !prev_data->low_def_over_high_word; + } else { + prev_value_high = prev_data->prev_value_high; + high_def_over_low_word = prev_data->high_def_over_low_word; + } + } +} + +GvnDeadCodeElimination::VRegChains::VRegChains(uint32_t num_vregs, ScopedArenaAllocator* alloc) + : num_vregs_(num_vregs), + vreg_data_(alloc->AllocArray<VRegValue>(num_vregs, kArenaAllocMisc)), + mir_data_(alloc->Adapter()) { + mir_data_.reserve(100); +} + +inline void GvnDeadCodeElimination::VRegChains::Reset() { + DCHECK(mir_data_.empty()); + std::fill_n(vreg_data_, num_vregs_, VRegValue()); +} + +void GvnDeadCodeElimination::VRegChains::AddMIRWithDef(MIR* mir, int v_reg, bool wide, + uint16_t new_value) { + uint16_t pos = mir_data_.size(); + mir_data_.emplace_back(mir); + MIRData* data = &mir_data_.back(); + data->has_def = true; + data->wide_def = wide; + data->vreg_def = v_reg; + + if (vreg_data_[v_reg].change != kNPos && + mir_data_[vreg_data_[v_reg].change].vreg_def + 1 == v_reg) { + data->low_def_over_high_word = true; + } + data->prev_value = vreg_data_[v_reg]; + DCHECK_LT(static_cast<size_t>(v_reg), num_vregs_); + vreg_data_[v_reg].value = new_value; + vreg_data_[v_reg].change = pos; + + if (wide) { + if (vreg_data_[v_reg + 1].change != kNPos && + mir_data_[vreg_data_[v_reg + 1].change].vreg_def == v_reg + 1) { + data->high_def_over_low_word = true; + } + data->prev_value_high = vreg_data_[v_reg + 1]; + DCHECK_LT(static_cast<size_t>(v_reg + 1), num_vregs_); + 
vreg_data_[v_reg + 1].value = new_value; + vreg_data_[v_reg + 1].change = pos; + } +} + +inline void GvnDeadCodeElimination::VRegChains::AddMIRWithoutDef(MIR* mir) { + mir_data_.emplace_back(mir); +} + +void GvnDeadCodeElimination::VRegChains::RemoveLastMIRData() { + MIRData* data = LastMIRData(); + if (data->has_def) { + DCHECK_EQ(vreg_data_[data->vreg_def].change, NumMIRs() - 1u); + vreg_data_[data->vreg_def] = data->prev_value; + if (data->wide_def) { + DCHECK_EQ(vreg_data_[data->vreg_def + 1].change, NumMIRs() - 1u); + vreg_data_[data->vreg_def + 1] = data->prev_value_high; + } + } + mir_data_.pop_back(); +} + +void GvnDeadCodeElimination::VRegChains::RemoveTrailingNops() { + // There's at least one NOP to drop. There may be more. + MIRData* last_data = LastMIRData(); + DCHECK(!last_data->must_keep && !last_data->has_def); + do { + DCHECK_EQ(static_cast<int>(last_data->mir->dalvikInsn.opcode), static_cast<int>(kMirOpNop)); + mir_data_.pop_back(); + if (mir_data_.empty()) { + break; + } + last_data = LastMIRData(); + } while (!last_data->must_keep && !last_data->has_def); +} + +inline size_t GvnDeadCodeElimination::VRegChains::NumMIRs() const { + return mir_data_.size(); +} + +inline GvnDeadCodeElimination::MIRData* GvnDeadCodeElimination::VRegChains::GetMIRData(size_t pos) { + DCHECK_LT(pos, mir_data_.size()); + return &mir_data_[pos]; +} + +inline GvnDeadCodeElimination::MIRData* GvnDeadCodeElimination::VRegChains::LastMIRData() { + DCHECK(!mir_data_.empty()); + return &mir_data_.back(); +} + +uint32_t GvnDeadCodeElimination::VRegChains::NumVRegs() const { + return num_vregs_; +} + +void GvnDeadCodeElimination::VRegChains::InsertInitialValueHigh(int v_reg, uint16_t value) { + DCHECK_NE(value, kNoValue); + DCHECK_LT(static_cast<size_t>(v_reg), num_vregs_); + uint16_t change = vreg_data_[v_reg].change; + if (change == kNPos) { + vreg_data_[v_reg].value = value; + } else { + while (true) { + MIRData* data = &mir_data_[change]; + DCHECK(data->vreg_def == v_reg || data->vreg_def + 1 == v_reg); + if (data->vreg_def == v_reg) { // Low word, use prev_value. + if (data->prev_value.change == kNPos) { + DCHECK_EQ(data->prev_value.value, kNoValue); + data->prev_value.value = value; + data->low_def_over_high_word = true; + break; + } + change = data->prev_value.change; + } else { // High word, use prev_value_high. + if (data->prev_value_high.change == kNPos) { + DCHECK_EQ(data->prev_value_high.value, kNoValue); + data->prev_value_high.value = value; + break; + } + change = data->prev_value_high.change; + } + } + } +} + +void GvnDeadCodeElimination::VRegChains::UpdateInitialVRegValue(int v_reg, bool wide, + const LocalValueNumbering* lvn) { + DCHECK_LT(static_cast<size_t>(v_reg), num_vregs_); + if (!wide) { + if (vreg_data_[v_reg].value == kNoValue) { + uint16_t old_value = lvn->GetStartingVregValueNumber(v_reg); + if (old_value == kNoValue) { + // Maybe there was a wide value in v_reg before. Do not check for wide value in v_reg-1, + // that will be done only if we see a definition of v_reg-1, otherwise it's unnecessary. 
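VRegChains above keeps, for every Dalvik vreg, the current value name plus the index of the MIR that last changed it; each recorded definition stores the previous {value, change} pair so the history can be walked backwards or unwound. A much-simplified standalone model of that chain, narrow (non-wide) defs only:

#include <cstdint>
#include <iostream>
#include <vector>

constexpr uint16_t kNoValue = 0xffffu;
constexpr uint16_t kNPos = 0xffffu;

struct VRegValue { uint16_t value; uint16_t change; };
struct DefRecord { int v_reg; VRegValue prev_value; };

class ToyVRegChains {
 public:
  explicit ToyVRegChains(size_t num_vregs) : vreg_data_(num_vregs, VRegValue{kNoValue, kNPos}) {}

  void AddMIRWithDef(int v_reg, uint16_t new_value) {
    uint16_t pos = static_cast<uint16_t>(defs_.size());
    defs_.push_back(DefRecord{v_reg, vreg_data_[v_reg]});  // Remember the previous value/change.
    vreg_data_[v_reg] = VRegValue{new_value, pos};
  }

  void RemoveLastMIRData() {                               // Unwind the most recent definition.
    vreg_data_[defs_.back().v_reg] = defs_.back().prev_value;
    defs_.pop_back();
  }

  uint16_t CurrentValue(int v_reg) const { return vreg_data_[v_reg].value; }
  uint16_t LastChange(int v_reg) const { return vreg_data_[v_reg].change; }

 private:
  std::vector<VRegValue> vreg_data_;
  std::vector<DefRecord> defs_;
};

int main() {
  ToyVRegChains chains(4);
  chains.AddMIRWithDef(0, 10);  // change #0: v0 := value 10
  chains.AddMIRWithDef(0, 11);  // change #1: v0 := value 11, remembers {10, #0}
  std::cout << chains.CurrentValue(0) << " @ change " << chains.LastChange(0) << "\n";  // 11 @ 1
  chains.RemoveLastMIRData();
  std::cout << chains.CurrentValue(0) << " @ change " << chains.LastChange(0) << "\n";  // 10 @ 0
}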
+ old_value = lvn->GetStartingVregValueNumberWide(v_reg); + if (old_value != kNoValue) { + InsertInitialValueHigh(v_reg + 1, old_value); + } + } + vreg_data_[v_reg].value = old_value; + } + } else { + DCHECK_LT(static_cast<size_t>(v_reg + 1), num_vregs_); + bool check_high = true; + if (vreg_data_[v_reg].value == kNoValue) { + uint16_t old_value = lvn->GetStartingVregValueNumberWide(v_reg); + if (old_value != kNoValue) { + InsertInitialValueHigh(v_reg + 1, old_value); + check_high = false; // High word has been processed. + } else { + // Maybe there was a narrow value before. Do not check for wide value in v_reg-1, + // that will be done only if we see a definition of v_reg-1, otherwise it's unnecessary. + old_value = lvn->GetStartingVregValueNumber(v_reg); + } + vreg_data_[v_reg].value = old_value; + } + if (check_high && vreg_data_[v_reg + 1].value == kNoValue) { + uint16_t old_value = lvn->GetStartingVregValueNumber(v_reg + 1); + if (old_value == kNoValue && static_cast<size_t>(v_reg + 2) < num_vregs_) { + // Maybe there was a wide value before. + old_value = lvn->GetStartingVregValueNumberWide(v_reg + 1); + if (old_value != kNoValue) { + InsertInitialValueHigh(v_reg + 2, old_value); + } + } + vreg_data_[v_reg + 1].value = old_value; + } + } +} + +inline uint16_t GvnDeadCodeElimination::VRegChains::LastChange(int v_reg) { + DCHECK_LT(static_cast<size_t>(v_reg), num_vregs_); + return vreg_data_[v_reg].change; +} + +inline uint16_t GvnDeadCodeElimination::VRegChains::CurrentValue(int v_reg) { + DCHECK_LT(static_cast<size_t>(v_reg), num_vregs_); + return vreg_data_[v_reg].value; +} + +uint16_t GvnDeadCodeElimination::VRegChains::FindKillHead(int v_reg, uint16_t cutoff) { + uint16_t current_value = this->CurrentValue(v_reg); + DCHECK_NE(current_value, kNoValue); + uint16_t change = LastChange(v_reg); + DCHECK_LT(change, mir_data_.size()); + DCHECK_GE(change, cutoff); + bool match_high_word = (mir_data_[change].vreg_def != v_reg); + do { + MIRData* data = &mir_data_[change]; + DCHECK(data->vreg_def == v_reg || data->vreg_def + 1 == v_reg); + if (data->vreg_def == v_reg) { // Low word, use prev_value. + if (data->prev_value.value == current_value && + match_high_word == data->low_def_over_high_word) { + break; + } + change = data->prev_value.change; + } else { // High word, use prev_value_high. + if (data->prev_value_high.value == current_value && + match_high_word != data->high_def_over_low_word) { + break; + } + change = data->prev_value_high.change; + } + if (change < cutoff) { + change = kNPos; + } + } while (change != kNPos); + return change; +} + +uint16_t GvnDeadCodeElimination::VRegChains::FindFirstChangeAfter(int v_reg, + uint16_t change) const { + DCHECK_LT(static_cast<size_t>(v_reg), num_vregs_); + DCHECK_LT(change, mir_data_.size()); + uint16_t result = kNPos; + uint16_t search_change = vreg_data_[v_reg].change; + while (search_change != kNPos && search_change > change) { + result = search_change; + search_change = mir_data_[search_change].PrevChange(v_reg); + } + return result; +} + +void GvnDeadCodeElimination::VRegChains::ReplaceChange(uint16_t old_change, uint16_t new_change) { + const MIRData* old_data = GetMIRData(old_change); + DCHECK(old_data->has_def); + int count = old_data->wide_def ? 
2 : 1; + for (int v_reg = old_data->vreg_def, end = old_data->vreg_def + count; v_reg != end; ++v_reg) { + uint16_t next_change = FindFirstChangeAfter(v_reg, old_change); + if (next_change == kNPos) { + DCHECK_EQ(vreg_data_[v_reg].change, old_change); + vreg_data_[v_reg].change = new_change; + } else { + DCHECK_EQ(mir_data_[next_change].PrevChange(v_reg), old_change); + mir_data_[next_change].SetPrevChange(v_reg, new_change); + } + } +} + +void GvnDeadCodeElimination::VRegChains::RemoveChange(uint16_t change) { + MIRData* data = &mir_data_[change]; + DCHECK(data->has_def); + int count = data->wide_def ? 2 : 1; + for (int v_reg = data->vreg_def, end = data->vreg_def + count; v_reg != end; ++v_reg) { + uint16_t next_change = FindFirstChangeAfter(v_reg, change); + if (next_change == kNPos) { + DCHECK_EQ(vreg_data_[v_reg].change, change); + vreg_data_[v_reg] = (data->vreg_def == v_reg) ? data->prev_value : data->prev_value_high; + } else { + DCHECK_EQ(mir_data_[next_change].PrevChange(v_reg), change); + mir_data_[next_change].RemovePrevChange(v_reg, data); + } + } +} + +inline bool GvnDeadCodeElimination::VRegChains::IsTopChange(uint16_t change) const { + DCHECK_LT(change, mir_data_.size()); + const MIRData* data = &mir_data_[change]; + DCHECK(data->has_def); + DCHECK_LT(data->wide_def ? data->vreg_def + 1u : data->vreg_def, num_vregs_); + return vreg_data_[data->vreg_def].change == change && + (!data->wide_def || vreg_data_[data->vreg_def + 1u].change == change); +} + +bool GvnDeadCodeElimination::VRegChains::IsSRegUsed(uint16_t first_change, uint16_t last_change, + int s_reg) const { + DCHECK_LE(first_change, last_change); + DCHECK_LE(last_change, mir_data_.size()); + for (size_t c = first_change; c != last_change; ++c) { + SSARepresentation* ssa_rep = mir_data_[c].mir->ssa_rep; + for (int i = 0; i != ssa_rep->num_uses; ++i) { + if (ssa_rep->uses[i] == s_reg) { + return true; + } + } + } + return false; +} + +void GvnDeadCodeElimination::VRegChains::RenameSRegUses(uint16_t first_change, uint16_t last_change, + int old_s_reg, int new_s_reg, bool wide) { + for (size_t c = first_change; c != last_change; ++c) { + SSARepresentation* ssa_rep = mir_data_[c].mir->ssa_rep; + for (int i = 0; i != ssa_rep->num_uses; ++i) { + if (ssa_rep->uses[i] == old_s_reg) { + ssa_rep->uses[i] = new_s_reg; + if (wide) { + ++i; + DCHECK_LT(i, ssa_rep->num_uses); + ssa_rep->uses[i] = new_s_reg + 1; + } + } + } + } +} + +void GvnDeadCodeElimination::VRegChains::RenameVRegUses(uint16_t first_change, uint16_t last_change, + int old_s_reg, int old_v_reg, + int new_s_reg, int new_v_reg) { + for (size_t c = first_change; c != last_change; ++c) { + MIR* mir = mir_data_[c].mir; + if (IsInstructionBinOp2Addr(mir->dalvikInsn.opcode) && + mir->ssa_rep->uses[0] == old_s_reg && old_v_reg != new_v_reg) { + // Rewrite binop_2ADDR with plain binop before doing the register rename. + ChangeBinOp2AddrToPlainBinOp(mir); + } + uint64_t df_attr = MIRGraph::GetDataFlowAttributes(mir); + size_t use = 0u; +#define REPLACE_VREG(REG) \ + if ((df_attr & DF_U##REG) != 0) { \ + if (mir->ssa_rep->uses[use] == old_s_reg) { \ + DCHECK_EQ(mir->dalvikInsn.v##REG, static_cast<uint32_t>(old_v_reg)); \ + mir->dalvikInsn.v##REG = new_v_reg; \ + mir->ssa_rep->uses[use] = new_s_reg; \ + if ((df_attr & DF_##REG##_WIDE) != 0) { \ + DCHECK_EQ(mir->ssa_rep->uses[use + 1], old_s_reg + 1); \ + mir->ssa_rep->uses[use + 1] = new_s_reg + 1; \ + } \ + } \ + use += ((df_attr & DF_##REG##_WIDE) != 0) ? 
2 : 1; \ + } + REPLACE_VREG(A) + REPLACE_VREG(B) + REPLACE_VREG(C) +#undef REPLACE_VREG + // We may encounter an out-of-order Phi which we need to ignore, otherwise we should + // only be asked to rename registers specified by DF_UA, DF_UB and DF_UC. + DCHECK_EQ(use, + static_cast<int>(mir->dalvikInsn.opcode) == kMirOpPhi + ? 0u + : static_cast<size_t>(mir->ssa_rep->num_uses)); + } +} + +GvnDeadCodeElimination::GvnDeadCodeElimination(const GlobalValueNumbering* gvn, + ScopedArenaAllocator* alloc) + : gvn_(gvn), + mir_graph_(gvn_->GetMirGraph()), + vreg_chains_(mir_graph_->GetNumOfCodeAndTempVRs(), alloc), + bb_(nullptr), + lvn_(nullptr), + no_uses_all_since_(0u), + unused_vregs_(new (alloc) ArenaBitVector(alloc, vreg_chains_.NumVRegs(), false)), + vregs_to_kill_(new (alloc) ArenaBitVector(alloc, vreg_chains_.NumVRegs(), false)), + kill_heads_(alloc->AllocArray<uint16_t>(vreg_chains_.NumVRegs(), kArenaAllocMisc)), + changes_to_kill_(alloc->Adapter()), + dependent_vregs_(new (alloc) ArenaBitVector(alloc, vreg_chains_.NumVRegs(), false)) { + changes_to_kill_.reserve(16u); +} + +void GvnDeadCodeElimination::Apply(BasicBlock* bb) { + bb_ = bb; + lvn_ = gvn_->GetLvn(bb->id); + + RecordPass(); + BackwardPass(); + + DCHECK_EQ(no_uses_all_since_, 0u); + lvn_ = nullptr; + bb_ = nullptr; +} + +void GvnDeadCodeElimination::RecordPass() { + // Record MIRs with vreg definition data, eliminate single instructions. + vreg_chains_.Reset(); + DCHECK_EQ(no_uses_all_since_, 0u); + for (MIR* mir = bb_->first_mir_insn; mir != nullptr; mir = mir->next) { + if (RecordMIR(mir)) { + RecordPassTryToKillOverwrittenMoveOrMoveSrc(); + RecordPassTryToKillLastMIR(); + } + } +} + +void GvnDeadCodeElimination::BackwardPass() { + // Now process MIRs in reverse order, trying to eliminate them. + unused_vregs_->ClearAllBits(); // Implicitly depend on all vregs at the end of BB. + while (vreg_chains_.NumMIRs() != 0u) { + if (BackwardPassTryToKillLastMIR()) { + continue; + } + BackwardPassProcessLastMIR(); + } +} + +void GvnDeadCodeElimination::KillMIR(MIRData* data) { + DCHECK(!data->must_keep); + DCHECK(!data->uses_all_vregs); + DCHECK(data->has_def); + DCHECK(data->mir->ssa_rep->num_defs == 1 || data->mir->ssa_rep->num_defs == 2); + + KillMIR(data->mir); + data->has_def = false; + data->is_move = false; + data->is_move_src = false; +} + +void GvnDeadCodeElimination::KillMIR(MIR* mir) { + mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop); + mir->ssa_rep->num_uses = 0; + mir->ssa_rep->num_defs = 0; +} + +void GvnDeadCodeElimination::ChangeBinOp2AddrToPlainBinOp(MIR* mir) { + mir->dalvikInsn.vC = mir->dalvikInsn.vB; + mir->dalvikInsn.vB = mir->dalvikInsn.vA; + mir->dalvikInsn.opcode = static_cast<Instruction::Code>( + mir->dalvikInsn.opcode - Instruction::ADD_INT_2ADDR + Instruction::ADD_INT); +} + +MIR* GvnDeadCodeElimination::CreatePhi(int s_reg, bool fp) { + int v_reg = mir_graph_->SRegToVReg(s_reg); + MIR* phi = mir_graph_->NewMIR(); + phi->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpPhi); + phi->dalvikInsn.vA = v_reg; + phi->offset = bb_->start_offset; + phi->m_unit_index = 0; // Arbitrarily assign all Phi nodes to outermost method. 
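ChangeBinOp2AddrToPlainBinOp() above relies on the *_2ADDR opcodes being laid out in the same order as their three-operand forms, so the rewrite is just "copy vA/vB into vB/vC and shift the opcode by a constant offset". A standalone sketch of that trick with a toy opcode enum; the parallel layout is exactly the assumption being exploited:

#include <cstdint>
#include <iostream>

// Toy opcode space with the parallel layout the rewrite depends on.
enum Opcode : uint16_t {
  ADD_INT, SUB_INT, MUL_INT,                   // binop vA, vB, vC
  ADD_INT_2ADDR, SUB_INT_2ADDR, MUL_INT_2ADDR  // binop/2addr vA, vB (vA is dest and left input)
};

struct Insn { Opcode opcode; uint32_t vA, vB, vC; };

void ChangeBinOp2AddrToPlainBinOp(Insn* insn) {
  insn->vC = insn->vB;                         // Right-hand input moves to vC.
  insn->vB = insn->vA;                         // Left-hand input (== dest) moves to vB.
  insn->opcode = static_cast<Opcode>(insn->opcode - ADD_INT_2ADDR + ADD_INT);
}

int main() {
  Insn insn{SUB_INT_2ADDR, /*vA=*/1, /*vB=*/2, /*vC=*/0};
  ChangeBinOp2AddrToPlainBinOp(&insn);
  std::cout << insn.opcode << " v" << insn.vA << ", v" << insn.vB << ", v" << insn.vC << "\n";
  // Prints "1 v1, v1, v2", i.e. SUB_INT v1, v1, v2.
}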
+ + phi->ssa_rep = static_cast<struct SSARepresentation *>(mir_graph_->GetArena()->Alloc( + sizeof(SSARepresentation), kArenaAllocDFInfo)); + + mir_graph_->AllocateSSADefData(phi, 1); + phi->ssa_rep->defs[0] = s_reg; + phi->ssa_rep->fp_def[0] = fp; + + size_t num_uses = bb_->predecessors.size(); + mir_graph_->AllocateSSAUseData(phi, num_uses); + std::fill_n(phi->ssa_rep->fp_use, num_uses, fp); + size_t idx = 0u; + for (BasicBlockId pred_id : bb_->predecessors) { + BasicBlock* pred_bb = mir_graph_->GetBasicBlock(pred_id); + DCHECK(pred_bb != nullptr); + phi->ssa_rep->uses[idx] = pred_bb->data_flow_info->vreg_to_ssa_map_exit[v_reg]; + DCHECK_NE(phi->ssa_rep->uses[idx], INVALID_SREG); + idx++; + } + + phi->meta.phi_incoming = static_cast<BasicBlockId*>(mir_graph_->GetArena()->Alloc( + sizeof(BasicBlockId) * num_uses, kArenaAllocDFInfo)); + std::copy(bb_->predecessors.begin(), bb_->predecessors.end(), phi->meta.phi_incoming); + bb_->PrependMIR(phi); + return phi; +} + +MIR* GvnDeadCodeElimination::RenameSRegDefOrCreatePhi(uint16_t def_change, uint16_t last_change, + MIR* mir_to_kill) { + DCHECK(mir_to_kill->ssa_rep->num_defs == 1 || mir_to_kill->ssa_rep->num_defs == 2); + bool wide = (mir_to_kill->ssa_rep->num_defs != 1); + int new_s_reg = mir_to_kill->ssa_rep->defs[0]; + + // Just before we kill mir_to_kill, we need to replace the previous SSA reg assigned to the + // same dalvik reg to keep consistency with subsequent instructions. However, if there's no + // defining MIR for that dalvik reg, the preserved valus must come from its predecessors + // and we need to create a new Phi (a degenerate Phi if there's only a single predecessor). + if (def_change == kNPos) { + bool fp = mir_to_kill->ssa_rep->fp_def[0]; + if (wide) { + DCHECK_EQ(new_s_reg + 1, mir_to_kill->ssa_rep->defs[1]); + DCHECK_EQ(fp, mir_to_kill->ssa_rep->fp_def[1]); + DCHECK_EQ(mir_graph_->SRegToVReg(new_s_reg) + 1, mir_graph_->SRegToVReg(new_s_reg + 1)); + CreatePhi(new_s_reg + 1, fp); // High word Phi. + } + return CreatePhi(new_s_reg, fp); + } else { + DCHECK_LT(def_change, last_change); + DCHECK_LE(last_change, vreg_chains_.NumMIRs()); + MIRData* def_data = vreg_chains_.GetMIRData(def_change); + DCHECK(def_data->has_def); + int old_s_reg = def_data->mir->ssa_rep->defs[0]; + DCHECK_NE(old_s_reg, new_s_reg); + DCHECK_EQ(mir_graph_->SRegToVReg(old_s_reg), mir_graph_->SRegToVReg(new_s_reg)); + def_data->mir->ssa_rep->defs[0] = new_s_reg; + if (wide) { + if (static_cast<int>(def_data->mir->dalvikInsn.opcode) == kMirOpPhi) { + // Currently the high word Phi is always located after the low word Phi. 
+ MIR* phi_high = def_data->mir->next; + DCHECK(phi_high != nullptr && static_cast<int>(phi_high->dalvikInsn.opcode) == kMirOpPhi); + DCHECK_EQ(phi_high->ssa_rep->defs[0], old_s_reg + 1); + phi_high->ssa_rep->defs[0] = new_s_reg + 1; + } else { + DCHECK_EQ(def_data->mir->ssa_rep->defs[1], old_s_reg + 1); + def_data->mir->ssa_rep->defs[1] = new_s_reg + 1; + } + } + vreg_chains_.RenameSRegUses(def_change + 1u, last_change, old_s_reg, new_s_reg, wide); + return nullptr; + } +} + + +void GvnDeadCodeElimination::BackwardPassProcessLastMIR() { + MIRData* data = vreg_chains_.LastMIRData(); + if (data->uses_all_vregs) { + DCHECK(data->must_keep); + unused_vregs_->ClearAllBits(); + DCHECK_EQ(no_uses_all_since_, vreg_chains_.NumMIRs()); + --no_uses_all_since_; + while (no_uses_all_since_ != 0u && + !vreg_chains_.GetMIRData(no_uses_all_since_ - 1u)->uses_all_vregs) { + --no_uses_all_since_; + } + } else { + if (data->has_def) { + unused_vregs_->SetBit(data->vreg_def); + if (data->wide_def) { + unused_vregs_->SetBit(data->vreg_def + 1); + } + } + for (int i = 0, num_uses = data->mir->ssa_rep->num_uses; i != num_uses; ++i) { + int v_reg = mir_graph_->SRegToVReg(data->mir->ssa_rep->uses[i]); + unused_vregs_->ClearBit(v_reg); + } + } + vreg_chains_.RemoveLastMIRData(); +} + +void GvnDeadCodeElimination::RecordPassKillMoveByRenamingSrcDef(uint16_t src_change, + uint16_t move_change) { + DCHECK_LT(src_change, move_change); + MIRData* src_data = vreg_chains_.GetMIRData(src_change); + MIRData* move_data = vreg_chains_.GetMIRData(move_change); + DCHECK(src_data->is_move_src); + DCHECK_EQ(src_data->wide_def, move_data->wide_def); + DCHECK(move_data->prev_value.change == kNPos || move_data->prev_value.change <= src_change); + DCHECK(!move_data->wide_def || move_data->prev_value_high.change == kNPos || + move_data->prev_value_high.change <= src_change); + + int old_s_reg = src_data->mir->ssa_rep->defs[0]; + // NOTE: old_s_reg may differ from move_data->mir->ssa_rep->uses[0]; value names must match. + int new_s_reg = move_data->mir->ssa_rep->defs[0]; + DCHECK_NE(old_s_reg, new_s_reg); + + if (IsInstructionBinOp2Addr(src_data->mir->dalvikInsn.opcode) && + src_data->vreg_def != move_data->vreg_def) { + // Rewrite binop_2ADDR with plain binop before doing the register rename. + ChangeBinOp2AddrToPlainBinOp(src_data->mir); + } + // Remove src_change from the vreg chain(s). + vreg_chains_.RemoveChange(src_change); + // Replace the move_change with the src_change, copying all necessary data. + src_data->is_move_src = move_data->is_move_src; + src_data->low_def_over_high_word = move_data->low_def_over_high_word; + src_data->high_def_over_low_word = move_data->high_def_over_low_word; + src_data->vreg_def = move_data->vreg_def; + src_data->prev_value = move_data->prev_value; + src_data->prev_value_high = move_data->prev_value_high; + src_data->mir->dalvikInsn.vA = move_data->vreg_def; + src_data->mir->ssa_rep->defs[0] = new_s_reg; + if (move_data->wide_def) { + DCHECK_EQ(src_data->mir->ssa_rep->defs[1], old_s_reg + 1); + src_data->mir->ssa_rep->defs[1] = new_s_reg + 1; + } + vreg_chains_.ReplaceChange(move_change, src_change); + + // Rename uses and kill the move. 
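RecordPassKillMoveByRenamingSrcDef() above removes a MOVE by making the instruction that produced the source vreg write directly into the MOVE's destination and then renaming later uses. A self-contained toy version of the same idea on a flat instruction list; the Insn type and the bookkeeping-free renaming are simplifications, with none of the SSA or wide-register handling of the real code:

#include <iostream>
#include <string>
#include <vector>

// Toy three-address IR (hypothetical, for illustration only).
struct Insn { std::string op; int dst; int src0; int src1; };

// Redirect the defining instruction to the MOVE's destination, rename the remaining uses,
// and turn the MOVE itself into a NOP.
void KillMoveByRenamingSrcDef(std::vector<Insn>& code, size_t move_idx) {
  int old_dst = code[move_idx].src0;  // vreg written by the defining instruction.
  int new_dst = code[move_idx].dst;   // vreg the MOVE was copying into.
  for (size_t i = 0; i != move_idx; ++i) {
    if (code[i].dst == old_dst) code[i].dst = new_dst;    // Redirect the definition.
  }
  for (size_t i = move_idx + 1; i != code.size(); ++i) {  // Rename later uses.
    if (code[i].src0 == old_dst) code[i].src0 = new_dst;
    if (code[i].src1 == old_dst) code[i].src1 = new_dst;
  }
  code[move_idx] = Insn{"nop", -1, -1, -1};  // As KillMIR() does, the MOVE becomes a NOP.
}

int main() {
  std::vector<Insn> code = {
    Insn{"add",  1, 2, 3},   // v1 = v2 + v3
    Insn{"move", 0, 1, -1},  // v0 = v1   <- the copy to eliminate
    Insn{"use",  -1, 0, -1}, // reads v0
  };
  KillMoveByRenamingSrcDef(code, 1);
  for (const Insn& insn : code) {
    std::cout << insn.op << " " << insn.dst << ", " << insn.src0 << ", " << insn.src1 << "\n";
  }
}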
+ vreg_chains_.RenameVRegUses(src_change + 1u, vreg_chains_.NumMIRs(), + old_s_reg, mir_graph_->SRegToVReg(old_s_reg), + new_s_reg, mir_graph_->SRegToVReg(new_s_reg)); + KillMIR(move_data); +} + +void GvnDeadCodeElimination::RecordPassTryToKillOverwrittenMoveOrMoveSrc(uint16_t check_change) { + MIRData* data = vreg_chains_.GetMIRData(check_change); + DCHECK(data->is_move || data->is_move_src); + int32_t dest_s_reg = data->mir->ssa_rep->defs[0]; + + if (data->is_move) { + // Check if source vreg has changed since the MOVE. + int32_t src_s_reg = data->mir->ssa_rep->uses[0]; + uint32_t src_v_reg = mir_graph_->SRegToVReg(src_s_reg); + uint16_t src_change = vreg_chains_.FindFirstChangeAfter(src_v_reg, check_change); + bool wide = data->wide_def; + if (wide) { + uint16_t src_change_high = vreg_chains_.FindFirstChangeAfter(src_v_reg + 1, check_change); + if (src_change_high != kNPos && (src_change == kNPos || src_change_high < src_change)) { + src_change = src_change_high; + } + } + if (src_change == kNPos || + !vreg_chains_.IsSRegUsed(src_change + 1u, vreg_chains_.NumMIRs(), dest_s_reg)) { + // We can simply change all uses of dest to src. + size_t rename_end = (src_change != kNPos) ? src_change + 1u : vreg_chains_.NumMIRs(); + vreg_chains_.RenameVRegUses(check_change + 1u, rename_end, + dest_s_reg, mir_graph_->SRegToVReg(dest_s_reg), + src_s_reg, mir_graph_->SRegToVReg(src_s_reg)); + + // Now, remove the MOVE from the vreg chain(s) and kill it. + vreg_chains_.RemoveChange(check_change); + KillMIR(data); + return; + } + } + + if (data->is_move_src) { + // Try to find a MOVE to a vreg that wasn't changed since check_change. + uint16_t value_name = + data->wide_def ? lvn_->GetSregValueWide(dest_s_reg) : lvn_->GetSregValue(dest_s_reg); + for (size_t c = check_change + 1u, size = vreg_chains_.NumMIRs(); c != size; ++c) { + MIRData* d = vreg_chains_.GetMIRData(c); + if (d->is_move && d->wide_def == data->wide_def && + (d->prev_value.change == kNPos || d->prev_value.change <= check_change) && + (!d->wide_def || + d->prev_value_high.change == kNPos || d->prev_value_high.change <= check_change)) { + // Compare value names to find move to move. + int32_t src_s_reg = d->mir->ssa_rep->uses[0]; + uint16_t src_name = + (d->wide_def ? lvn_->GetSregValueWide(src_s_reg) : lvn_->GetSregValue(src_s_reg)); + if (value_name == src_name) { + RecordPassKillMoveByRenamingSrcDef(check_change, c); + return; + } + } + } + } +} + +void GvnDeadCodeElimination::RecordPassTryToKillOverwrittenMoveOrMoveSrc() { + // Check if we're overwriting a the result of a move or the definition of a source of a move. + // For MOVE_WIDE, we may be overwriting partially; if that's the case, check that the other + // word wasn't previously overwritten - we would have tried to rename back then. + MIRData* data = vreg_chains_.LastMIRData(); + if (!data->has_def) { + return; + } + // NOTE: Instructions such as new-array implicitly use all vregs (if they throw) but they can + // define a move source which can be renamed. Therefore we allow the checked change to be the + // change before no_uses_all_since_. This has no effect on moves as they never use all vregs. + if (data->prev_value.change != kNPos && data->prev_value.change + 1u >= no_uses_all_since_) { + MIRData* check_data = vreg_chains_.GetMIRData(data->prev_value.change); + bool try_to_kill = false; + if (!check_data->is_move && !check_data->is_move_src) { + DCHECK(!try_to_kill); + } else if (!check_data->wide_def) { + // Narrow move; always fully overwritten by the last MIR. 
+ try_to_kill = true; + } else if (data->low_def_over_high_word) { + // Overwriting only the high word; is the low word still valid? + DCHECK_EQ(check_data->vreg_def + 1u, data->vreg_def); + if (vreg_chains_.LastChange(check_data->vreg_def) == data->prev_value.change) { + try_to_kill = true; + } + } else if (!data->wide_def) { + // Overwriting only the low word, is the high word still valid? + if (vreg_chains_.LastChange(data->vreg_def + 1) == data->prev_value.change) { + try_to_kill = true; + } + } else { + // Overwriting both words; was the high word still from the same move? + if (data->prev_value_high.change == data->prev_value.change) { + try_to_kill = true; + } + } + if (try_to_kill) { + RecordPassTryToKillOverwrittenMoveOrMoveSrc(data->prev_value.change); + } + } + if (data->wide_def && data->high_def_over_low_word && + data->prev_value_high.change != kNPos && + data->prev_value_high.change + 1u >= no_uses_all_since_) { + MIRData* check_data = vreg_chains_.GetMIRData(data->prev_value_high.change); + bool try_to_kill = false; + if (!check_data->is_move && !check_data->is_move_src) { + DCHECK(!try_to_kill); + } else if (!check_data->wide_def) { + // Narrow move; always fully overwritten by the last MIR. + try_to_kill = true; + } else if (vreg_chains_.LastChange(check_data->vreg_def + 1) == + data->prev_value_high.change) { + // High word is still valid. + try_to_kill = true; + } + if (try_to_kill) { + RecordPassTryToKillOverwrittenMoveOrMoveSrc(data->prev_value_high.change); + } + } +} + +void GvnDeadCodeElimination::RecordPassTryToKillLastMIR() { + MIRData* last_data = vreg_chains_.LastMIRData(); + if (last_data->must_keep) { + return; + } + if (UNLIKELY(!last_data->has_def)) { + // Must be an eliminated MOVE. Drop its data and data of all eliminated MIRs before it. + vreg_chains_.RemoveTrailingNops(); + return; + } + + // Try to kill a sequence of consecutive definitions of the same vreg. Allow mixing + // wide and non-wide defs; consider high word dead if low word has been overwritten. + uint16_t current_value = vreg_chains_.CurrentValue(last_data->vreg_def); + uint16_t change = vreg_chains_.NumMIRs() - 1u; + MIRData* data = last_data; + while (data->prev_value.value != current_value) { + --change; + if (data->prev_value.change == kNPos || data->prev_value.change != change) { + return; + } + data = vreg_chains_.GetMIRData(data->prev_value.change); + if (data->must_keep || !data->has_def || data->vreg_def != last_data->vreg_def) { + return; + } + } + + bool wide = last_data->wide_def; + if (wide) { + // Check that the low word is valid. + if (data->low_def_over_high_word) { + return; + } + // Check that the high word is valid. + MIRData* high_data = data; + if (!high_data->wide_def) { + uint16_t high_change = vreg_chains_.FindFirstChangeAfter(data->vreg_def + 1, change); + DCHECK_NE(high_change, kNPos); + high_data = vreg_chains_.GetMIRData(high_change); + DCHECK_EQ(high_data->vreg_def, data->vreg_def); + } + if (high_data->prev_value_high.value != current_value || high_data->high_def_over_low_word) { + return; + } + } + + MIR* phi = RenameSRegDefOrCreatePhi(data->prev_value.change, change, last_data->mir); + for (size_t i = 0, count = vreg_chains_.NumMIRs() - change; i != count; ++i) { + KillMIR(vreg_chains_.LastMIRData()->mir); + vreg_chains_.RemoveLastMIRData(); + } + if (phi != nullptr) { + // Though the Phi has been added to the beginning, we can put the MIRData at the end. 
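RecordPassTryToKillLastMIR() above walks back through adjacent definitions of the same vreg and kills the whole trailing run when, per GVN value names, the value it ends up with is one the vreg already held right before the run started (RenameSRegDefOrCreatePhi then keeps the SSA form consistent). A toy value-number version of just the "is this run redundant?" check, ignoring wide defs, must_keep and the SSA repair:

#include <cstdint>
#include <iostream>
#include <vector>

struct Def { int v_reg; uint16_t value; uint16_t prev_value; };  // prev_value = value before this def.

// Return the index of the first def in a trailing run of defs of the same vreg that can be
// killed because the run's final value equals the value held before that def; -1 if none.
int FindKillableRunStart(const std::vector<Def>& defs) {
  if (defs.empty()) return -1;
  const Def& last = defs.back();
  uint16_t wanted = last.value;                  // Value the run must turn out to be redundant against.
  for (int i = static_cast<int>(defs.size()) - 1; i >= 0; --i) {
    if (defs[i].v_reg != last.v_reg) return -1;  // Only a run of consecutive defs of one vreg.
    if (defs[i].prev_value == wanted) return i;  // The vreg already held this value before defs[i].
  }
  return -1;
}

int main() {
  // v0 := value 7; v0 := value 9; v0 := value 7 again  ->  the last two defs are dead.
  std::vector<Def> defs = { {0, 7, 5}, {0, 9, 7}, {0, 7, 9} };
  std::cout << FindKillableRunStart(defs) << "\n";  // Prints 1.
}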
+ vreg_chains_.AddMIRWithDef(phi, phi->dalvikInsn.vA, wide, current_value); + // Reset the previous value to avoid eventually eliminating the Phi itself (unless unused). + last_data = vreg_chains_.LastMIRData(); + last_data->prev_value.value = kNoValue; + last_data->prev_value_high.value = kNoValue; + } +} + +uint16_t GvnDeadCodeElimination::FindChangesToKill(uint16_t first_change, uint16_t last_change) { + // Process dependencies for changes in range [first_change, last_change) and record all + // changes that we need to kill. Return kNPos if there's a dependent change that must be + // kept unconditionally; otherwise the end of the range processed before encountering + // a change that defines a dalvik reg that we need to keep (last_change on full success). + changes_to_kill_.clear(); + dependent_vregs_->ClearAllBits(); + for (size_t change = first_change; change != last_change; ++change) { + MIRData* data = vreg_chains_.GetMIRData(change); + DCHECK(!data->uses_all_vregs); + bool must_not_depend = data->must_keep; + bool depends = false; + // Check if the MIR defines a vreg we're trying to eliminate. + if (data->has_def && vregs_to_kill_->IsBitSet(data->vreg_def)) { + if (change < kill_heads_[data->vreg_def]) { + must_not_depend = true; + } else { + depends = true; + } + } + if (data->has_def && data->wide_def && vregs_to_kill_->IsBitSet(data->vreg_def + 1)) { + if (change < kill_heads_[data->vreg_def + 1]) { + must_not_depend = true; + } else { + depends = true; + } + } + if (!depends) { + // Check for dependency through SSA reg uses. + SSARepresentation* ssa_rep = data->mir->ssa_rep; + for (int i = 0; i != ssa_rep->num_uses; ++i) { + if (dependent_vregs_->IsBitSet(mir_graph_->SRegToVReg(ssa_rep->uses[i]))) { + depends = true; + break; + } + } + } + // Now check if we can eliminate the insn if we need to. + if (depends && must_not_depend) { + return kNPos; + } + if (depends && data->has_def && + vreg_chains_.IsTopChange(change) && !vregs_to_kill_->IsBitSet(data->vreg_def) && + !unused_vregs_->IsBitSet(data->vreg_def) && + (!data->wide_def || !unused_vregs_->IsBitSet(data->vreg_def + 1))) { + // This is a top change but neither unnecessary nor one of the top kill changes. + return change; + } + // Finally, update the data. + if (depends) { + changes_to_kill_.push_back(change); + if (data->has_def) { + dependent_vregs_->SetBit(data->vreg_def); + if (data->wide_def) { + dependent_vregs_->SetBit(data->vreg_def + 1); + } + } + } else { + if (data->has_def) { + dependent_vregs_->ClearBit(data->vreg_def); + if (data->wide_def) { + dependent_vregs_->ClearBit(data->vreg_def + 1); + } + } + } + } + return last_change; +} + +void GvnDeadCodeElimination::BackwardPassTryToKillRevertVRegs() { +} + +bool GvnDeadCodeElimination::BackwardPassTryToKillLastMIR() { + MIRData* last_data = vreg_chains_.LastMIRData(); + if (last_data->must_keep) { + return false; + } + DCHECK(!last_data->uses_all_vregs); + if (!last_data->has_def) { + // Previously eliminated. + DCHECK_EQ(static_cast<int>(last_data->mir->dalvikInsn.opcode), static_cast<int>(kMirOpNop)); + vreg_chains_.RemoveTrailingNops(); + return true; + } + if (unused_vregs_->IsBitSet(last_data->vreg_def) || + (last_data->wide_def && unused_vregs_->IsBitSet(last_data->vreg_def + 1))) { + if (last_data->wide_def) { + // For wide defs, one of the vregs may still be considered needed, fix that. 
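The backward pass above amounts to reverse dead-store elimination on vregs: a removable definition into a vreg currently marked unused is killed, every use clears the corresponding bit, and instructions flagged uses_all_vregs reset the whole set. A standalone miniature of that loop, narrow defs only and with no must_keep handling:

#include <iostream>
#include <string>
#include <vector>

struct Insn {
  std::string text;
  int def_vreg;                // -1 if the instruction defines nothing.
  std::vector<int> use_vregs;
  bool uses_all_vregs;         // e.g. may throw: implicitly keeps every vreg live.
};

void BackwardPass(std::vector<Insn>& code, size_t num_vregs) {
  std::vector<bool> unused(num_vregs, false);    // At block end, every vreg counts as used.
  for (size_t i = code.size(); i != 0; ) {
    Insn& insn = code[--i];
    if (insn.uses_all_vregs) { unused.assign(num_vregs, false); continue; }
    if (insn.def_vreg >= 0 && unused[insn.def_vreg]) {
      insn.text = "nop";                         // Dead: the result is overwritten before any use.
      insn.def_vreg = -1;
      insn.use_vregs.clear();
      continue;
    }
    if (insn.def_vreg >= 0) unused[insn.def_vreg] = true;  // Earlier defs of this vreg are dead...
    for (int v : insn.use_vregs) unused[v] = false;        // ...unless something in between uses it.
  }
}

int main() {
  std::vector<Insn> code = {
    {"v0 = const 1", 0, {}, false},
    {"v0 = const 2", 0, {}, false},   // Makes the first const dead.
    {"return v0",   -1, {0}, true},
  };
  BackwardPass(code, 2);
  for (const Insn& insn : code) std::cout << insn.text << "\n";
}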
+ unused_vregs_->SetBit(last_data->vreg_def); + unused_vregs_->SetBit(last_data->vreg_def + 1); + } + KillMIR(last_data->mir); + vreg_chains_.RemoveLastMIRData(); + return true; + } + + vregs_to_kill_->ClearAllBits(); + size_t num_mirs = vreg_chains_.NumMIRs(); + DCHECK_NE(num_mirs, 0u); + uint16_t kill_change = num_mirs - 1u; + uint16_t start = num_mirs; + size_t num_killed_top_changes = 0u; + while (num_killed_top_changes != kMaxNumTopChangesToKill && + kill_change != kNPos && kill_change != num_mirs) { + ++num_killed_top_changes; + + DCHECK(vreg_chains_.IsTopChange(kill_change)); + MIRData* data = vreg_chains_.GetMIRData(kill_change); + int count = data->wide_def ? 2 : 1; + for (int v_reg = data->vreg_def, end = data->vreg_def + count; v_reg != end; ++v_reg) { + uint16_t kill_head = vreg_chains_.FindKillHead(v_reg, no_uses_all_since_); + if (kill_head == kNPos) { + return false; + } + kill_heads_[v_reg] = kill_head; + vregs_to_kill_->SetBit(v_reg); + start = std::min(start, kill_head); + } + DCHECK_LT(start, vreg_chains_.NumMIRs()); + + kill_change = FindChangesToKill(start, num_mirs); + } + + if (kill_change != num_mirs) { + return false; + } + + // Kill all MIRs marked as dependent. + for (uint32_t v_reg : vregs_to_kill_->Indexes()) { + // Rename s_regs or create Phi only once for each MIR (only for low word). + MIRData* data = vreg_chains_.GetMIRData(vreg_chains_.LastChange(v_reg)); + DCHECK(data->has_def); + if (data->vreg_def == v_reg) { + MIRData* kill_head_data = vreg_chains_.GetMIRData(kill_heads_[v_reg]); + RenameSRegDefOrCreatePhi(kill_head_data->PrevChange(v_reg), num_mirs, data->mir); + } else { + DCHECK_EQ(data->vreg_def + 1u, v_reg); + DCHECK_EQ(vreg_chains_.GetMIRData(kill_heads_[v_reg - 1u])->PrevChange(v_reg - 1u), + vreg_chains_.GetMIRData(kill_heads_[v_reg])->PrevChange(v_reg)); + } + } + unused_vregs_->Union(vregs_to_kill_); + for (auto it = changes_to_kill_.rbegin(), end = changes_to_kill_.rend(); it != end; ++it) { + MIRData* data = vreg_chains_.GetMIRData(*it); + DCHECK(!data->must_keep); + DCHECK(data->has_def); + vreg_chains_.RemoveChange(*it); + KillMIR(data); + } + + vreg_chains_.RemoveTrailingNops(); + return true; +} + +bool GvnDeadCodeElimination::RecordMIR(MIR* mir) { + bool must_keep = false; + bool uses_all_vregs = false; + bool is_move = false; + uint16_t opcode = mir->dalvikInsn.opcode; + switch (opcode) { + case kMirOpPhi: { + // We can't recognize wide variables in Phi from num_defs == 2 as we've got two Phis instead. + DCHECK_EQ(mir->ssa_rep->num_defs, 1); + int s_reg = mir->ssa_rep->defs[0]; + bool wide = false; + uint16_t new_value = lvn_->GetSregValue(s_reg); + if (new_value == kNoValue) { + wide = true; + new_value = lvn_->GetSregValueWide(s_reg); + if (new_value == kNoValue) { + return false; // Ignore the high word Phi. + } + } + + int v_reg = mir_graph_->SRegToVReg(s_reg); + DCHECK_EQ(vreg_chains_.CurrentValue(v_reg), kNoValue); // No previous def for v_reg. + if (wide) { + DCHECK_EQ(vreg_chains_.CurrentValue(v_reg + 1), kNoValue); + } + vreg_chains_.AddMIRWithDef(mir, v_reg, wide, new_value); + return true; // Avoid the common processing. + } + + case kMirOpNop: + case Instruction::NOP: + // Don't record NOPs. 
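The big RecordMIR() switch that follows sorts every opcode into a few buckets: freely removable, must_keep (a real side effect such as a volatile access), and must_keep plus uses_all_vregs (may throw or transfer control, so every vreg is implicitly live for a possible catch handler or interpreter state). A compact sketch of that classification shape with toy categories, not the full opcode table:

#include <iostream>

enum class ToyOp { kConst, kAddInt, kMoveException, kSgetVolatile, kInvokeStatic, kIfEqz };

struct Classification { bool must_keep; bool uses_all_vregs; };

Classification Classify(ToyOp op) {
  switch (op) {
    case ToyOp::kConst:
    case ToyOp::kAddInt:
      return {false, false};    // Pure computation: removable if the result goes unused.
    case ToyOp::kMoveException:
    case ToyOp::kSgetVolatile:
      return {true, false};     // Side effect, but no implicit dependence on all vregs.
    case ToyOp::kInvokeStatic:
    case ToyOp::kIfEqz:
    default:
      return {true, true};      // May throw or branch: keep it, and keep all vregs live.
  }
}

int main() {
  Classification c = Classify(ToyOp::kAddInt);
  std::cout << c.must_keep << " " << c.uses_all_vregs << "\n";  // Prints "0 0".
}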
+ return false; + + case kMirOpCheck: + must_keep = true; + uses_all_vregs = true; + break; + + case Instruction::RETURN_VOID: + case Instruction::RETURN: + case Instruction::RETURN_OBJECT: + case Instruction::RETURN_WIDE: + case Instruction::GOTO: + case Instruction::GOTO_16: + case Instruction::GOTO_32: + case Instruction::PACKED_SWITCH: + case Instruction::SPARSE_SWITCH: + case Instruction::IF_EQ: + case Instruction::IF_NE: + case Instruction::IF_LT: + case Instruction::IF_GE: + case Instruction::IF_GT: + case Instruction::IF_LE: + case Instruction::IF_EQZ: + case Instruction::IF_NEZ: + case Instruction::IF_LTZ: + case Instruction::IF_GEZ: + case Instruction::IF_GTZ: + case Instruction::IF_LEZ: + case kMirOpFusedCmplFloat: + case kMirOpFusedCmpgFloat: + case kMirOpFusedCmplDouble: + case kMirOpFusedCmpgDouble: + case kMirOpFusedCmpLong: + must_keep = true; + uses_all_vregs = true; // Keep the implicit dependencies on all vregs. + break; + + case Instruction::CONST_CLASS: + case Instruction::CONST_STRING: + case Instruction::CONST_STRING_JUMBO: + // NOTE: While we're currently treating CONST_CLASS, CONST_STRING and CONST_STRING_JUMBO + // as throwing but we could conceivably try and eliminate those exceptions if we're + // retrieving the class/string repeatedly. + must_keep = true; + uses_all_vregs = true; + break; + + case Instruction::MONITOR_ENTER: + case Instruction::MONITOR_EXIT: + // We can actually try and optimize across the acquire operation of MONITOR_ENTER, + // the value names provided by GVN reflect the possible changes to memory visibility. + // NOTE: In ART, MONITOR_ENTER and MONITOR_EXIT can throw only NPE. + must_keep = true; + uses_all_vregs = (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0; + break; + + case Instruction::INVOKE_DIRECT: + case Instruction::INVOKE_DIRECT_RANGE: + case Instruction::INVOKE_VIRTUAL: + case Instruction::INVOKE_VIRTUAL_RANGE: + case Instruction::INVOKE_SUPER: + case Instruction::INVOKE_SUPER_RANGE: + case Instruction::INVOKE_INTERFACE: + case Instruction::INVOKE_INTERFACE_RANGE: + case Instruction::INVOKE_STATIC: + case Instruction::INVOKE_STATIC_RANGE: + case Instruction::CHECK_CAST: + case Instruction::THROW: + case Instruction::FILLED_NEW_ARRAY: + case Instruction::FILLED_NEW_ARRAY_RANGE: + case Instruction::FILL_ARRAY_DATA: + must_keep = true; + uses_all_vregs = true; + break; + + case Instruction::NEW_INSTANCE: + case Instruction::NEW_ARRAY: + must_keep = true; + uses_all_vregs = true; + break; + + case kMirOpNullCheck: + DCHECK_EQ(mir->ssa_rep->num_uses, 1); + if ((mir->optimization_flags & MIR_IGNORE_NULL_CHECK) != 0) { + mir->ssa_rep->num_uses = 0; + mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop); + return false; + } + must_keep = true; + uses_all_vregs = true; + break; + + case Instruction::MOVE_RESULT: + case Instruction::MOVE_RESULT_OBJECT: + case Instruction::MOVE_RESULT_WIDE: + break; + + case Instruction::INSTANCE_OF: + break; + + case Instruction::MOVE_EXCEPTION: + must_keep = true; + break; + + case kMirOpCopy: + case Instruction::MOVE: + case Instruction::MOVE_FROM16: + case Instruction::MOVE_16: + case Instruction::MOVE_WIDE: + case Instruction::MOVE_WIDE_FROM16: + case Instruction::MOVE_WIDE_16: + case Instruction::MOVE_OBJECT: + case Instruction::MOVE_OBJECT_FROM16: + case Instruction::MOVE_OBJECT_16: { + is_move = true; + // If the MIR defining src vreg is known, allow renaming all uses of src vreg to dest vreg + // while updating the defining MIR to directly define dest vreg. 
However, changing Phi's + // def this way doesn't work without changing MIRs in other BBs. + int src_v_reg = mir_graph_->SRegToVReg(mir->ssa_rep->uses[0]); + int src_change = vreg_chains_.LastChange(src_v_reg); + if (src_change != kNPos) { + MIRData* src_data = vreg_chains_.GetMIRData(src_change); + if (static_cast<int>(src_data->mir->dalvikInsn.opcode) != kMirOpPhi) { + src_data->is_move_src = true; + } + } + break; + } + + case Instruction::CONST_4: + case Instruction::CONST_16: + case Instruction::CONST: + case Instruction::CONST_HIGH16: + case Instruction::CONST_WIDE_16: + case Instruction::CONST_WIDE_32: + case Instruction::CONST_WIDE: + case Instruction::CONST_WIDE_HIGH16: + case Instruction::ARRAY_LENGTH: + case Instruction::CMPL_FLOAT: + case Instruction::CMPG_FLOAT: + case Instruction::CMPL_DOUBLE: + case Instruction::CMPG_DOUBLE: + case Instruction::CMP_LONG: + case Instruction::NEG_INT: + case Instruction::NOT_INT: + case Instruction::NEG_LONG: + case Instruction::NOT_LONG: + case Instruction::NEG_FLOAT: + case Instruction::NEG_DOUBLE: + case Instruction::INT_TO_LONG: + case Instruction::INT_TO_FLOAT: + case Instruction::INT_TO_DOUBLE: + case Instruction::LONG_TO_INT: + case Instruction::LONG_TO_FLOAT: + case Instruction::LONG_TO_DOUBLE: + case Instruction::FLOAT_TO_INT: + case Instruction::FLOAT_TO_LONG: + case Instruction::FLOAT_TO_DOUBLE: + case Instruction::DOUBLE_TO_INT: + case Instruction::DOUBLE_TO_LONG: + case Instruction::DOUBLE_TO_FLOAT: + case Instruction::INT_TO_BYTE: + case Instruction::INT_TO_CHAR: + case Instruction::INT_TO_SHORT: + case Instruction::ADD_INT: + case Instruction::SUB_INT: + case Instruction::MUL_INT: + case Instruction::AND_INT: + case Instruction::OR_INT: + case Instruction::XOR_INT: + case Instruction::SHL_INT: + case Instruction::SHR_INT: + case Instruction::USHR_INT: + case Instruction::ADD_LONG: + case Instruction::SUB_LONG: + case Instruction::MUL_LONG: + case Instruction::AND_LONG: + case Instruction::OR_LONG: + case Instruction::XOR_LONG: + case Instruction::SHL_LONG: + case Instruction::SHR_LONG: + case Instruction::USHR_LONG: + case Instruction::ADD_FLOAT: + case Instruction::SUB_FLOAT: + case Instruction::MUL_FLOAT: + case Instruction::DIV_FLOAT: + case Instruction::REM_FLOAT: + case Instruction::ADD_DOUBLE: + case Instruction::SUB_DOUBLE: + case Instruction::MUL_DOUBLE: + case Instruction::DIV_DOUBLE: + case Instruction::REM_DOUBLE: + case Instruction::ADD_INT_2ADDR: + case Instruction::SUB_INT_2ADDR: + case Instruction::MUL_INT_2ADDR: + case Instruction::AND_INT_2ADDR: + case Instruction::OR_INT_2ADDR: + case Instruction::XOR_INT_2ADDR: + case Instruction::SHL_INT_2ADDR: + case Instruction::SHR_INT_2ADDR: + case Instruction::USHR_INT_2ADDR: + case Instruction::ADD_LONG_2ADDR: + case Instruction::SUB_LONG_2ADDR: + case Instruction::MUL_LONG_2ADDR: + case Instruction::AND_LONG_2ADDR: + case Instruction::OR_LONG_2ADDR: + case Instruction::XOR_LONG_2ADDR: + case Instruction::SHL_LONG_2ADDR: + case Instruction::SHR_LONG_2ADDR: + case Instruction::USHR_LONG_2ADDR: + case Instruction::ADD_FLOAT_2ADDR: + case Instruction::SUB_FLOAT_2ADDR: + case Instruction::MUL_FLOAT_2ADDR: + case Instruction::DIV_FLOAT_2ADDR: + case Instruction::REM_FLOAT_2ADDR: + case Instruction::ADD_DOUBLE_2ADDR: + case Instruction::SUB_DOUBLE_2ADDR: + case Instruction::MUL_DOUBLE_2ADDR: + case Instruction::DIV_DOUBLE_2ADDR: + case Instruction::REM_DOUBLE_2ADDR: + case Instruction::ADD_INT_LIT16: + case Instruction::RSUB_INT: + case Instruction::MUL_INT_LIT16: + case 
Instruction::AND_INT_LIT16: + case Instruction::OR_INT_LIT16: + case Instruction::XOR_INT_LIT16: + case Instruction::ADD_INT_LIT8: + case Instruction::RSUB_INT_LIT8: + case Instruction::MUL_INT_LIT8: + case Instruction::AND_INT_LIT8: + case Instruction::OR_INT_LIT8: + case Instruction::XOR_INT_LIT8: + case Instruction::SHL_INT_LIT8: + case Instruction::SHR_INT_LIT8: + case Instruction::USHR_INT_LIT8: + break; + + case Instruction::DIV_INT: + case Instruction::REM_INT: + case Instruction::DIV_LONG: + case Instruction::REM_LONG: + case Instruction::DIV_INT_2ADDR: + case Instruction::REM_INT_2ADDR: + case Instruction::DIV_LONG_2ADDR: + case Instruction::REM_LONG_2ADDR: + if ((mir->optimization_flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) { + must_keep = true; + uses_all_vregs = true; + } + break; + + case Instruction::DIV_INT_LIT16: + case Instruction::REM_INT_LIT16: + case Instruction::DIV_INT_LIT8: + case Instruction::REM_INT_LIT8: + if (mir->dalvikInsn.vC == 0) { // Explicit division by 0? + must_keep = true; + uses_all_vregs = true; + } + break; + + case Instruction::AGET_OBJECT: + case Instruction::AGET: + case Instruction::AGET_WIDE: + case Instruction::AGET_BOOLEAN: + case Instruction::AGET_BYTE: + case Instruction::AGET_CHAR: + case Instruction::AGET_SHORT: + if ((mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0 || + (mir->optimization_flags & MIR_IGNORE_RANGE_CHECK) == 0) { + must_keep = true; + uses_all_vregs = true; + } + break; + + case Instruction::APUT_OBJECT: + case Instruction::APUT: + case Instruction::APUT_WIDE: + case Instruction::APUT_BYTE: + case Instruction::APUT_BOOLEAN: + case Instruction::APUT_SHORT: + case Instruction::APUT_CHAR: + must_keep = true; + if ((mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0 || + (mir->optimization_flags & MIR_IGNORE_RANGE_CHECK) == 0) { + uses_all_vregs = true; + } + break; + + case Instruction::IGET_OBJECT: + case Instruction::IGET: + case Instruction::IGET_WIDE: + case Instruction::IGET_BOOLEAN: + case Instruction::IGET_BYTE: + case Instruction::IGET_CHAR: + case Instruction::IGET_SHORT: { + const MirIFieldLoweringInfo& info = mir_graph_->GetIFieldLoweringInfo(mir); + if ((mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0 || + !info.IsResolved() || !info.FastGet()) { + must_keep = true; + uses_all_vregs = true; + } else if (info.IsVolatile()) { + must_keep = true; + } + break; + } + + case Instruction::IPUT_OBJECT: + case Instruction::IPUT: + case Instruction::IPUT_WIDE: + case Instruction::IPUT_BOOLEAN: + case Instruction::IPUT_BYTE: + case Instruction::IPUT_CHAR: + case Instruction::IPUT_SHORT: { + must_keep = true; + const MirIFieldLoweringInfo& info = mir_graph_->GetIFieldLoweringInfo(mir); + if ((mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0 || + !info.IsResolved() || !info.FastPut()) { + uses_all_vregs = true; + } + break; + } + + case Instruction::SGET_OBJECT: + case Instruction::SGET: + case Instruction::SGET_WIDE: + case Instruction::SGET_BOOLEAN: + case Instruction::SGET_BYTE: + case Instruction::SGET_CHAR: + case Instruction::SGET_SHORT: { + const MirSFieldLoweringInfo& info = mir_graph_->GetSFieldLoweringInfo(mir); + if ((mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) == 0 || + !info.IsResolved() || !info.FastGet()) { + must_keep = true; + uses_all_vregs = true; + } else if (info.IsVolatile()) { + must_keep = true; + } + break; + } + + case Instruction::SPUT_OBJECT: + case Instruction::SPUT: + case Instruction::SPUT_WIDE: + case Instruction::SPUT_BOOLEAN: + case Instruction::SPUT_BYTE: + case 
Instruction::SPUT_CHAR: + case Instruction::SPUT_SHORT: { + must_keep = true; + const MirSFieldLoweringInfo& info = mir_graph_->GetSFieldLoweringInfo(mir); + if ((mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) == 0 || + !info.IsResolved() || !info.FastPut()) { + uses_all_vregs = true; + } + break; + } + + default: + LOG(FATAL) << "Unexpected opcode: " << opcode; + UNREACHABLE(); + break; + } + + if (mir->ssa_rep->num_defs != 0) { + DCHECK(mir->ssa_rep->num_defs == 1 || mir->ssa_rep->num_defs == 2); + bool wide = (mir->ssa_rep->num_defs == 2); + int s_reg = mir->ssa_rep->defs[0]; + int v_reg = mir_graph_->SRegToVReg(s_reg); + uint16_t new_value = wide ? lvn_->GetSregValueWide(s_reg) : lvn_->GetSregValue(s_reg); + DCHECK_NE(new_value, kNoValue); + + vreg_chains_.UpdateInitialVRegValue(v_reg, wide, lvn_); + vreg_chains_.AddMIRWithDef(mir, v_reg, wide, new_value); + if (is_move) { + // Allow renaming all uses of dest vreg to src vreg. + vreg_chains_.LastMIRData()->is_move = true; + } + } else { + vreg_chains_.AddMIRWithoutDef(mir); + DCHECK(!is_move) << opcode; + } + + if (must_keep) { + MIRData* last_data = vreg_chains_.LastMIRData(); + last_data->must_keep = true; + if (uses_all_vregs) { + last_data->uses_all_vregs = true; + no_uses_all_since_ = vreg_chains_.NumMIRs(); + } + } else { + DCHECK_NE(mir->ssa_rep->num_defs, 0) << opcode; + DCHECK(!uses_all_vregs) << opcode; + } + return true; +} + +} // namespace art diff --git a/compiler/dex/gvn_dead_code_elimination.h b/compiler/dex/gvn_dead_code_elimination.h new file mode 100644 index 0000000000..9a19f29970 --- /dev/null +++ b/compiler/dex/gvn_dead_code_elimination.h @@ -0,0 +1,166 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DEX_GVN_DEAD_CODE_ELIMINATION_H_ +#define ART_COMPILER_DEX_GVN_DEAD_CODE_ELIMINATION_H_ + +#include "base/arena_object.h" +#include "base/scoped_arena_containers.h" +#include "global_value_numbering.h" + +namespace art { + +class ArenaBitVector; +class BasicBlock; +class LocalValueNumbering; +class MIR; +class MIRGraph; + +/** + * @class DeadCodeElimination + * @details Eliminate dead code based on the results of global value numbering. + * Also get rid of MOVE insns when we can use the source instead of destination + * without affecting the vreg values at safepoints; this is useful in methods + * with a large number of vregs that frequently move values to and from low vregs + * to accommodate insns that can work only with the low 16 or 256 vregs. + */ +class GvnDeadCodeElimination : public DeletableArenaObject<kArenaAllocMisc> { + public: + GvnDeadCodeElimination(const GlobalValueNumbering* gvn, ScopedArenaAllocator* alloc); + + // Apply the DCE to a basic block. 
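Editor's aside (illustrative, not part of the original patch): the constructor above and the Apply() declaration that follows are the entire public surface of this pass. A minimal driver sketch, modeled on PerformDCE() in the new gvn_dead_code_elimination_test.cc further down; cu and gvn are placeholder names for an existing CompilationUnit* and a finished GlobalValueNumbering run (the test performs GVN and its code modifications first, see PerformGVN_DCE()):

  std::unique_ptr<ScopedArenaAllocator> allocator(ScopedArenaAllocator::Create(&cu->arena_stack));
  std::unique_ptr<GvnDeadCodeElimination> dce(
      new (allocator.get()) GvnDeadCodeElimination(gvn, allocator.get()));
  PreOrderDfsIterator iterator(cu->mir_graph.get());
  for (BasicBlock* bb = iterator.Next(); bb != nullptr; bb = iterator.Next()) {
    if (bb->block_type == kDalvikByteCode) {
      dce->Apply(bb);  // NOPs dead MIRs and renames s_regs/v_regs within the block.
    }
  }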
+ void Apply(BasicBlock* bb); + + private: + static constexpr uint16_t kNoValue = GlobalValueNumbering::kNoValue; + static constexpr uint16_t kNPos = 0xffffu; + static constexpr size_t kMaxNumTopChangesToKill = 2; + + struct VRegValue { + VRegValue() : value(kNoValue), change(kNPos) { } + + // Value name as reported by GVN, kNoValue if not available. + uint16_t value; + // Index of the change in mir_data_ that defined the value, kNPos if initial value for the BB. + uint16_t change; + }; + + struct MIRData { + explicit MIRData(MIR* m) + : mir(m), uses_all_vregs(false), must_keep(false), is_move(false), is_move_src(false), + has_def(false), wide_def(false), + low_def_over_high_word(false), high_def_over_low_word(false), vreg_def(0u), + prev_value(), prev_value_high() { + } + + uint16_t PrevChange(int v_reg) const; + void SetPrevChange(int v_reg, uint16_t change); + void RemovePrevChange(int v_reg, MIRData* prev_data); + + MIR* mir; + bool uses_all_vregs : 1; // If mir uses all vregs, uses in mir->ssa_rep are irrelevant. + bool must_keep : 1; + bool is_move : 1; + bool is_move_src : 1; + bool has_def : 1; + bool wide_def : 1; + bool low_def_over_high_word : 1; + bool high_def_over_low_word : 1; + uint16_t vreg_def; + VRegValue prev_value; + VRegValue prev_value_high; // For wide defs. + }; + + class VRegChains { + public: + VRegChains(uint32_t num_vregs, ScopedArenaAllocator* alloc); + + void Reset(); + + void AddMIRWithDef(MIR* mir, int v_reg, bool wide, uint16_t new_value); + void AddMIRWithoutDef(MIR* mir); + void RemoveLastMIRData(); + void RemoveTrailingNops(); + + size_t NumMIRs() const; + MIRData* GetMIRData(size_t pos); + MIRData* LastMIRData(); + + uint32_t NumVRegs() const; + void InsertInitialValueHigh(int v_reg, uint16_t value); + void UpdateInitialVRegValue(int v_reg, bool wide, const LocalValueNumbering* lvn); + uint16_t LastChange(int v_reg); + uint16_t CurrentValue(int v_reg); + + uint16_t FindKillHead(int v_reg, uint16_t cutoff); + uint16_t FindFirstChangeAfter(int v_reg, uint16_t change) const; + void ReplaceChange(uint16_t old_change, uint16_t new_change); + void RemoveChange(uint16_t change); + bool IsTopChange(uint16_t change) const; + bool IsSRegUsed(uint16_t first_change, uint16_t last_change, int s_reg) const; + void RenameSRegUses(uint16_t first_change, uint16_t last_change, + int old_s_reg, int new_s_reg, bool wide); + void RenameVRegUses(uint16_t first_change, uint16_t last_change, + int old_s_reg, int old_v_reg, int new_s_reg, int new_v_reg); + + private: + const uint32_t num_vregs_; + VRegValue* const vreg_data_; + ScopedArenaVector<MIRData> mir_data_; + }; + + void RecordPass(); + void BackwardPass(); + + void KillMIR(MIRData* data); + static void KillMIR(MIR* mir); + static void ChangeBinOp2AddrToPlainBinOp(MIR* mir); + MIR* CreatePhi(int s_reg, bool fp); + MIR* RenameSRegDefOrCreatePhi(uint16_t def_change, uint16_t last_change, MIR* mir_to_kill); + + // Update state variables going backwards through a MIR. 
+ void BackwardPassProcessLastMIR(); + + uint16_t FindChangesToKill(uint16_t first_change, uint16_t last_change); + void BackwardPassTryToKillRevertVRegs(); + bool BackwardPassTryToKillLastMIR(); + + void RecordPassKillMoveByRenamingSrcDef(uint16_t src_change, uint16_t move_change); + void RecordPassTryToKillOverwrittenMoveOrMoveSrc(uint16_t check_change); + void RecordPassTryToKillOverwrittenMoveOrMoveSrc(); + void RecordPassTryToKillLastMIR(); + + bool RecordMIR(MIR* mir); + + const GlobalValueNumbering* const gvn_; + MIRGraph* const mir_graph_; + + VRegChains vreg_chains_; + BasicBlock* bb_; + const LocalValueNumbering* lvn_; + size_t no_uses_all_since_; // The change index after the last change with uses_all_vregs set. + + // Data used when processing MIRs in reverse order. + ArenaBitVector* unused_vregs_; // vregs that are not needed later. + ArenaBitVector* vregs_to_kill_; // vregs that revert to a previous value. + uint16_t* kill_heads_; // For each vreg in vregs_to_kill_, the first change to kill. + ScopedArenaVector<uint16_t> changes_to_kill_; + ArenaBitVector* dependent_vregs_; +}; + +} // namespace art + +#endif // ART_COMPILER_DEX_GVN_DEAD_CODE_ELIMINATION_H_ diff --git a/compiler/dex/gvn_dead_code_elimination_test.cc b/compiler/dex/gvn_dead_code_elimination_test.cc new file mode 100644 index 0000000000..954e9f1d37 --- /dev/null +++ b/compiler/dex/gvn_dead_code_elimination_test.cc @@ -0,0 +1,1800 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "dataflow_iterator-inl.h" +#include "dex/mir_field_info.h" +#include "global_value_numbering.h" +#include "gvn_dead_code_elimination.h" +#include "local_value_numbering.h" +#include "gtest/gtest.h" + +namespace art { + +class GvnDeadCodeEliminationTest : public testing::Test { + protected: + static constexpr uint16_t kNoValue = GlobalValueNumbering::kNoValue; + + struct IFieldDef { + uint16_t field_idx; + uintptr_t declaring_dex_file; + uint16_t declaring_field_idx; + bool is_volatile; + DexMemAccessType type; + }; + + struct SFieldDef { + uint16_t field_idx; + uintptr_t declaring_dex_file; + uint16_t declaring_field_idx; + bool is_volatile; + DexMemAccessType type; + }; + + struct BBDef { + static constexpr size_t kMaxSuccessors = 4; + static constexpr size_t kMaxPredecessors = 4; + + BBType type; + size_t num_successors; + BasicBlockId successors[kMaxPredecessors]; + size_t num_predecessors; + BasicBlockId predecessors[kMaxPredecessors]; + }; + + struct MIRDef { + static constexpr size_t kMaxSsaDefs = 2; + static constexpr size_t kMaxSsaUses = 4; + + BasicBlockId bbid; + Instruction::Code opcode; + int64_t value; + uint32_t field_info; + size_t num_uses; + int32_t uses[kMaxSsaUses]; + size_t num_defs; + int32_t defs[kMaxSsaDefs]; + }; + +#define DEF_SUCC0() \ + 0u, { } +#define DEF_SUCC1(s1) \ + 1u, { s1 } +#define DEF_SUCC2(s1, s2) \ + 2u, { s1, s2 } +#define DEF_SUCC3(s1, s2, s3) \ + 3u, { s1, s2, s3 } +#define DEF_SUCC4(s1, s2, s3, s4) \ + 4u, { s1, s2, s3, s4 } +#define DEF_PRED0() \ + 0u, { } +#define DEF_PRED1(p1) \ + 1u, { p1 } +#define DEF_PRED2(p1, p2) \ + 2u, { p1, p2 } +#define DEF_PRED3(p1, p2, p3) \ + 3u, { p1, p2, p3 } +#define DEF_PRED4(p1, p2, p3, p4) \ + 4u, { p1, p2, p3, p4 } +#define DEF_BB(type, succ, pred) \ + { type, succ, pred } + +#define DEF_CONST(bb, opcode, reg, value) \ + { bb, opcode, value, 0u, 0, { }, 1, { reg } } +#define DEF_CONST_WIDE(bb, opcode, reg, value) \ + { bb, opcode, value, 0u, 0, { }, 2, { reg, reg + 1 } } +#define DEF_CONST_STRING(bb, opcode, reg, index) \ + { bb, opcode, index, 0u, 0, { }, 1, { reg } } +#define DEF_IGET(bb, opcode, reg, obj, field_info) \ + { bb, opcode, 0u, field_info, 1, { obj }, 1, { reg } } +#define DEF_IGET_WIDE(bb, opcode, reg, obj, field_info) \ + { bb, opcode, 0u, field_info, 1, { obj }, 2, { reg, reg + 1 } } +#define DEF_IPUT(bb, opcode, reg, obj, field_info) \ + { bb, opcode, 0u, field_info, 2, { reg, obj }, 0, { } } +#define DEF_IPUT_WIDE(bb, opcode, reg, obj, field_info) \ + { bb, opcode, 0u, field_info, 3, { reg, reg + 1, obj }, 0, { } } +#define DEF_SGET(bb, opcode, reg, field_info) \ + { bb, opcode, 0u, field_info, 0, { }, 1, { reg } } +#define DEF_SGET_WIDE(bb, opcode, reg, field_info) \ + { bb, opcode, 0u, field_info, 0, { }, 2, { reg, reg + 1 } } +#define DEF_SPUT(bb, opcode, reg, field_info) \ + { bb, opcode, 0u, field_info, 1, { reg }, 0, { } } +#define DEF_SPUT_WIDE(bb, opcode, reg, field_info) \ + { bb, opcode, 0u, field_info, 2, { reg, reg + 1 }, 0, { } } +#define DEF_AGET(bb, opcode, reg, obj, idx) \ + { bb, opcode, 0u, 0u, 2, { obj, idx }, 1, { reg } } +#define DEF_AGET_WIDE(bb, opcode, reg, obj, idx) \ + { bb, opcode, 0u, 0u, 2, { obj, idx }, 2, { reg, reg + 1 } } +#define DEF_APUT(bb, opcode, reg, obj, idx) \ + { bb, opcode, 0u, 0u, 3, { reg, obj, idx }, 0, { } } +#define DEF_APUT_WIDE(bb, opcode, reg, obj, idx) \ + { bb, opcode, 0u, 0u, 4, { reg, reg + 1, obj, idx }, 0, { } } +#define DEF_INVOKE1(bb, opcode, reg) \ + { bb, opcode, 0u, 0u, 1, { reg }, 0, { } } +#define 
DEF_UNIQUE_REF(bb, opcode, reg) \ + { bb, opcode, 0u, 0u, 0, { }, 1, { reg } } // CONST_CLASS, CONST_STRING, NEW_ARRAY, ... +#define DEF_IFZ(bb, opcode, reg) \ + { bb, opcode, 0u, 0u, 1, { reg }, 0, { } } +#define DEF_MOVE(bb, opcode, reg, src) \ + { bb, opcode, 0u, 0u, 1, { src }, 1, { reg } } +#define DEF_MOVE_WIDE(bb, opcode, reg, src) \ + { bb, opcode, 0u, 0u, 2, { src, src + 1 }, 2, { reg, reg + 1 } } +#define DEF_PHI2(bb, reg, src1, src2) \ + { bb, static_cast<Instruction::Code>(kMirOpPhi), 0, 0u, 2u, { src1, src2 }, 1, { reg } } +#define DEF_UNOP(bb, opcode, result, src1) \ + { bb, opcode, 0u, 0u, 1, { src1 }, 1, { result } } +#define DEF_BINOP(bb, opcode, result, src1, src2) \ + { bb, opcode, 0u, 0u, 2, { src1, src2 }, 1, { result } } + + void DoPrepareIFields(const IFieldDef* defs, size_t count) { + cu_.mir_graph->ifield_lowering_infos_.clear(); + cu_.mir_graph->ifield_lowering_infos_.reserve(count); + for (size_t i = 0u; i != count; ++i) { + const IFieldDef* def = &defs[i]; + MirIFieldLoweringInfo field_info(def->field_idx, def->type); + if (def->declaring_dex_file != 0u) { + field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file); + field_info.declaring_field_idx_ = def->declaring_field_idx; + field_info.flags_ = + MirIFieldLoweringInfo::kFlagFastGet | MirIFieldLoweringInfo::kFlagFastPut | + (field_info.flags_ & ~(def->is_volatile ? 0u : MirIFieldLoweringInfo::kFlagIsVolatile)); + } + cu_.mir_graph->ifield_lowering_infos_.push_back(field_info); + } + } + + template <size_t count> + void PrepareIFields(const IFieldDef (&defs)[count]) { + DoPrepareIFields(defs, count); + } + + void DoPrepareSFields(const SFieldDef* defs, size_t count) { + cu_.mir_graph->sfield_lowering_infos_.clear(); + cu_.mir_graph->sfield_lowering_infos_.reserve(count); + for (size_t i = 0u; i != count; ++i) { + const SFieldDef* def = &defs[i]; + MirSFieldLoweringInfo field_info(def->field_idx, def->type); + // Mark even unresolved fields as initialized. + field_info.flags_ |= MirSFieldLoweringInfo::kFlagClassIsInitialized; + // NOTE: MirSFieldLoweringInfo::kFlagClassIsInDexCache isn't used by GVN. + if (def->declaring_dex_file != 0u) { + field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file); + field_info.declaring_field_idx_ = def->declaring_field_idx; + field_info.flags_ = + MirSFieldLoweringInfo::kFlagFastGet | MirSFieldLoweringInfo::kFlagFastPut | + (field_info.flags_ & ~(def->is_volatile ? 0u : MirSFieldLoweringInfo::kFlagIsVolatile)); + } + cu_.mir_graph->sfield_lowering_infos_.push_back(field_info); + } + } + + template <size_t count> + void PrepareSFields(const SFieldDef (&defs)[count]) { + DoPrepareSFields(defs, count); + } + + void DoPrepareBasicBlocks(const BBDef* defs, size_t count) { + cu_.mir_graph->block_id_map_.clear(); + cu_.mir_graph->block_list_.clear(); + ASSERT_LT(3u, count); // null, entry, exit and at least one bytecode block. + ASSERT_EQ(kNullBlock, defs[0].type); + ASSERT_EQ(kEntryBlock, defs[1].type); + ASSERT_EQ(kExitBlock, defs[2].type); + for (size_t i = 0u; i != count; ++i) { + const BBDef* def = &defs[i]; + BasicBlock* bb = cu_.mir_graph->CreateNewBB(def->type); + if (def->num_successors <= 2) { + bb->successor_block_list_type = kNotUsed; + bb->fall_through = (def->num_successors >= 1) ? def->successors[0] : 0u; + bb->taken = (def->num_successors >= 2) ? 
def->successors[1] : 0u; + } else { + bb->successor_block_list_type = kPackedSwitch; + bb->fall_through = 0u; + bb->taken = 0u; + bb->successor_blocks.reserve(def->num_successors); + for (size_t j = 0u; j != def->num_successors; ++j) { + SuccessorBlockInfo* successor_block_info = + static_cast<SuccessorBlockInfo*>(cu_.arena.Alloc(sizeof(SuccessorBlockInfo), + kArenaAllocSuccessor)); + successor_block_info->block = j; + successor_block_info->key = 0u; // Not used by class init check elimination. + bb->successor_blocks.push_back(successor_block_info); + } + } + bb->predecessors.assign(def->predecessors, def->predecessors + def->num_predecessors); + if (def->type == kDalvikByteCode || def->type == kEntryBlock || def->type == kExitBlock) { + bb->data_flow_info = static_cast<BasicBlockDataFlow*>( + cu_.arena.Alloc(sizeof(BasicBlockDataFlow), kArenaAllocDFInfo)); + bb->data_flow_info->live_in_v = live_in_v_; + bb->data_flow_info->vreg_to_ssa_map_exit = nullptr; + } + } + ASSERT_EQ(count, cu_.mir_graph->block_list_.size()); + cu_.mir_graph->entry_block_ = cu_.mir_graph->block_list_[1]; + ASSERT_EQ(kEntryBlock, cu_.mir_graph->entry_block_->block_type); + cu_.mir_graph->exit_block_ = cu_.mir_graph->block_list_[2]; + ASSERT_EQ(kExitBlock, cu_.mir_graph->exit_block_->block_type); + } + + template <size_t count> + void PrepareBasicBlocks(const BBDef (&defs)[count]) { + DoPrepareBasicBlocks(defs, count); + } + + int SRegToVReg(int32_t s_reg, bool wide) { + int v_reg = cu_.mir_graph->SRegToVReg(s_reg); + CHECK_LT(static_cast<size_t>(v_reg), num_vregs_); + if (wide) { + CHECK_LT(static_cast<size_t>(v_reg + 1), num_vregs_); + } + return v_reg; + } + + int SRegToVReg(int32_t* uses, size_t* use, bool wide) { + int v_reg = SRegToVReg(uses[*use], wide); + if (wide) { + CHECK_EQ(uses[*use] + 1, uses[*use + 1]); + *use += 2u; + } else { + *use += 1u; + } + return v_reg; + } + + void DoPrepareMIRs(const MIRDef* defs, size_t count) { + mir_count_ = count; + mirs_ = reinterpret_cast<MIR*>(cu_.arena.Alloc(sizeof(MIR) * count, kArenaAllocMIR)); + ssa_reps_.resize(count); + for (size_t i = 0u; i != count; ++i) { + const MIRDef* def = &defs[i]; + MIR* mir = &mirs_[i]; + ASSERT_LT(def->bbid, cu_.mir_graph->block_list_.size()); + BasicBlock* bb = cu_.mir_graph->block_list_[def->bbid]; + bb->AppendMIR(mir); + mir->dalvikInsn.opcode = def->opcode; + mir->dalvikInsn.vB = static_cast<int32_t>(def->value); + mir->dalvikInsn.vB_wide = def->value; + if (IsInstructionIGetOrIPut(def->opcode)) { + ASSERT_LT(def->field_info, cu_.mir_graph->ifield_lowering_infos_.size()); + mir->meta.ifield_lowering_info = def->field_info; + ASSERT_EQ(cu_.mir_graph->ifield_lowering_infos_[def->field_info].MemAccessType(), + IGetOrIPutMemAccessType(def->opcode)); + } else if (IsInstructionSGetOrSPut(def->opcode)) { + ASSERT_LT(def->field_info, cu_.mir_graph->sfield_lowering_infos_.size()); + mir->meta.sfield_lowering_info = def->field_info; + ASSERT_EQ(cu_.mir_graph->sfield_lowering_infos_[def->field_info].MemAccessType(), + SGetOrSPutMemAccessType(def->opcode)); + } else if (def->opcode == static_cast<Instruction::Code>(kMirOpPhi)) { + mir->meta.phi_incoming = + allocator_->AllocArray<BasicBlockId>(def->num_uses, kArenaAllocDFInfo); + ASSERT_EQ(def->num_uses, bb->predecessors.size()); + std::copy(bb->predecessors.begin(), bb->predecessors.end(), mir->meta.phi_incoming); + } + mir->ssa_rep = &ssa_reps_[i]; + cu_.mir_graph->AllocateSSAUseData(mir, def->num_uses); + std::copy_n(def->uses, def->num_uses, mir->ssa_rep->uses); + // Keep 
mir->ssa_rep->fp_use[.] zero-initialized (false). Not used by DCE, only copied. + cu_.mir_graph->AllocateSSADefData(mir, def->num_defs); + std::copy_n(def->defs, def->num_defs, mir->ssa_rep->defs); + // Keep mir->ssa_rep->fp_def[.] zero-initialized (false). Not used by DCE, only copied. + mir->dalvikInsn.opcode = def->opcode; + mir->offset = i; // LVN uses offset only for debug output + mir->optimization_flags = 0u; + uint64_t df_attrs = MIRGraph::GetDataFlowAttributes(mir); + if ((df_attrs & DF_DA) != 0) { + CHECK_NE(def->num_defs, 0u); + mir->dalvikInsn.vA = SRegToVReg(def->defs[0], (df_attrs & DF_A_WIDE) != 0); + bb->data_flow_info->vreg_to_ssa_map_exit[mir->dalvikInsn.vA] = def->defs[0]; + if ((df_attrs & DF_A_WIDE) != 0) { + CHECK_EQ(def->defs[0] + 1, def->defs[1]); + bb->data_flow_info->vreg_to_ssa_map_exit[mir->dalvikInsn.vA + 1u] = def->defs[0] + 1; + } + } + if ((df_attrs & (DF_UA | DF_UB | DF_UC)) != 0) { + size_t use = 0; + if ((df_attrs & DF_UA) != 0) { + mir->dalvikInsn.vA = SRegToVReg(mir->ssa_rep->uses, &use, (df_attrs & DF_A_WIDE) != 0); + } + if ((df_attrs & DF_UB) != 0) { + mir->dalvikInsn.vB = SRegToVReg(mir->ssa_rep->uses, &use, (df_attrs & DF_B_WIDE) != 0); + } + if ((df_attrs & DF_UC) != 0) { + mir->dalvikInsn.vC = SRegToVReg(mir->ssa_rep->uses, &use, (df_attrs & DF_C_WIDE) != 0); + } + DCHECK_EQ(def->num_uses, use); + } + } + DexFile::CodeItem* code_item = static_cast<DexFile::CodeItem*>( + cu_.arena.Alloc(sizeof(DexFile::CodeItem), kArenaAllocMisc)); + code_item->insns_size_in_code_units_ = 2u * count; + code_item->registers_size_ = kMaxVRegs; + cu_.mir_graph->current_code_item_ = code_item; + } + + template <size_t count> + void PrepareMIRs(const MIRDef (&defs)[count]) { + DoPrepareMIRs(defs, count); + } + + template <size_t count> + void PrepareSRegToVRegMap(const int (&map)[count]) { + cu_.mir_graph->ssa_base_vregs_.assign(map, map + count); + num_vregs_ = *std::max_element(map, map + count) + 1u; + AllNodesIterator iterator(cu_.mir_graph.get()); + for (BasicBlock* bb = iterator.Next(); bb != nullptr; bb = iterator.Next()) { + if (bb->data_flow_info != nullptr) { + bb->data_flow_info->vreg_to_ssa_map_exit = static_cast<int32_t*>( + cu_.arena.Alloc(sizeof(int32_t) * num_vregs_, kArenaAllocDFInfo)); + std::fill_n(bb->data_flow_info->vreg_to_ssa_map_exit, num_vregs_, INVALID_SREG); + } + } + } + + void PerformGVN() { + cu_.mir_graph->SSATransformationStart(); + cu_.mir_graph->ComputeDFSOrders(); + cu_.mir_graph->ComputeDominators(); + cu_.mir_graph->ComputeTopologicalSortOrder(); + cu_.mir_graph->SSATransformationEnd(); + cu_.mir_graph->temp_.gvn.ifield_ids = GlobalValueNumbering::PrepareGvnFieldIds( + allocator_.get(), cu_.mir_graph->ifield_lowering_infos_); + cu_.mir_graph->temp_.gvn.sfield_ids = GlobalValueNumbering::PrepareGvnFieldIds( + allocator_.get(), cu_.mir_graph->sfield_lowering_infos_); + ASSERT_TRUE(gvn_ == nullptr); + gvn_.reset(new (allocator_.get()) GlobalValueNumbering(&cu_, allocator_.get(), + GlobalValueNumbering::kModeGvn)); + value_names_.resize(mir_count_, 0xffffu); + LoopRepeatingTopologicalSortIterator iterator(cu_.mir_graph.get()); + bool change = false; + for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) { + LocalValueNumbering* lvn = gvn_->PrepareBasicBlock(bb); + if (lvn != nullptr) { + for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) { + value_names_[mir - mirs_] = lvn->GetValueNumber(mir); + } + } + change = (lvn != nullptr) && gvn_->FinishBasicBlock(bb); + 
ASSERT_TRUE(gvn_->Good()); + } + } + + void PerformGVNCodeModifications() { + ASSERT_TRUE(gvn_ != nullptr); + ASSERT_TRUE(gvn_->Good()); + gvn_->StartPostProcessing(); + TopologicalSortIterator iterator(cu_.mir_graph.get()); + for (BasicBlock* bb = iterator.Next(); bb != nullptr; bb = iterator.Next()) { + LocalValueNumbering* lvn = gvn_->PrepareBasicBlock(bb); + if (lvn != nullptr) { + for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) { + uint16_t value_name = lvn->GetValueNumber(mir); + ASSERT_EQ(value_name, value_names_[mir - mirs_]); + } + } + bool change = (lvn != nullptr) && gvn_->FinishBasicBlock(bb); + ASSERT_FALSE(change); + ASSERT_TRUE(gvn_->Good()); + } + } + + void FillVregToSsaRegExitMaps() { + // Fill in vreg_to_ssa_map_exit for each BB. + PreOrderDfsIterator iterator(cu_.mir_graph.get()); + for (BasicBlock* bb = iterator.Next(); bb != nullptr; bb = iterator.Next()) { + if (bb->block_type == kDalvikByteCode) { + CHECK(!bb->predecessors.empty()); + BasicBlock* pred_bb = cu_.mir_graph->GetBasicBlock(bb->predecessors[0]); + for (size_t v_reg = 0; v_reg != num_vregs_; ++v_reg) { + if (bb->data_flow_info->vreg_to_ssa_map_exit[v_reg] == INVALID_SREG) { + bb->data_flow_info->vreg_to_ssa_map_exit[v_reg] = + pred_bb->data_flow_info->vreg_to_ssa_map_exit[v_reg]; + } + } + } + } + } + + void PerformDCE() { + FillVregToSsaRegExitMaps(); + cu_.mir_graph->GetNumOfCodeAndTempVRs(); + dce_.reset(new (allocator_.get()) GvnDeadCodeElimination(gvn_.get(), allocator_.get())); + PreOrderDfsIterator iterator(cu_.mir_graph.get()); + for (BasicBlock* bb = iterator.Next(); bb != nullptr; bb = iterator.Next()) { + if (bb->block_type == kDalvikByteCode) { + dce_->Apply(bb); + } + } + } + + void PerformGVN_DCE() { + PerformGVN(); + PerformGVNCodeModifications(); // Eliminate null/range checks. + PerformDCE(); + } + + template <size_t count> + void ExpectValueNamesNE(const size_t (&indexes)[count]) { + for (size_t i1 = 0; i1 != count; ++i1) { + size_t idx1 = indexes[i1]; + for (size_t i2 = i1 + 1; i2 != count; ++i2) { + size_t idx2 = indexes[i2]; + EXPECT_NE(value_names_[idx1], value_names_[idx2]) << idx1 << " " << idx2; + } + } + } + + template <size_t count> + void ExpectNoNullCheck(const size_t (&indexes)[count]) { + for (size_t i = 0; i != count; ++i) { + size_t idx = indexes[i]; + EXPECT_EQ(MIR_IGNORE_NULL_CHECK, mirs_[idx].optimization_flags & MIR_IGNORE_NULL_CHECK) + << idx; + } + size_t num_no_null_ck = 0u; + for (size_t i = 0; i != mir_count_; ++i) { + if ((mirs_[i].optimization_flags & MIR_IGNORE_NULL_CHECK) != 0) { + ++num_no_null_ck; + } + } + EXPECT_EQ(count, num_no_null_ck); + } + + GvnDeadCodeEliminationTest() + : pool_(), + cu_(&pool_, kRuntimeISA, nullptr, nullptr), + num_vregs_(0u), + mir_count_(0u), + mirs_(nullptr), + ssa_reps_(), + allocator_(), + gvn_(), + dce_(), + value_names_(), + live_in_v_(new (&cu_.arena) ArenaBitVector(&cu_.arena, kMaxSsaRegs, false, kBitMapMisc)) { + cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena)); + cu_.access_flags = kAccStatic; // Don't let "this" interfere with this test. + allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack)); + // By default, the zero-initialized reg_location_[.] with ref == false tells LVN that + // 0 constants are integral, not references. Nothing else is used by LVN/GVN. + cu_.mir_graph->reg_location_ = static_cast<RegLocation*>(cu_.arena.Alloc( + kMaxSsaRegs * sizeof(cu_.mir_graph->reg_location_[0]), kArenaAllocRegAlloc)); + // Bind all possible sregs to live vregs for test purposes. 
+ live_in_v_->SetInitialBits(kMaxSsaRegs); + cu_.mir_graph->ssa_base_vregs_.reserve(kMaxSsaRegs); + cu_.mir_graph->ssa_subscripts_.reserve(kMaxSsaRegs); + for (unsigned int i = 0; i < kMaxSsaRegs; i++) { + cu_.mir_graph->ssa_base_vregs_.push_back(i); + cu_.mir_graph->ssa_subscripts_.push_back(0); + } + // Set shorty for a void-returning method without arguments. + cu_.shorty = "V"; + } + + static constexpr size_t kMaxSsaRegs = 16384u; + static constexpr size_t kMaxVRegs = 256u; + + ArenaPool pool_; + CompilationUnit cu_; + size_t num_vregs_; + size_t mir_count_; + MIR* mirs_; + std::vector<SSARepresentation> ssa_reps_; + std::unique_ptr<ScopedArenaAllocator> allocator_; + std::unique_ptr<GlobalValueNumbering> gvn_; + std::unique_ptr<GvnDeadCodeElimination> dce_; + std::vector<uint16_t> value_names_; + ArenaBitVector* live_in_v_; +}; + +constexpr uint16_t GvnDeadCodeEliminationTest::kNoValue; + +class GvnDeadCodeEliminationTestSimple : public GvnDeadCodeEliminationTest { + public: + GvnDeadCodeEliminationTestSimple(); + + private: + static const BBDef kSimpleBbs[]; +}; + +const GvnDeadCodeEliminationTest::BBDef GvnDeadCodeEliminationTestSimple::kSimpleBbs[] = { + DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()), + DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()), + DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(3)), + DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(1)), +}; + +GvnDeadCodeEliminationTestSimple::GvnDeadCodeEliminationTestSimple() + : GvnDeadCodeEliminationTest() { + PrepareBasicBlocks(kSimpleBbs); +} + +class GvnDeadCodeEliminationTestDiamond : public GvnDeadCodeEliminationTest { + public: + GvnDeadCodeEliminationTestDiamond(); + + private: + static const BBDef kDiamondBbs[]; +}; + +const GvnDeadCodeEliminationTest::BBDef GvnDeadCodeEliminationTestDiamond::kDiamondBbs[] = { + DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()), + DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()), + DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(6)), + DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 5), DEF_PRED1(1)), // Block #3, top of the diamond. + DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)), // Block #4, left side. + DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)), // Block #5, right side. + DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED2(4, 5)), // Block #6, bottom. +}; + +GvnDeadCodeEliminationTestDiamond::GvnDeadCodeEliminationTestDiamond() + : GvnDeadCodeEliminationTest() { + PrepareBasicBlocks(kDiamondBbs); +} + +class GvnDeadCodeEliminationTestLoop : public GvnDeadCodeEliminationTest { + public: + GvnDeadCodeEliminationTestLoop(); + + private: + static const BBDef kLoopBbs[]; +}; + +const GvnDeadCodeEliminationTest::BBDef GvnDeadCodeEliminationTestLoop::kLoopBbs[] = { + DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()), + DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()), + DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(5)), + DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)), + DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 4), DEF_PRED2(3, 4)), // "taken" loops to self. 
+ DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(4)), +}; + +GvnDeadCodeEliminationTestLoop::GvnDeadCodeEliminationTestLoop() + : GvnDeadCodeEliminationTest() { + PrepareBasicBlocks(kLoopBbs); +} + +TEST_F(GvnDeadCodeEliminationTestSimple, Rename1) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + { 1u, 1u, 1u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u), + DEF_MOVE(3, Instruction::MOVE_OBJECT, 2u, 0u), + DEF_IGET(3, Instruction::IGET, 3u, 2u, 1u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 2 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 3 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[0], value_names_[2]); + + const size_t no_null_ck_indexes[] = { 1, 3 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, true, false + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the IGET uses the s_reg 0, v_reg 0, defined by mirs_[0]. + ASSERT_EQ(1, mirs_[3].ssa_rep->num_uses); + EXPECT_EQ(0, mirs_[3].ssa_rep->uses[0]); + EXPECT_EQ(0u, mirs_[3].dalvikInsn.vB); +} + +TEST_F(GvnDeadCodeEliminationTestSimple, Rename2) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + { 1u, 1u, 1u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u), + DEF_MOVE(3, Instruction::MOVE_OBJECT, 2u, 0u), + DEF_IGET(3, Instruction::IGET, 3u, 2u, 1u), + DEF_CONST(3, Instruction::CONST, 4u, 1000), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 2 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 3, 4 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[0], value_names_[2]); + + const size_t no_null_ck_indexes[] = { 1, 3 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, true, false, false + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the IGET uses the s_reg 0, v_reg 0, defined by mirs_[0]. 
+ ASSERT_EQ(1, mirs_[3].ssa_rep->num_uses); + EXPECT_EQ(0, mirs_[3].ssa_rep->uses[0]); + EXPECT_EQ(0u, mirs_[3].dalvikInsn.vB); +} + +TEST_F(GvnDeadCodeEliminationTestSimple, Rename3) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + { 1u, 1u, 1u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u), + DEF_MOVE(3, Instruction::MOVE_OBJECT, 2u, 0u), + DEF_IGET(3, Instruction::IGET, 3u, 2u, 1u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 0 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 3 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[0], value_names_[2]); + + const size_t no_null_ck_indexes[] = { 1, 3 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, true, false + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the NEW_INSTANCE defines the s_reg 2, v_reg 2, originally defined by the move. + ASSERT_EQ(1, mirs_[0].ssa_rep->num_defs); + EXPECT_EQ(2, mirs_[0].ssa_rep->defs[0]); + EXPECT_EQ(2u, mirs_[0].dalvikInsn.vA); + // Check that the first IGET is using the s_reg 2, v_reg 2. + ASSERT_EQ(1, mirs_[1].ssa_rep->num_uses); + EXPECT_EQ(2, mirs_[1].ssa_rep->uses[0]); + EXPECT_EQ(2u, mirs_[1].dalvikInsn.vB); +} + +TEST_F(GvnDeadCodeEliminationTestSimple, Rename4) { + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_MOVE(3, Instruction::MOVE_OBJECT, 1u, 0u), + DEF_MOVE(3, Instruction::MOVE_OBJECT, 2u, 1u), + DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 3u, 1000u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 0, 1 /* high word */ }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 3 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[0], value_names_[1]); + EXPECT_EQ(value_names_[0], value_names_[2]); + + static const bool eliminated[] = { + false, true, true, false + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the NEW_INSTANCE defines the s_reg 2, v_reg 2, originally defined by the move 2u. 
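// (Editor's note, illustrative rather than part of the patch: at the dalvik level Rename4 turns
//      new-instance v0; move-object v1, v0; move-object v2, v1; const-wide v0, #1000
//  into
//      new-instance v2; const-wide v0, #1000
//  i.e. both chained moves are killed and the NEW_INSTANCE is renamed straight to the final
//  destination v2, which is what the expectations below confirm.)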
+ ASSERT_EQ(1, mirs_[0].ssa_rep->num_defs); + EXPECT_EQ(2, mirs_[0].ssa_rep->defs[0]); + EXPECT_EQ(2u, mirs_[0].dalvikInsn.vA); +} + +TEST_F(GvnDeadCodeEliminationTestSimple, Rename5) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u), + DEF_UNOP(3, Instruction::INT_TO_FLOAT, 2u, 1u), + DEF_MOVE(3, Instruction::MOVE_OBJECT, 3u, 0u), + DEF_MOVE(3, Instruction::MOVE_OBJECT, 4u, 3u), + DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 5u, 1000u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 1, 3, 0, 1 /* high word */ }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 5 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[0], value_names_[3]); + EXPECT_EQ(value_names_[0], value_names_[4]); + + static const bool eliminated[] = { + false, false, false, true, true, false + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the NEW_INSTANCE defines the s_reg 4, v_reg 3, originally defined by the move 4u. + ASSERT_EQ(1, mirs_[0].ssa_rep->num_defs); + EXPECT_EQ(4, mirs_[0].ssa_rep->defs[0]); + EXPECT_EQ(3u, mirs_[0].dalvikInsn.vA); +} + +TEST_F(GvnDeadCodeEliminationTestSimple, Rename6) { + static const MIRDef mirs[] = { + DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 0u, 1000u), + DEF_MOVE_WIDE(3, Instruction::MOVE_WIDE, 2u, 0u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1 /* high word */, 1, 2 /* high word */ }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + EXPECT_EQ(value_names_[0], value_names_[1]); + + static const bool eliminated[] = { + false, true + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the CONST_WIDE defines the s_reg 2, v_reg 1, originally defined by the move 2u. 
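// (Editor's note, illustrative rather than part of the patch: at the dalvik level Rename6 turns
//      const-wide v0, #1000; move-wide v1, v0
//  into
//      const-wide v1, #1000
//  i.e. the wide value is renamed as a pair from v0/v1 to v1/v2 even though the two pairs overlap,
//  which is what the four checks below pin down: the defs move to s_regs 2 and 3 and vA becomes 1.)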
+ ASSERT_EQ(2, mirs_[0].ssa_rep->num_defs); + EXPECT_EQ(2, mirs_[0].ssa_rep->defs[0]); + EXPECT_EQ(3, mirs_[0].ssa_rep->defs[1]); + EXPECT_EQ(1u, mirs_[0].dalvikInsn.vA); +} + +TEST_F(GvnDeadCodeEliminationTestSimple, Rename7) { + static const MIRDef mirs[] = { + DEF_CONST(3, Instruction::CONST, 0u, 1000u), + DEF_MOVE(3, Instruction::MOVE, 1u, 0u), + DEF_BINOP(3, Instruction::ADD_INT, 2u, 0u, 1u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 0 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + EXPECT_NE(value_names_[0], value_names_[2]); + EXPECT_EQ(value_names_[0], value_names_[1]); + + static const bool eliminated[] = { + false, true, false + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the CONST defines the s_reg 1, v_reg 1, originally defined by the move 1u. + ASSERT_EQ(1, mirs_[0].ssa_rep->num_defs); + EXPECT_EQ(1, mirs_[0].ssa_rep->defs[0]); + EXPECT_EQ(1u, mirs_[0].dalvikInsn.vA); + // Check that the ADD_INT inputs are both s_reg1, vreg 1. + ASSERT_EQ(2, mirs_[2].ssa_rep->num_uses); + EXPECT_EQ(1, mirs_[2].ssa_rep->uses[0]); + EXPECT_EQ(1, mirs_[2].ssa_rep->uses[1]); + EXPECT_EQ(1u, mirs_[2].dalvikInsn.vB); + EXPECT_EQ(1u, mirs_[2].dalvikInsn.vC); +} + +TEST_F(GvnDeadCodeEliminationTestSimple, Rename8) { + static const MIRDef mirs[] = { + DEF_CONST(3, Instruction::CONST, 0u, 1000u), + DEF_MOVE(3, Instruction::MOVE, 1u, 0u), + DEF_BINOP(3, Instruction::ADD_INT_2ADDR, 2u, 0u, 1u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 0 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + EXPECT_NE(value_names_[0], value_names_[2]); + EXPECT_EQ(value_names_[0], value_names_[1]); + + static const bool eliminated[] = { + false, true, false + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the CONST defines the s_reg 1, v_reg 1, originally defined by the move 1u. + ASSERT_EQ(1, mirs_[0].ssa_rep->num_defs); + EXPECT_EQ(1, mirs_[0].ssa_rep->defs[0]); + EXPECT_EQ(1u, mirs_[0].dalvikInsn.vA); + // Check that the ADD_INT_2ADDR was replaced by ADD_INT and inputs are both s_reg 1, vreg 1. 
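// (Editor's note, illustrative rather than part of the patch: the 2addr encoding requires the
//  destination to double as the first source register, so once the MOVE is killed and the constant
//  is renamed to v1, the insn can no longer stay in 2addr form. At the dalvik level
//      const v0, #1000; move v1, v0; add-int/2addr v0, v1
//  becomes
//      const v1, #1000; add-int v0, v1, v1
//  via ChangeBinOp2AddrToPlainBinOp() declared in the header, which the expectations below verify.)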
+ EXPECT_EQ(Instruction::ADD_INT, mirs_[2].dalvikInsn.opcode); + ASSERT_EQ(2, mirs_[2].ssa_rep->num_uses); + EXPECT_EQ(1, mirs_[2].ssa_rep->uses[0]); + EXPECT_EQ(1, mirs_[2].ssa_rep->uses[1]); + EXPECT_EQ(1u, mirs_[2].dalvikInsn.vB); + EXPECT_EQ(1u, mirs_[2].dalvikInsn.vC); +} + +TEST_F(GvnDeadCodeEliminationTestSimple, Rename9) { + static const MIRDef mirs[] = { + DEF_CONST(3, Instruction::CONST, 0u, 1000u), + DEF_BINOP(3, Instruction::ADD_INT_2ADDR, 1u, 0u, 0u), + DEF_MOVE(3, Instruction::MOVE, 2u, 1u), + DEF_CONST(3, Instruction::CONST, 3u, 3000u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 0, 1, 0 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 3 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[1], value_names_[2]); + + static const bool eliminated[] = { + false, false, true, false + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the ADD_INT_2ADDR was replaced by ADD_INT and output is in s_reg 2, vreg 1. + EXPECT_EQ(Instruction::ADD_INT, mirs_[1].dalvikInsn.opcode); + ASSERT_EQ(2, mirs_[1].ssa_rep->num_uses); + EXPECT_EQ(0, mirs_[1].ssa_rep->uses[0]); + EXPECT_EQ(0, mirs_[1].ssa_rep->uses[1]); + EXPECT_EQ(0u, mirs_[1].dalvikInsn.vB); + EXPECT_EQ(0u, mirs_[1].dalvikInsn.vC); + ASSERT_EQ(1, mirs_[1].ssa_rep->num_defs); + EXPECT_EQ(2, mirs_[1].ssa_rep->defs[0]); + EXPECT_EQ(1u, mirs_[1].dalvikInsn.vA); +} + +TEST_F(GvnDeadCodeEliminationTestSimple, NoRename1) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + { 1u, 1u, 1u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u), + DEF_UNOP(3, Instruction::INT_TO_FLOAT, 2u, 1u), + DEF_MOVE(3, Instruction::MOVE_OBJECT, 3u, 0u), + DEF_CONST(3, Instruction::CONST, 4u, 1000), + DEF_IGET(3, Instruction::IGET, 5u, 3u, 1u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 1, 0, 1 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 4, 5 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[0], value_names_[3]); + + const size_t no_null_ck_indexes[] = { 1, 5 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, false, false, false, false + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } +} + +TEST_F(GvnDeadCodeEliminationTestSimple, NoRename2) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + { 1u, 1u, 1u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u), + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 2u), + DEF_MOVE(3, Instruction::MOVE_OBJECT, 3u, 0u), + DEF_CONST(3, 
Instruction::CONST, 4u, 1000), + DEF_IGET(3, Instruction::IGET, 5u, 3u, 1u), + DEF_CONST(3, Instruction::CONST, 6u, 2000), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 2, 0, 3, 2 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 4, 5, 6 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[0], value_names_[3]); + + const size_t no_null_ck_indexes[] = { 1, 5 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, false, false, false, false, false + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } +} + +TEST_F(GvnDeadCodeEliminationTestSimple, NoRename3) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + { 1u, 1u, 1u, false, kDexMemAccessWord }, + { 2u, 1u, 2u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u), + DEF_IGET(3, Instruction::IGET, 2u, 0u, 2u), + DEF_BINOP(3, Instruction::ADD_INT, 3u, 1u, 2u), + DEF_MOVE(3, Instruction::MOVE_OBJECT, 4u, 0u), + DEF_IGET(3, Instruction::IGET, 5u, 4u, 1u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 2, 0 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 3, 5 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[0], value_names_[4]); + + const size_t no_null_ck_indexes[] = { 1, 2, 5 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, false, false, false, false + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } +} + +TEST_F(GvnDeadCodeEliminationTestSimple, Simple1) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessObject }, + { 1u, 1u, 1u, false, kDexMemAccessObject }, + { 2u, 1u, 2u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_IGET(3, Instruction::IGET_OBJECT, 1u, 0u, 0u), + DEF_IGET(3, Instruction::IGET_OBJECT, 2u, 1u, 1u), + DEF_IGET(3, Instruction::IGET, 3u, 2u, 2u), + DEF_IGET(3, Instruction::IGET_OBJECT, 4u, 0u, 0u), + DEF_IGET(3, Instruction::IGET_OBJECT, 5u, 4u, 1u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 1, 2 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + EXPECT_NE(value_names_[0], value_names_[1]); + EXPECT_NE(value_names_[0], value_names_[2]); + EXPECT_NE(value_names_[0], value_names_[3]); + EXPECT_NE(value_names_[1], value_names_[2]); + EXPECT_NE(value_names_[1], value_names_[3]); + EXPECT_NE(value_names_[2], value_names_[3]); + EXPECT_EQ(value_names_[1], value_names_[4]); + 
EXPECT_EQ(value_names_[2], value_names_[5]); + + EXPECT_EQ(MIR_IGNORE_NULL_CHECK, mirs_[4].optimization_flags & MIR_IGNORE_NULL_CHECK); + EXPECT_EQ(MIR_IGNORE_NULL_CHECK, mirs_[5].optimization_flags & MIR_IGNORE_NULL_CHECK); + + static const bool eliminated[] = { + false, false, false, false, true, true + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the sregs have been renamed correctly. + ASSERT_EQ(1, mirs_[1].ssa_rep->num_defs); + EXPECT_EQ(4, mirs_[1].ssa_rep->defs[0]); + ASSERT_EQ(1, mirs_[1].ssa_rep->num_uses); + EXPECT_EQ(0, mirs_[1].ssa_rep->uses[0]); + ASSERT_EQ(1, mirs_[2].ssa_rep->num_defs); + EXPECT_EQ(5, mirs_[2].ssa_rep->defs[0]); + ASSERT_EQ(1, mirs_[2].ssa_rep->num_uses); + EXPECT_EQ(4, mirs_[2].ssa_rep->uses[0]); + ASSERT_EQ(1, mirs_[3].ssa_rep->num_defs); + EXPECT_EQ(3, mirs_[3].ssa_rep->defs[0]); + ASSERT_EQ(1, mirs_[3].ssa_rep->num_uses); + EXPECT_EQ(5, mirs_[3].ssa_rep->uses[0]); +} + +TEST_F(GvnDeadCodeEliminationTestSimple, Simple2) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_CONST(3, Instruction::CONST, 1u, 1000), + DEF_IGET(3, Instruction::IGET, 2u, 0u, 0u), + DEF_BINOP(3, Instruction::ADD_INT_2ADDR, 3u, 2u, 1u), + DEF_UNOP(3, Instruction::INT_TO_FLOAT, 4u, 3u), + DEF_IGET(3, Instruction::IGET, 5u, 0u, 0u), + DEF_BINOP(3, Instruction::ADD_INT_2ADDR, 6u, 5u, 1u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 2, 3, 2, 2 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 3 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[2], value_names_[5]); + EXPECT_EQ(value_names_[3], value_names_[6]); + + const size_t no_null_ck_indexes[] = { 2, 5 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, false, false, false, true, true + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the sregs have been renamed correctly. 
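// (Editor's note, illustrative rather than part of the patch: GVN assigns the second IGET/ADD pair
//  the same value names as the first, so both are NOPed. To keep v2 holding the expected value at
//  the end of the block, the surviving ADD's def is renamed from s_reg 3 to s_reg 6, the last SSA
//  name assigned to v2 in the block, and the INT_TO_FLOAT use is renamed to match; that is what
//  the checks below verify.)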
+ ASSERT_EQ(1, mirs_[3].ssa_rep->num_defs); + EXPECT_EQ(6, mirs_[3].ssa_rep->defs[0]); + ASSERT_EQ(2, mirs_[3].ssa_rep->num_uses); + EXPECT_EQ(2, mirs_[3].ssa_rep->uses[0]); + EXPECT_EQ(1, mirs_[3].ssa_rep->uses[1]); + ASSERT_EQ(1, mirs_[4].ssa_rep->num_defs); + EXPECT_EQ(4, mirs_[4].ssa_rep->defs[0]); + ASSERT_EQ(1, mirs_[4].ssa_rep->num_uses); + EXPECT_EQ(6, mirs_[4].ssa_rep->uses[0]); +} + +TEST_F(GvnDeadCodeEliminationTestSimple, Simple3) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_CONST(3, Instruction::CONST, 1u, 1000), + DEF_CONST(3, Instruction::CONST, 2u, 2000), + DEF_CONST(3, Instruction::CONST, 3u, 3000), + DEF_IGET(3, Instruction::IGET, 4u, 0u, 0u), + DEF_BINOP(3, Instruction::ADD_INT, 5u, 4u, 1u), + DEF_BINOP(3, Instruction::MUL_INT, 6u, 5u, 2u), + DEF_BINOP(3, Instruction::SUB_INT, 7u, 6u, 3u), + DEF_UNOP(3, Instruction::INT_TO_FLOAT, 8u, 7u), + DEF_IGET(3, Instruction::IGET, 9u, 0u, 0u), + DEF_BINOP(3, Instruction::ADD_INT, 10u, 9u, 1u), + DEF_BINOP(3, Instruction::MUL_INT, 11u, 10u, 2u), // Simple elimination of ADD+MUL + DEF_BINOP(3, Instruction::SUB_INT, 12u, 11u, 3u), // allows simple elimination of IGET+SUB. + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 4, 5, 5, 4, 6, 4, 5, 5, 4 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[4], value_names_[9]); + EXPECT_EQ(value_names_[5], value_names_[10]); + EXPECT_EQ(value_names_[6], value_names_[11]); + EXPECT_EQ(value_names_[7], value_names_[12]); + + const size_t no_null_ck_indexes[] = { 4, 9 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, false, false, false, false, false, false, false, true, true, true, true + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the sregs have been renamed correctly. 
+ ASSERT_EQ(1, mirs_[6].ssa_rep->num_defs); + EXPECT_EQ(11, mirs_[6].ssa_rep->defs[0]); // 6 -> 11 + ASSERT_EQ(2, mirs_[6].ssa_rep->num_uses); + EXPECT_EQ(5, mirs_[6].ssa_rep->uses[0]); + EXPECT_EQ(2, mirs_[6].ssa_rep->uses[1]); + ASSERT_EQ(1, mirs_[7].ssa_rep->num_defs); + EXPECT_EQ(12, mirs_[7].ssa_rep->defs[0]); // 7 -> 12 + ASSERT_EQ(2, mirs_[7].ssa_rep->num_uses); + EXPECT_EQ(11, mirs_[7].ssa_rep->uses[0]); // 6 -> 11 + EXPECT_EQ(3, mirs_[7].ssa_rep->uses[1]); + ASSERT_EQ(1, mirs_[8].ssa_rep->num_defs); + EXPECT_EQ(8, mirs_[8].ssa_rep->defs[0]); + ASSERT_EQ(1, mirs_[8].ssa_rep->num_uses); + EXPECT_EQ(12, mirs_[8].ssa_rep->uses[0]); // 7 -> 12 +} + +TEST_F(GvnDeadCodeEliminationTestSimple, Simple4) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 1u, INT64_C(1)), + DEF_BINOP(3, Instruction::LONG_TO_FLOAT, 3u, 1u, 2u), + DEF_IGET(3, Instruction::IGET, 4u, 0u, 0u), + DEF_UNOP(3, Instruction::INT_TO_FLOAT, 5u, 4u), + DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 6u, INT64_C(1)), + DEF_BINOP(3, Instruction::LONG_TO_FLOAT, 8u, 6u, 7u), + DEF_IGET(3, Instruction::IGET, 9u, 0u, 0u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 1, 2, 3, 1, 2, 1, 2 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 3, 4 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[1], value_names_[5]); + EXPECT_EQ(value_names_[2], value_names_[6]); + EXPECT_EQ(value_names_[3], value_names_[7]); + + const size_t no_null_ck_indexes[] = { 3, 7 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + // Simple elimination of CONST_WIDE+LONG_TO_FLOAT allows simple elimination of IGET. + false, false, false, false, false, true, true, true + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the sregs have been renamed correctly.
+ ASSERT_EQ(1, mirs_[2].ssa_rep->num_defs); + EXPECT_EQ(8, mirs_[2].ssa_rep->defs[0]); // 3 -> 8 + ASSERT_EQ(2, mirs_[2].ssa_rep->num_uses); + EXPECT_EQ(1, mirs_[2].ssa_rep->uses[0]); + EXPECT_EQ(2, mirs_[2].ssa_rep->uses[1]); + ASSERT_EQ(1, mirs_[3].ssa_rep->num_defs); + EXPECT_EQ(9, mirs_[3].ssa_rep->defs[0]); // 4 -> 9 + ASSERT_EQ(1, mirs_[3].ssa_rep->num_uses); + EXPECT_EQ(0, mirs_[3].ssa_rep->uses[0]); + ASSERT_EQ(1, mirs_[4].ssa_rep->num_defs); + EXPECT_EQ(5, mirs_[4].ssa_rep->defs[0]); + ASSERT_EQ(1, mirs_[4].ssa_rep->num_uses); + EXPECT_EQ(9, mirs_[4].ssa_rep->uses[0]); // 4 -> 9 +} + +TEST_F(GvnDeadCodeEliminationTestSimple, KillChain1) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_CONST(3, Instruction::CONST, 1u, 1000), + DEF_CONST(3, Instruction::CONST, 2u, 2000), + DEF_CONST(3, Instruction::CONST, 3u, 3000), + DEF_IGET(3, Instruction::IGET, 4u, 0u, 0u), + DEF_BINOP(3, Instruction::ADD_INT, 5u, 4u, 1u), + DEF_BINOP(3, Instruction::MUL_INT, 6u, 5u, 2u), + DEF_BINOP(3, Instruction::SUB_INT, 7u, 6u, 3u), + DEF_UNOP(3, Instruction::INT_TO_FLOAT, 8u, 7u), + DEF_IGET(3, Instruction::IGET, 9u, 0u, 0u), + DEF_BINOP(3, Instruction::ADD_INT, 10u, 9u, 1u), + DEF_BINOP(3, Instruction::MUL_INT, 11u, 10u, 2u), + DEF_BINOP(3, Instruction::SUB_INT, 12u, 11u, 3u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 4, 5, 4, 5, 6, 4, 5, 4, 5 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[4], value_names_[9]); + EXPECT_EQ(value_names_[5], value_names_[10]); + EXPECT_EQ(value_names_[6], value_names_[11]); + EXPECT_EQ(value_names_[7], value_names_[12]); + + const size_t no_null_ck_indexes[] = { 4, 9 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, false, false, false, false, false, false, false, true, true, true, true + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the sregs have been renamed correctly. 
+ ASSERT_EQ(1, mirs_[6].ssa_rep->num_defs); + EXPECT_EQ(11, mirs_[6].ssa_rep->defs[0]); // 6 -> 11 + ASSERT_EQ(2, mirs_[6].ssa_rep->num_uses); + EXPECT_EQ(5, mirs_[6].ssa_rep->uses[0]); + EXPECT_EQ(2, mirs_[6].ssa_rep->uses[1]); + ASSERT_EQ(1, mirs_[7].ssa_rep->num_defs); + EXPECT_EQ(12, mirs_[7].ssa_rep->defs[0]); // 7 -> 12 + ASSERT_EQ(2, mirs_[7].ssa_rep->num_uses); + EXPECT_EQ(11, mirs_[7].ssa_rep->uses[0]); // 6 -> 11 + EXPECT_EQ(3, mirs_[7].ssa_rep->uses[1]); + ASSERT_EQ(1, mirs_[8].ssa_rep->num_defs); + EXPECT_EQ(8, mirs_[8].ssa_rep->defs[0]); + ASSERT_EQ(1, mirs_[8].ssa_rep->num_uses); + EXPECT_EQ(12, mirs_[8].ssa_rep->uses[0]); // 7 -> 12 +} + +TEST_F(GvnDeadCodeEliminationTestSimple, KillChain2) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_CONST(3, Instruction::CONST, 1u, 1000), + DEF_CONST(3, Instruction::CONST, 2u, 2000), + DEF_CONST(3, Instruction::CONST, 3u, 3000), + DEF_IGET(3, Instruction::IGET, 4u, 0u, 0u), + DEF_BINOP(3, Instruction::ADD_INT, 5u, 4u, 1u), + DEF_BINOP(3, Instruction::MUL_INT, 6u, 5u, 2u), + DEF_BINOP(3, Instruction::SUB_INT, 7u, 6u, 3u), + DEF_UNOP(3, Instruction::INT_TO_FLOAT, 8u, 7u), + DEF_IGET(3, Instruction::IGET, 9u, 0u, 0u), + DEF_BINOP(3, Instruction::ADD_INT, 10u, 9u, 1u), + DEF_BINOP(3, Instruction::MUL_INT, 11u, 10u, 2u), + DEF_BINOP(3, Instruction::SUB_INT, 12u, 11u, 3u), + DEF_CONST(3, Instruction::CONST, 13u, 4000), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 4, 5, 5, 4, 6, 4, 7, 7, 4, 7 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 13 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[4], value_names_[9]); + EXPECT_EQ(value_names_[5], value_names_[10]); + EXPECT_EQ(value_names_[6], value_names_[11]); + EXPECT_EQ(value_names_[7], value_names_[12]); + + const size_t no_null_ck_indexes[] = { 4, 9 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, false, false, false, false, false, false, false, true, true, true, true, false + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the sregs have been renamed correctly. 
+ ASSERT_EQ(1, mirs_[7].ssa_rep->num_defs); + EXPECT_EQ(12, mirs_[7].ssa_rep->defs[0]); // 7 -> 12 + ASSERT_EQ(2, mirs_[7].ssa_rep->num_uses); + EXPECT_EQ(6, mirs_[7].ssa_rep->uses[0]); + EXPECT_EQ(3, mirs_[7].ssa_rep->uses[1]); + ASSERT_EQ(1, mirs_[8].ssa_rep->num_defs); + EXPECT_EQ(8, mirs_[8].ssa_rep->defs[0]); + ASSERT_EQ(1, mirs_[8].ssa_rep->num_uses); + EXPECT_EQ(12, mirs_[8].ssa_rep->uses[0]); // 7 -> 12 +} + +TEST_F(GvnDeadCodeEliminationTestSimple, KillChain3) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_CONST(3, Instruction::CONST, 1u, 1000), + DEF_CONST(3, Instruction::CONST, 2u, 2000), + DEF_CONST(3, Instruction::CONST, 3u, 3000), + DEF_IGET(3, Instruction::IGET, 4u, 0u, 0u), + DEF_BINOP(3, Instruction::ADD_INT, 5u, 4u, 1u), + DEF_BINOP(3, Instruction::MUL_INT, 6u, 5u, 2u), + DEF_BINOP(3, Instruction::SUB_INT, 7u, 6u, 3u), + DEF_UNOP(3, Instruction::INT_TO_FLOAT, 8u, 7u), + DEF_IGET(3, Instruction::IGET, 9u, 0u, 0u), + DEF_BINOP(3, Instruction::ADD_INT, 10u, 9u, 1u), + DEF_BINOP(3, Instruction::MUL_INT, 11u, 10u, 2u), + DEF_CONST(3, Instruction::CONST, 12u, 4000), + DEF_BINOP(3, Instruction::SUB_INT, 13u, 11u, 3u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 4, 5, 5, 4, 6, 4, 7, 4, 7, 4 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 12 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[4], value_names_[9]); + EXPECT_EQ(value_names_[5], value_names_[10]); + EXPECT_EQ(value_names_[6], value_names_[11]); + EXPECT_EQ(value_names_[7], value_names_[13]); + + const size_t no_null_ck_indexes[] = { 4, 9 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, false, false, false, false, false, false, false, true, true, true, false, true + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the sregs have been renamed correctly. + ASSERT_EQ(1, mirs_[7].ssa_rep->num_defs); + EXPECT_EQ(13, mirs_[7].ssa_rep->defs[0]); // 7 -> 13 + ASSERT_EQ(2, mirs_[7].ssa_rep->num_uses); + EXPECT_EQ(6, mirs_[7].ssa_rep->uses[0]); + EXPECT_EQ(3, mirs_[7].ssa_rep->uses[1]); + ASSERT_EQ(1, mirs_[8].ssa_rep->num_defs); + EXPECT_EQ(8, mirs_[8].ssa_rep->defs[0]); + ASSERT_EQ(1, mirs_[8].ssa_rep->num_uses); + EXPECT_EQ(13, mirs_[8].ssa_rep->uses[0]); // 7 -> 13 +} + +TEST_F(GvnDeadCodeEliminationTestSimple, KeepChain1) { + // KillChain2 without the final CONST. 
+ static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_CONST(3, Instruction::CONST, 1u, 1000), + DEF_CONST(3, Instruction::CONST, 2u, 2000), + DEF_CONST(3, Instruction::CONST, 3u, 3000), + DEF_IGET(3, Instruction::IGET, 4u, 0u, 0u), + DEF_BINOP(3, Instruction::ADD_INT, 5u, 4u, 1u), + DEF_BINOP(3, Instruction::MUL_INT, 6u, 5u, 2u), + DEF_BINOP(3, Instruction::SUB_INT, 7u, 6u, 3u), + DEF_UNOP(3, Instruction::INT_TO_FLOAT, 8u, 7u), + DEF_IGET(3, Instruction::IGET, 9u, 0u, 0u), + DEF_BINOP(3, Instruction::ADD_INT, 10u, 9u, 1u), + DEF_BINOP(3, Instruction::MUL_INT, 11u, 10u, 2u), + DEF_BINOP(3, Instruction::SUB_INT, 12u, 11u, 3u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 4, 5, 5, 4, 6, 4, 7, 7, 4 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[4], value_names_[9]); + EXPECT_EQ(value_names_[5], value_names_[10]); + EXPECT_EQ(value_names_[6], value_names_[11]); + EXPECT_EQ(value_names_[7], value_names_[12]); + + const size_t no_null_ck_indexes[] = { 4, 9 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, false, false, false, false, false, false, false, false, false, false, false + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } +} + +TEST_F(GvnDeadCodeEliminationTestSimple, KeepChain2) { + // KillChain1 with MIRs in the middle of the chain. 
+ static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_CONST(3, Instruction::CONST, 1u, 1000), + DEF_CONST(3, Instruction::CONST, 2u, 2000), + DEF_CONST(3, Instruction::CONST, 3u, 3000), + DEF_IGET(3, Instruction::IGET, 4u, 0u, 0u), + DEF_BINOP(3, Instruction::ADD_INT, 5u, 4u, 1u), + DEF_BINOP(3, Instruction::MUL_INT, 6u, 5u, 2u), + DEF_BINOP(3, Instruction::SUB_INT, 7u, 6u, 3u), + DEF_UNOP(3, Instruction::INT_TO_FLOAT, 8u, 7u), + DEF_IGET(3, Instruction::IGET, 9u, 0u, 0u), + DEF_BINOP(3, Instruction::ADD_INT, 10u, 9u, 1u), + DEF_CONST(3, Instruction::CONST, 11u, 4000), + DEF_UNOP(3, Instruction::INT_TO_FLOAT, 12u, 11u), + DEF_BINOP(3, Instruction::MUL_INT, 13u, 10u, 2u), + DEF_BINOP(3, Instruction::SUB_INT, 14u, 13u, 3u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 4, 5, 4, 5, 6, 4, 5, 4, 7, 4, 5 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[4], value_names_[9]); + EXPECT_EQ(value_names_[5], value_names_[10]); + EXPECT_EQ(value_names_[6], value_names_[13]); + EXPECT_EQ(value_names_[7], value_names_[14]); + + const size_t no_null_ck_indexes[] = { 4, 9 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } +} + +TEST_F(GvnDeadCodeEliminationTestDiamond, CreatePhi1) { + static const MIRDef mirs[] = { + DEF_CONST(3, Instruction::CONST, 0u, 1000), + DEF_CONST(4, Instruction::CONST, 1u, 1000), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 0 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + EXPECT_EQ(value_names_[0], value_names_[1]); + + static const bool eliminated[] = { + false, true, + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that we've created a single-input Phi to replace the CONST 1u.
+ BasicBlock* bb4 = cu_.mir_graph->GetBasicBlock(4); + MIR* phi = bb4->first_mir_insn; + ASSERT_TRUE(phi != nullptr); + ASSERT_EQ(kMirOpPhi, static_cast<int>(phi->dalvikInsn.opcode)); + ASSERT_EQ(1, phi->ssa_rep->num_uses); + EXPECT_EQ(0, phi->ssa_rep->uses[0]); + ASSERT_EQ(1, phi->ssa_rep->num_defs); + EXPECT_EQ(1, phi->ssa_rep->defs[0]); + EXPECT_EQ(0u, phi->dalvikInsn.vA); +} + +TEST_F(GvnDeadCodeEliminationTestDiamond, CreatePhi2) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_CONST(4, Instruction::CONST, 1u, 1000), + DEF_IPUT(4, Instruction::IPUT, 1u, 0u, 0u), + DEF_CONST(5, Instruction::CONST, 3u, 2000), + DEF_IPUT(5, Instruction::IPUT, 3u, 0u, 0u), + DEF_IGET(6, Instruction::IGET, 5u, 0u, 0u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2 /* dummy */, 1, 2 /* dummy */, 1 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 3, 5 }; + ExpectValueNamesNE(diff_indexes); + + const size_t no_null_ck_indexes[] = { 2, 4, 5 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, false, false, false, true, + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that we've created a two-input Phi to replace the IGET 5u. + BasicBlock* bb6 = cu_.mir_graph->GetBasicBlock(6); + MIR* phi = bb6->first_mir_insn; + ASSERT_TRUE(phi != nullptr); + ASSERT_EQ(kMirOpPhi, static_cast<int>(phi->dalvikInsn.opcode)); + ASSERT_EQ(2, phi->ssa_rep->num_uses); + EXPECT_EQ(1, phi->ssa_rep->uses[0]); + EXPECT_EQ(3, phi->ssa_rep->uses[1]); + ASSERT_EQ(1, phi->ssa_rep->num_defs); + EXPECT_EQ(5, phi->ssa_rep->defs[0]); + EXPECT_EQ(1u, phi->dalvikInsn.vA); +} + +TEST_F(GvnDeadCodeEliminationTestDiamond, KillChainInAnotherBlock1) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessObject }, // linked list + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_IGET(3, Instruction::IGET_OBJECT, 1u, 0u, 0u), + DEF_IGET(3, Instruction::IGET_OBJECT, 2u, 1u, 0u), + DEF_IGET(3, Instruction::IGET_OBJECT, 3u, 2u, 0u), + DEF_IGET(3, Instruction::IGET_OBJECT, 4u, 3u, 0u), + DEF_IFZ(3, Instruction::IF_NEZ, 4u), + DEF_IGET(4, Instruction::IGET_OBJECT, 6u, 0u, 0u), + DEF_IGET(4, Instruction::IGET_OBJECT, 7u, 6u, 0u), + DEF_IGET(4, Instruction::IGET_OBJECT, 8u, 7u, 0u), + DEF_IGET(4, Instruction::IGET_OBJECT, 9u, 8u, 0u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 1, 2, 3 /* dummy */, 1, 2, 1, 2 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 3, 4 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[1], value_names_[6]); + EXPECT_EQ(value_names_[2], value_names_[7]); + EXPECT_EQ(value_names_[3], value_names_[8]); + EXPECT_EQ(value_names_[4], value_names_[9]); + + const size_t no_null_ck_indexes[] = { 1, 6, 7, 8, 9 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool 
eliminated[] = { + false, false, false, false, false, false, true, true, true, true, + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that we've created two single-input Phis to replace the IGET 8u and IGET 9u; + // the IGET 6u and IGET 7u were killed without a replacement. + BasicBlock* bb4 = cu_.mir_graph->GetBasicBlock(4); + MIR* phi1 = bb4->first_mir_insn; + ASSERT_TRUE(phi1 != nullptr); + ASSERT_EQ(kMirOpPhi, static_cast<int>(phi1->dalvikInsn.opcode)); + MIR* phi2 = phi1->next; + ASSERT_TRUE(phi2 != nullptr); + ASSERT_EQ(kMirOpPhi, static_cast<int>(phi2->dalvikInsn.opcode)); + ASSERT_TRUE(phi2->next == &mirs_[6]); + if (phi1->dalvikInsn.vA == 2u) { + std::swap(phi1, phi2); + } + ASSERT_EQ(1, phi1->ssa_rep->num_uses); + EXPECT_EQ(3, phi1->ssa_rep->uses[0]); + ASSERT_EQ(1, phi1->ssa_rep->num_defs); + EXPECT_EQ(8, phi1->ssa_rep->defs[0]); + EXPECT_EQ(1u, phi1->dalvikInsn.vA); + ASSERT_EQ(1, phi2->ssa_rep->num_uses); + EXPECT_EQ(4, phi2->ssa_rep->uses[0]); + ASSERT_EQ(1, phi2->ssa_rep->num_defs); + EXPECT_EQ(9, phi2->ssa_rep->defs[0]); + EXPECT_EQ(2u, phi2->dalvikInsn.vA); +} + +TEST_F(GvnDeadCodeEliminationTestDiamond, KillChainInAnotherBlock2) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessObject }, // linked list + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_IGET(3, Instruction::IGET_OBJECT, 1u, 0u, 0u), + DEF_IGET(3, Instruction::IGET_OBJECT, 2u, 1u, 0u), + DEF_IGET(3, Instruction::IGET_OBJECT, 3u, 2u, 0u), + DEF_IGET(3, Instruction::IGET_OBJECT, 4u, 3u, 0u), + DEF_IFZ(3, Instruction::IF_NEZ, 4u), + DEF_IGET(4, Instruction::IGET_OBJECT, 6u, 0u, 0u), + DEF_IGET(4, Instruction::IGET_OBJECT, 7u, 6u, 0u), + DEF_IGET(4, Instruction::IGET_OBJECT, 8u, 7u, 0u), + DEF_CONST(4, Instruction::CONST, 9u, 1000), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 1, 2, 3 /* dummy */, 1, 2, 1, 2 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 9 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[1], value_names_[6]); + EXPECT_EQ(value_names_[2], value_names_[7]); + EXPECT_EQ(value_names_[3], value_names_[8]); + + const size_t no_null_ck_indexes[] = { 1, 6, 7, 8 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, false, false, false, false, true, true, true, false, + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that we've created a single-input Phi to replace the IGET 8u; + // the IGET 6u and IGET 7u were killed without a replacement. 
+ BasicBlock* bb4 = cu_.mir_graph->GetBasicBlock(4); + MIR* phi = bb4->first_mir_insn; + ASSERT_TRUE(phi != nullptr); + ASSERT_EQ(kMirOpPhi, static_cast<int>(phi->dalvikInsn.opcode)); + ASSERT_TRUE(phi->next == &mirs_[6]); + ASSERT_EQ(1, phi->ssa_rep->num_uses); + EXPECT_EQ(3, phi->ssa_rep->uses[0]); + ASSERT_EQ(1, phi->ssa_rep->num_defs); + EXPECT_EQ(8, phi->ssa_rep->defs[0]); + EXPECT_EQ(1u, phi->dalvikInsn.vA); +} + +TEST_F(GvnDeadCodeEliminationTestLoop, IFieldLoopVariable) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_CONST(3, Instruction::CONST, 1u, 1), + DEF_CONST(3, Instruction::CONST, 2u, 0), + DEF_IPUT(3, Instruction::IPUT, 2u, 0u, 0u), + DEF_IGET(4, Instruction::IGET, 4u, 0u, 0u), + DEF_BINOP(4, Instruction::ADD_INT, 5u, 4u, 1u), + DEF_IPUT(4, Instruction::IPUT, 5u, 0u, 0u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3 /* dummy */, 2, 2 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 4, 5 }; + ExpectValueNamesNE(diff_indexes); + + const size_t no_null_ck_indexes[] = { 3, 4, 6 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, false, false, true, false, false, + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that we've created a two-input Phi to replace the IGET 4u. + BasicBlock* bb4 = cu_.mir_graph->GetBasicBlock(4); + MIR* phi = bb4->first_mir_insn; + ASSERT_TRUE(phi != nullptr); + ASSERT_EQ(kMirOpPhi, static_cast<int>(phi->dalvikInsn.opcode)); + ASSERT_TRUE(phi->next == &mirs_[4]); + ASSERT_EQ(2, phi->ssa_rep->num_uses); + EXPECT_EQ(2, phi->ssa_rep->uses[0]); + EXPECT_EQ(5, phi->ssa_rep->uses[1]); + ASSERT_EQ(1, phi->ssa_rep->num_defs); + EXPECT_EQ(4, phi->ssa_rep->defs[0]); + EXPECT_EQ(2u, phi->dalvikInsn.vA); +} + +} // namespace art diff --git a/compiler/dex/local_value_numbering.cc b/compiler/dex/local_value_numbering.cc index 114346dd5a..99b6683b26 100644 --- a/compiler/dex/local_value_numbering.cc +++ b/compiler/dex/local_value_numbering.cc @@ -901,9 +901,9 @@ void LocalValueNumbering::MergeAliasingValues(const typename Map::value_type& en // Calculate merged values for the intersection. for (auto& load_value_entry : my_values->load_value_map) { uint16_t location = load_value_entry.first; - bool same_values = true; - uint16_t value_name = kNoValue; merge_names_.clear(); + uint16_t value_name = kNoValue; + bool same_values = true; for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) { value_name = Versions::LookupMergeValue(gvn_, lvn, key, location); same_values = same_values && (merge_names_.empty() || value_name == merge_names_.back()); @@ -937,6 +937,10 @@ void LocalValueNumbering::MergeAliasingValues(const typename Map::value_type& en void LocalValueNumbering::Merge(MergeType merge_type) { DCHECK_GE(gvn_->merge_lvns_.size(), 2u); + // Always reserve space in merge_names_. Even if we don't use it in Merge() we may need it + // in GetStartingVregValueNumberImpl() when the merge_names_'s allocator is not the top.
+ merge_names_.reserve(gvn_->merge_lvns_.size()); + IntersectSregValueMaps<&LocalValueNumbering::sreg_value_map_>(); IntersectSregValueMaps<&LocalValueNumbering::sreg_wide_value_map_>(); if (merge_type == kReturnMerge) { @@ -1169,8 +1173,8 @@ uint16_t LocalValueNumbering::HandlePhi(MIR* mir) { int first_s_reg = uses[pos]; bool wide = (first_lvn->sreg_wide_value_map_.count(first_s_reg) != 0u); // Iterate over *merge_lvns_ and skip incoming sregs for BBs without associated LVN. - uint16_t value_name = kNoValue; merge_names_.clear(); + uint16_t value_name = kNoValue; bool same_values = true; for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) { DCHECK_LT(pos, mir->ssa_rep->num_uses); @@ -1210,6 +1214,31 @@ uint16_t LocalValueNumbering::HandlePhi(MIR* mir) { return value_name; } +uint16_t LocalValueNumbering::HandleConst(MIR* mir, uint32_t value) { + RegLocation raw_dest = gvn_->GetMirGraph()->GetRawDest(mir); + uint16_t res; + if (value == 0u && raw_dest.ref) { + res = GlobalValueNumbering::kNullValue; + } else { + Instruction::Code op = raw_dest.fp ? Instruction::CONST_HIGH16 : Instruction::CONST; + res = gvn_->LookupValue(op, Low16Bits(value), High16Bits(value), 0); + } + SetOperandValue(mir->ssa_rep->defs[0], res); + return res; +} + +uint16_t LocalValueNumbering::HandleConstWide(MIR* mir, uint64_t value) { + RegLocation raw_dest = gvn_->GetMirGraph()->GetRawDest(mir); + Instruction::Code op = raw_dest.fp ? Instruction::CONST_HIGH16 : Instruction::CONST; + uint32_t low_word = Low32Bits(value); + uint32_t high_word = High32Bits(value); + uint16_t low_res = gvn_->LookupValue(op, Low16Bits(low_word), High16Bits(low_word), 1); + uint16_t high_res = gvn_->LookupValue(op, Low16Bits(high_word), High16Bits(high_word), 2); + uint16_t res = gvn_->LookupValue(op, low_res, high_res, 3); + SetOperandValueWide(mir->ssa_rep->defs[0], res); + return res; +} + uint16_t LocalValueNumbering::HandleAGet(MIR* mir, uint16_t opcode) { uint16_t array = GetOperandValue(mir->ssa_rep->uses[0]); HandleNullCheck(mir, array); @@ -1592,12 +1621,18 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { break; case Instruction::MOVE_EXCEPTION: case Instruction::NEW_INSTANCE: - case Instruction::CONST_CLASS: case Instruction::NEW_ARRAY: // 1 result, treat as unique each time, use result s_reg - will be unique. res = MarkNonAliasingNonNull(mir); SetOperandValue(mir->ssa_rep->defs[0], res); break; + case Instruction::CONST_CLASS: + DCHECK_EQ(Low16Bits(mir->dalvikInsn.vB), mir->dalvikInsn.vB); + res = gvn_->LookupValue(Instruction::CONST_CLASS, mir->dalvikInsn.vB, 0, 0); + SetOperandValue(mir->ssa_rep->defs[0], res); + null_checked_.insert(res); + non_aliasing_refs_.insert(res); + break; case Instruction::CONST_STRING: case Instruction::CONST_STRING_JUMBO: // These strings are internalized, so assign value based on the string pool index. 
@@ -1641,53 +1676,29 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { SetOperandValueWide(mir->ssa_rep->defs[0], res); break; + case Instruction::CONST_HIGH16: + res = HandleConst(mir, mir->dalvikInsn.vB << 16); + break; case Instruction::CONST: case Instruction::CONST_4: case Instruction::CONST_16: - res = gvn_->LookupValue(Instruction::CONST, Low16Bits(mir->dalvikInsn.vB), - High16Bits(mir->dalvikInsn.vB), 0); - SetOperandValue(mir->ssa_rep->defs[0], res); - break; - - case Instruction::CONST_HIGH16: - res = gvn_->LookupValue(Instruction::CONST, 0, mir->dalvikInsn.vB, 0); - SetOperandValue(mir->ssa_rep->defs[0], res); + res = HandleConst(mir, mir->dalvikInsn.vB); break; case Instruction::CONST_WIDE_16: - case Instruction::CONST_WIDE_32: { - uint16_t low_res = gvn_->LookupValue(Instruction::CONST, Low16Bits(mir->dalvikInsn.vB), - High16Bits(mir->dalvikInsn.vB >> 16), 1); - uint16_t high_res; - if (mir->dalvikInsn.vB & 0x80000000) { - high_res = gvn_->LookupValue(Instruction::CONST, 0xffff, 0xffff, 2); - } else { - high_res = gvn_->LookupValue(Instruction::CONST, 0, 0, 2); - } - res = gvn_->LookupValue(Instruction::CONST, low_res, high_res, 3); - SetOperandValueWide(mir->ssa_rep->defs[0], res); - } + case Instruction::CONST_WIDE_32: + res = HandleConstWide( + mir, + mir->dalvikInsn.vB + + ((mir->dalvikInsn.vB & 0x80000000) != 0 ? UINT64_C(0xffffffff00000000) : 0u)); break; - case Instruction::CONST_WIDE: { - uint32_t low_word = Low32Bits(mir->dalvikInsn.vB_wide); - uint32_t high_word = High32Bits(mir->dalvikInsn.vB_wide); - uint16_t low_res = gvn_->LookupValue(Instruction::CONST, Low16Bits(low_word), - High16Bits(low_word), 1); - uint16_t high_res = gvn_->LookupValue(Instruction::CONST, Low16Bits(high_word), - High16Bits(high_word), 2); - res = gvn_->LookupValue(Instruction::CONST, low_res, high_res, 3); - SetOperandValueWide(mir->ssa_rep->defs[0], res); - } + case Instruction::CONST_WIDE: + res = HandleConstWide(mir, mir->dalvikInsn.vB_wide); break; - case Instruction::CONST_WIDE_HIGH16: { - uint16_t low_res = gvn_->LookupValue(Instruction::CONST, 0, 0, 1); - uint16_t high_res = gvn_->LookupValue(Instruction::CONST, 0, - Low16Bits(mir->dalvikInsn.vB), 2); - res = gvn_->LookupValue(Instruction::CONST, low_res, high_res, 3); - SetOperandValueWide(mir->ssa_rep->defs[0], res); - } + case Instruction::CONST_WIDE_HIGH16: + res = HandleConstWide(mir, static_cast<uint64_t>(mir->dalvikInsn.vB) << 48); break; case Instruction::ARRAY_LENGTH: { @@ -1956,4 +1967,55 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { return res; } +uint16_t LocalValueNumbering::GetEndingVregValueNumberImpl(int v_reg, bool wide) const { + const BasicBlock* bb = gvn_->GetBasicBlock(Id()); + DCHECK(bb != nullptr); + int s_reg = bb->data_flow_info->vreg_to_ssa_map_exit[v_reg]; + if (s_reg == INVALID_SREG) { + return kNoValue; + } + if (wide) { + int high_s_reg = bb->data_flow_info->vreg_to_ssa_map_exit[v_reg + 1]; + if (high_s_reg != s_reg + 1) { + return kNoValue; // High word has been overwritten. 
+ } + return GetSregValueWide(s_reg); + } else { + return GetSregValue(s_reg); + } +} + +uint16_t LocalValueNumbering::GetStartingVregValueNumberImpl(int v_reg, bool wide) const { + DCHECK_EQ(gvn_->mode_, GlobalValueNumbering::kModeGvnPostProcessing); + DCHECK(gvn_->CanModify()); + const BasicBlock* bb = gvn_->GetBasicBlock(Id()); + DCHECK(bb != nullptr); + DCHECK_NE(bb->predecessors.size(), 0u); + if (bb->predecessors.size() == 1u) { + return gvn_->GetLvn(bb->predecessors[0])->GetEndingVregValueNumberImpl(v_reg, wide); + } + merge_names_.clear(); + uint16_t value_name = kNoValue; + bool same_values = true; + for (BasicBlockId pred_id : bb->predecessors) { + value_name = gvn_->GetLvn(pred_id)->GetEndingVregValueNumberImpl(v_reg, wide); + if (value_name == kNoValue) { + return kNoValue; + } + same_values = same_values && (merge_names_.empty() || value_name == merge_names_.back()); + merge_names_.push_back(value_name); + } + if (same_values) { + // value_name already contains the result. + } else { + auto lb = merge_map_.lower_bound(merge_names_); + if (lb != merge_map_.end() && !merge_map_.key_comp()(merge_names_, lb->first)) { + value_name = lb->second; + } else { + value_name = kNoValue; // We never assigned a value name to this set of merged names. + } + } + return value_name; +} + } // namespace art diff --git a/compiler/dex/local_value_numbering.h b/compiler/dex/local_value_numbering.h index aef8c6df0c..97ea05a914 100644 --- a/compiler/dex/local_value_numbering.h +++ b/compiler/dex/local_value_numbering.h @@ -19,9 +19,9 @@ #include <memory> +#include "base/arena_object.h" #include "base/logging.h" #include "global_value_numbering.h" -#include "utils/arena_object.h" #include "utils/dex_instruction_utils.h" namespace art { @@ -52,13 +52,22 @@ class LocalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> { return div_zero_checked_.find(value_name) != div_zero_checked_.end(); } - bool IsSregValue(uint16_t s_reg, uint16_t value_name) const { - auto it = sreg_value_map_.find(s_reg); - if (it != sreg_value_map_.end()) { - return it->second == value_name; - } else { - return gvn_->HasValue(kNoValue, s_reg, kNoValue, kNoValue, value_name); - } + uint16_t GetSregValue(uint16_t s_reg) const { + return GetSregValueImpl(s_reg, &sreg_value_map_); + } + + uint16_t GetSregValueWide(uint16_t s_reg) const { + return GetSregValueImpl(s_reg, &sreg_wide_value_map_); + } + + // Get the starting value number for a given dalvik register. + uint16_t GetStartingVregValueNumber(int v_reg) const { + return GetStartingVregValueNumberImpl(v_reg, false); + } + + // Get the starting value number for a given wide dalvik register. + uint16_t GetStartingVregValueNumberWide(int v_reg) const { + return GetStartingVregValueNumberImpl(v_reg, true); } enum MergeType { @@ -80,6 +89,20 @@ class LocalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> { // Key is s_reg, value is value name. 
typedef ScopedArenaSafeMap<uint16_t, uint16_t> SregValueMap; + uint16_t GetEndingVregValueNumberImpl(int v_reg, bool wide) const; + uint16_t GetStartingVregValueNumberImpl(int v_reg, bool wide) const; + + uint16_t GetSregValueImpl(int s_reg, const SregValueMap* map) const { + uint16_t res = kNoValue; + auto lb = map->find(s_reg); + if (lb != map->end()) { + res = lb->second; + } else { + res = gvn_->FindValue(kNoValue, s_reg, kNoValue, kNoValue); + } + return res; + } + void SetOperandValueImpl(uint16_t s_reg, uint16_t value, SregValueMap* map) { DCHECK_EQ(map->count(s_reg), 0u) << PrettyMethod(gvn_->cu_->method_idx, *gvn_->cu_->dex_file) << " LVN id: " << id_ << ", s_reg: " << s_reg; @@ -285,6 +308,8 @@ class LocalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> { void HandleEscapingRef(uint16_t base); void HandleInvokeArgs(const MIR* mir, const LocalValueNumbering* mir_lvn); uint16_t HandlePhi(MIR* mir); + uint16_t HandleConst(MIR* mir, uint32_t value); + uint16_t HandleConstWide(MIR* mir, uint64_t value); uint16_t HandleAGet(MIR* mir, uint16_t opcode); void HandleAPut(MIR* mir, uint16_t opcode); uint16_t HandleIGet(MIR* mir, uint16_t opcode); @@ -370,9 +395,9 @@ class LocalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> { ValueNameSet div_zero_checked_; // Reuse one vector for all merges to avoid leaking too much memory on the ArenaStack. - ScopedArenaVector<BasicBlockId> merge_names_; + mutable ScopedArenaVector<uint16_t> merge_names_; // Map to identify when different locations merge the same values. - ScopedArenaSafeMap<ScopedArenaVector<BasicBlockId>, uint16_t> merge_map_; + ScopedArenaSafeMap<ScopedArenaVector<uint16_t>, uint16_t> merge_map_; // New memory version for merge, kNoValue if all memory versions matched. uint16_t merge_new_memory_version_; diff --git a/compiler/dex/local_value_numbering_test.cc b/compiler/dex/local_value_numbering_test.cc index c89489287f..d1c3a6b4ba 100644 --- a/compiler/dex/local_value_numbering_test.cc +++ b/compiler/dex/local_value_numbering_test.cc @@ -136,7 +136,7 @@ class LocalValueNumberingTest : public testing::Test { void DoPrepareMIRs(const MIRDef* defs, size_t count) { mir_count_ = count; - mirs_ = reinterpret_cast<MIR*>(cu_.arena.Alloc(sizeof(MIR) * count, kArenaAllocMIR)); + mirs_ = cu_.arena.AllocArray<MIR>(count, kArenaAllocMIR); ssa_reps_.resize(count); for (size_t i = 0u; i != count; ++i) { const MIRDef* def = &defs[i]; @@ -185,9 +185,9 @@ class LocalValueNumberingTest : public testing::Test { } void PerformLVN() { - cu_.mir_graph->temp_.gvn.ifield_ids_ = GlobalValueNumbering::PrepareGvnFieldIds( + cu_.mir_graph->temp_.gvn.ifield_ids = GlobalValueNumbering::PrepareGvnFieldIds( allocator_.get(), cu_.mir_graph->ifield_lowering_infos_); - cu_.mir_graph->temp_.gvn.sfield_ids_ = GlobalValueNumbering::PrepareGvnFieldIds( + cu_.mir_graph->temp_.gvn.sfield_ids = GlobalValueNumbering::PrepareGvnFieldIds( allocator_.get(), cu_.mir_graph->sfield_lowering_infos_); gvn_.reset(new (allocator_.get()) GlobalValueNumbering(&cu_, allocator_.get(), GlobalValueNumbering::kModeLvn)); @@ -211,8 +211,14 @@ class LocalValueNumberingTest : public testing::Test { value_names_() { cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena)); allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack)); + // By default, the zero-initialized reg_location_[.] with ref == false tells LVN that + // 0 constants are integral, not references. Nothing else is used by LVN/GVN. 
+ cu_.mir_graph->reg_location_ = static_cast<RegLocation*>(cu_.arena.Alloc( + kMaxSsaRegs * sizeof(cu_.mir_graph->reg_location_[0]), kArenaAllocRegAlloc)); } + static constexpr size_t kMaxSsaRegs = 16384u; + ArenaPool pool_; CompilationUnit cu_; size_t mir_count_; @@ -772,4 +778,116 @@ TEST_F(LocalValueNumberingTest, DivZeroCheck) { } } +TEST_F(LocalValueNumberingTest, ConstWide) { + static const MIRDef mirs[] = { + // Core reg constants. + DEF_CONST(Instruction::CONST_WIDE_16, 0u, 0), + DEF_CONST(Instruction::CONST_WIDE_16, 1u, 1), + DEF_CONST(Instruction::CONST_WIDE_16, 2u, -1), + DEF_CONST(Instruction::CONST_WIDE_32, 3u, 1 << 16), + DEF_CONST(Instruction::CONST_WIDE_32, 4u, -1 << 16), + DEF_CONST(Instruction::CONST_WIDE_32, 5u, (1 << 16) + 1), + DEF_CONST(Instruction::CONST_WIDE_32, 6u, (1 << 16) - 1), + DEF_CONST(Instruction::CONST_WIDE_32, 7u, -(1 << 16) + 1), + DEF_CONST(Instruction::CONST_WIDE_32, 8u, -(1 << 16) - 1), + DEF_CONST(Instruction::CONST_WIDE, 9u, INT64_C(1) << 32), + DEF_CONST(Instruction::CONST_WIDE, 10u, INT64_C(-1) << 32), + DEF_CONST(Instruction::CONST_WIDE, 11u, (INT64_C(1) << 32) + 1), + DEF_CONST(Instruction::CONST_WIDE, 12u, (INT64_C(1) << 32) - 1), + DEF_CONST(Instruction::CONST_WIDE, 13u, (INT64_C(-1) << 32) + 1), + DEF_CONST(Instruction::CONST_WIDE, 14u, (INT64_C(-1) << 32) - 1), + DEF_CONST(Instruction::CONST_WIDE_HIGH16, 15u, 1), // Effectively 1 << 48. + DEF_CONST(Instruction::CONST_WIDE_HIGH16, 16u, 0xffff), // Effectively -1 << 48. + DEF_CONST(Instruction::CONST_WIDE, 17u, (INT64_C(1) << 48) + 1), + DEF_CONST(Instruction::CONST_WIDE, 18u, (INT64_C(1) << 48) - 1), + DEF_CONST(Instruction::CONST_WIDE, 19u, (INT64_C(-1) << 48) + 1), + DEF_CONST(Instruction::CONST_WIDE, 20u, (INT64_C(-1) << 48) - 1), + // FP reg constants. + DEF_CONST(Instruction::CONST_WIDE_16, 21u, 0), + DEF_CONST(Instruction::CONST_WIDE_16, 22u, 1), + DEF_CONST(Instruction::CONST_WIDE_16, 23u, -1), + DEF_CONST(Instruction::CONST_WIDE_32, 24u, 1 << 16), + DEF_CONST(Instruction::CONST_WIDE_32, 25u, -1 << 16), + DEF_CONST(Instruction::CONST_WIDE_32, 26u, (1 << 16) + 1), + DEF_CONST(Instruction::CONST_WIDE_32, 27u, (1 << 16) - 1), + DEF_CONST(Instruction::CONST_WIDE_32, 28u, -(1 << 16) + 1), + DEF_CONST(Instruction::CONST_WIDE_32, 29u, -(1 << 16) - 1), + DEF_CONST(Instruction::CONST_WIDE, 30u, INT64_C(1) << 32), + DEF_CONST(Instruction::CONST_WIDE, 31u, INT64_C(-1) << 32), + DEF_CONST(Instruction::CONST_WIDE, 32u, (INT64_C(1) << 32) + 1), + DEF_CONST(Instruction::CONST_WIDE, 33u, (INT64_C(1) << 32) - 1), + DEF_CONST(Instruction::CONST_WIDE, 34u, (INT64_C(-1) << 32) + 1), + DEF_CONST(Instruction::CONST_WIDE, 35u, (INT64_C(-1) << 32) - 1), + DEF_CONST(Instruction::CONST_WIDE_HIGH16, 36u, 1), // Effectively 1 << 48. + DEF_CONST(Instruction::CONST_WIDE_HIGH16, 37u, 0xffff), // Effectively -1 << 48. 
+ DEF_CONST(Instruction::CONST_WIDE, 38u, (INT64_C(1) << 48) + 1), + DEF_CONST(Instruction::CONST_WIDE, 39u, (INT64_C(1) << 48) - 1), + DEF_CONST(Instruction::CONST_WIDE, 40u, (INT64_C(-1) << 48) + 1), + DEF_CONST(Instruction::CONST_WIDE, 41u, (INT64_C(-1) << 48) - 1), + }; + + PrepareMIRs(mirs); + for (size_t i = arraysize(mirs) / 2u; i != arraysize(mirs); ++i) { + cu_.mir_graph->reg_location_[mirs_[i].ssa_rep->defs[0]].fp = true; + } + PerformLVN(); + for (size_t i = 0u; i != mir_count_; ++i) { + for (size_t j = i + 1u; j != mir_count_; ++j) { + EXPECT_NE(value_names_[i], value_names_[j]) << i << " " << j; + } + } +} + +TEST_F(LocalValueNumberingTest, Const) { + static const MIRDef mirs[] = { + // Core reg constants. + DEF_CONST(Instruction::CONST_4, 0u, 0), + DEF_CONST(Instruction::CONST_4, 1u, 1), + DEF_CONST(Instruction::CONST_4, 2u, -1), + DEF_CONST(Instruction::CONST_16, 3u, 1 << 4), + DEF_CONST(Instruction::CONST_16, 4u, -1 << 4), + DEF_CONST(Instruction::CONST_16, 5u, (1 << 4) + 1), + DEF_CONST(Instruction::CONST_16, 6u, (1 << 4) - 1), + DEF_CONST(Instruction::CONST_16, 7u, -(1 << 4) + 1), + DEF_CONST(Instruction::CONST_16, 8u, -(1 << 4) - 1), + DEF_CONST(Instruction::CONST_HIGH16, 9u, 1), // Effectively 1 << 16. + DEF_CONST(Instruction::CONST_HIGH16, 10u, 0xffff), // Effectively -1 << 16. + DEF_CONST(Instruction::CONST, 11u, (1 << 16) + 1), + DEF_CONST(Instruction::CONST, 12u, (1 << 16) - 1), + DEF_CONST(Instruction::CONST, 13u, (-1 << 16) + 1), + DEF_CONST(Instruction::CONST, 14u, (-1 << 16) - 1), + // FP reg constants. + DEF_CONST(Instruction::CONST_4, 15u, 0), + DEF_CONST(Instruction::CONST_4, 16u, 1), + DEF_CONST(Instruction::CONST_4, 17u, -1), + DEF_CONST(Instruction::CONST_16, 18u, 1 << 4), + DEF_CONST(Instruction::CONST_16, 19u, -1 << 4), + DEF_CONST(Instruction::CONST_16, 20u, (1 << 4) + 1), + DEF_CONST(Instruction::CONST_16, 21u, (1 << 4) - 1), + DEF_CONST(Instruction::CONST_16, 22u, -(1 << 4) + 1), + DEF_CONST(Instruction::CONST_16, 23u, -(1 << 4) - 1), + DEF_CONST(Instruction::CONST_HIGH16, 24u, 1), // Effectively 1 << 16. + DEF_CONST(Instruction::CONST_HIGH16, 25u, 0xffff), // Effectively -1 << 16. + DEF_CONST(Instruction::CONST, 26u, (1 << 16) + 1), + DEF_CONST(Instruction::CONST, 27u, (1 << 16) - 1), + DEF_CONST(Instruction::CONST, 28u, (-1 << 16) + 1), + DEF_CONST(Instruction::CONST, 29u, (-1 << 16) - 1), + // null reference constant. 
+ DEF_CONST(Instruction::CONST_4, 30u, 0), + }; + + PrepareMIRs(mirs); + static_assert((arraysize(mirs) & 1) != 0, "missing null or unmatched fp/core"); + cu_.mir_graph->reg_location_[arraysize(mirs) - 1].ref = true; + for (size_t i = arraysize(mirs) / 2u; i != arraysize(mirs) - 1; ++i) { + cu_.mir_graph->reg_location_[mirs_[i].ssa_rep->defs[0]].fp = true; + } + PerformLVN(); + for (size_t i = 0u; i != mir_count_; ++i) { + for (size_t j = i + 1u; j != mir_count_; ++j) { + EXPECT_NE(value_names_[i], value_names_[j]) << i << " " << j; + } + } +} + } // namespace art diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc index 473196b98a..31dbc60594 100644 --- a/compiler/dex/mir_analysis.cc +++ b/compiler/dex/mir_analysis.cc @@ -18,6 +18,7 @@ #include <memory> #include "base/logging.h" +#include "base/scoped_arena_containers.h" #include "dataflow_iterator-inl.h" #include "compiler_ir.h" #include "dex_flags.h" @@ -29,7 +30,6 @@ #include "driver/compiler_driver.h" #include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" -#include "utils/scoped_arena_containers.h" namespace art { @@ -1206,10 +1206,8 @@ void MIRGraph::DoCacheFieldLoweringInfo() { // All IGET/IPUT/SGET/SPUT instructions take 2 code units and there must also be a RETURN. const uint32_t max_refs = (GetNumDalvikInsns() - 1u) / 2u; ScopedArenaAllocator allocator(&cu_->arena_stack); - uint16_t* field_idxs = - reinterpret_cast<uint16_t*>(allocator.Alloc(max_refs * sizeof(uint16_t), kArenaAllocMisc)); - DexMemAccessType* field_types = reinterpret_cast<DexMemAccessType*>( - allocator.Alloc(max_refs * sizeof(DexMemAccessType), kArenaAllocMisc)); + uint16_t* field_idxs = allocator.AllocArray<uint16_t>(max_refs, kArenaAllocMisc); + DexMemAccessType* field_types = allocator.AllocArray<DexMemAccessType>(max_refs, kArenaAllocMisc); // Find IGET/IPUT/SGET/SPUT insns, store IGET/IPUT fields at the beginning, SGET/SPUT at the end. size_t ifield_pos = 0u; @@ -1328,8 +1326,8 @@ void MIRGraph::DoCacheMethodLoweringInfo() { // multi_index_container with one ordered index and one sequential index. ScopedArenaSet<MapEntry, MapEntryComparator> invoke_map(MapEntryComparator(), allocator.Adapter()); - const MapEntry** sequential_entries = reinterpret_cast<const MapEntry**>( - allocator.Alloc(max_refs * sizeof(sequential_entries[0]), kArenaAllocMisc)); + const MapEntry** sequential_entries = + allocator.AllocArray<const MapEntry*>(max_refs, kArenaAllocMisc); // Find INVOKE insns and their devirtualization targets. 
AllNodesIterator iter(this); diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc index f09d1ae6d0..f9f7e22b03 100644 --- a/compiler/dex/mir_dataflow.cc +++ b/compiler/dex/mir_dataflow.cc @@ -910,11 +910,6 @@ const uint64_t MIRGraph::oat_data_flow_attributes_[kMirOpLast] = { DF_FORMAT_EXTENDED, }; -/* Return the base virtual register for a SSA name */ -int MIRGraph::SRegToVReg(int ssa_reg) const { - return ssa_base_vregs_[ssa_reg]; -} - /* Any register that is used before being defined is considered live-in */ void MIRGraph::HandleLiveInUse(ArenaBitVector* use_v, ArenaBitVector* def_v, ArenaBitVector* live_in_v, int dalvik_reg_id) { @@ -1084,9 +1079,9 @@ void MIRGraph::AllocateSSAUseData(MIR *mir, int num_uses) { mir->ssa_rep->num_uses = num_uses; if (mir->ssa_rep->num_uses_allocated < num_uses) { - mir->ssa_rep->uses = static_cast<int*>(arena_->Alloc(sizeof(int) * num_uses, kArenaAllocDFInfo)); + mir->ssa_rep->uses = arena_->AllocArray<int32_t>(num_uses, kArenaAllocDFInfo); // NOTE: will be filled in during type & size inference pass - mir->ssa_rep->fp_use = static_cast<bool*>(arena_->Alloc(sizeof(bool) * num_uses, kArenaAllocDFInfo)); + mir->ssa_rep->fp_use = arena_->AllocArray<bool>(num_uses, kArenaAllocDFInfo); } } @@ -1094,10 +1089,8 @@ void MIRGraph::AllocateSSADefData(MIR *mir, int num_defs) { mir->ssa_rep->num_defs = num_defs; if (mir->ssa_rep->num_defs_allocated < num_defs) { - mir->ssa_rep->defs = static_cast<int*>(arena_->Alloc(sizeof(int) * num_defs, - kArenaAllocDFInfo)); - mir->ssa_rep->fp_def = static_cast<bool*>(arena_->Alloc(sizeof(bool) * num_defs, - kArenaAllocDFInfo)); + mir->ssa_rep->defs = arena_->AllocArray<int32_t>(num_defs, kArenaAllocDFInfo); + mir->ssa_rep->fp_def = arena_->AllocArray<bool>(num_defs, kArenaAllocDFInfo); } } @@ -1198,11 +1191,30 @@ void MIRGraph::DataFlowSSAFormatExtended(MIR* mir) { /* Entry function to convert a block into SSA representation */ bool MIRGraph::DoSSAConversion(BasicBlock* bb) { - MIR* mir; - if (bb->data_flow_info == NULL) return false; - for (mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { + /* + * Pruned SSA form: Insert phi nodes for each dalvik register marked in phi_node_blocks + * only if the dalvik register is in the live-in set. + */ + BasicBlockId bb_id = bb->id; + for (int dalvik_reg = GetNumOfCodeAndTempVRs() - 1; dalvik_reg >= 0; dalvik_reg--) { + if (temp_.ssa.phi_node_blocks[dalvik_reg]->IsBitSet(bb_id)) { + if (!bb->data_flow_info->live_in_v->IsBitSet(dalvik_reg)) { + /* Variable will be clobbered before being used - no need for phi */ + vreg_to_ssa_map_[dalvik_reg] = INVALID_SREG; + continue; + } + MIR *phi = NewMIR(); + phi->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpPhi); + phi->dalvikInsn.vA = dalvik_reg; + phi->offset = bb->start_offset; + phi->m_unit_index = 0; // Arbitrarily assign all Phi nodes to outermost method. + bb->PrependMIR(phi); + } + } + + for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { mir->ssa_rep = static_cast<struct SSARepresentation *>(arena_->Alloc(sizeof(SSARepresentation), kArenaAllocDFInfo)); @@ -1315,8 +1327,7 @@ bool MIRGraph::DoSSAConversion(BasicBlock* bb) { * predecessor blocks. 
*/ bb->data_flow_info->vreg_to_ssa_map_exit = - static_cast<int*>(arena_->Alloc(sizeof(int) * GetNumOfCodeAndTempVRs(), - kArenaAllocDFInfo)); + arena_->AllocArray<int32_t>(GetNumOfCodeAndTempVRs(), kArenaAllocDFInfo); memcpy(bb->data_flow_info->vreg_to_ssa_map_exit, vreg_to_ssa_map_, sizeof(int) * GetNumOfCodeAndTempVRs()); @@ -1368,13 +1379,9 @@ void MIRGraph::CompilerInitializeSSAConversion() { * Initialize the DalvikToSSAMap map. There is one entry for each * Dalvik register, and the SSA names for those are the same. */ - vreg_to_ssa_map_ = - static_cast<int*>(arena_->Alloc(sizeof(int) * num_reg, - kArenaAllocDFInfo)); + vreg_to_ssa_map_ = arena_->AllocArray<int32_t>(num_reg, kArenaAllocDFInfo); /* Keep track of the higest def for each dalvik reg */ - ssa_last_defs_ = - static_cast<int*>(arena_->Alloc(sizeof(int) * num_reg, - kArenaAllocDFInfo)); + ssa_last_defs_ = arena_->AllocArray<int>(num_reg, kArenaAllocDFInfo); for (unsigned int i = 0; i < num_reg; i++) { vreg_to_ssa_map_[i] = i; diff --git a/compiler/dex/mir_field_info.h b/compiler/dex/mir_field_info.h index ff427f88d0..98b2da8299 100644 --- a/compiler/dex/mir_field_info.h +++ b/compiler/dex/mir_field_info.h @@ -149,6 +149,7 @@ class MirIFieldLoweringInfo : public MirFieldInfo { friend class NullCheckEliminationTest; friend class GlobalValueNumberingTest; + friend class GvnDeadCodeEliminationTest; friend class LocalValueNumberingTest; }; @@ -223,6 +224,7 @@ class MirSFieldLoweringInfo : public MirFieldInfo { friend class ClassInitCheckEliminationTest; friend class GlobalValueNumberingTest; + friend class GvnDeadCodeEliminationTest; friend class LocalValueNumberingTest; }; diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index 0f7d45df79..76b5e44df0 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -24,6 +24,7 @@ #include "base/logging.h" #include "base/stl_util.h" #include "base/stringprintf.h" +#include "base/scoped_arena_containers.h" #include "compiler_ir.h" #include "dex_file-inl.h" #include "dex_flags.h" @@ -34,7 +35,6 @@ #include "leb128.h" #include "pass_driver_me_post_opt.h" #include "stack.h" -#include "utils/scoped_arena_containers.h" namespace art { @@ -113,7 +113,6 @@ MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena) entry_block_(NULL), exit_block_(NULL), current_code_item_(NULL), - dex_pc_to_block_map_(arena->Adapter()), m_units_(arena->Adapter()), method_stack_(arena->Adapter()), current_method_(kInvalidEntry), @@ -268,31 +267,14 @@ BasicBlock* MIRGraph::SplitBlock(DexOffset code_offset, DCHECK(insn != orig_block->first_mir_insn); DCHECK(insn == bottom_block->first_mir_insn); DCHECK_EQ(insn->offset, bottom_block->start_offset); - DCHECK_EQ(dex_pc_to_block_map_[insn->offset], orig_block->id); // Scan the "bottom" instructions, remapping them to the // newly created "bottom" block. MIR* p = insn; p->bb = bottom_block->id; - dex_pc_to_block_map_[p->offset] = bottom_block->id; while (p != bottom_block->last_mir_insn) { p = p->next; DCHECK(p != nullptr); p->bb = bottom_block->id; - int opcode = p->dalvikInsn.opcode; - /* - * Some messiness here to ensure that we only enter real opcodes and only the - * first half of a potentially throwing instruction that has been split into - * CHECK and work portions. Since the 2nd half of a split operation is always - * the first in a BasicBlock, we can't hit it here. 
- */ - if ((opcode == kMirOpCheck) || !MIR::DecodedInstruction::IsPseudoMirOp(opcode)) { - BasicBlockId mapped_id = dex_pc_to_block_map_[p->offset]; - // At first glance the instructions should all be mapped to orig_block. - // However, multiple instructions may correspond to the same dex, hence an earlier - // instruction may have already moved the mapping for dex to bottom_block. - DCHECK((mapped_id == orig_block->id) || (mapped_id == bottom_block->id)); - dex_pc_to_block_map_[p->offset] = bottom_block->id; - } } return bottom_block; @@ -307,12 +289,13 @@ BasicBlock* MIRGraph::SplitBlock(DexOffset code_offset, * Utilizes a map for fast lookup of the typical cases. */ BasicBlock* MIRGraph::FindBlock(DexOffset code_offset, bool create, - BasicBlock** immed_pred_block_p) { + BasicBlock** immed_pred_block_p, + ScopedArenaVector<uint16_t>* dex_pc_to_block_map) { if (code_offset >= current_code_item_->insns_size_in_code_units_) { return nullptr; } - int block_id = dex_pc_to_block_map_[code_offset]; + int block_id = (*dex_pc_to_block_map)[code_offset]; BasicBlock* bb = GetBasicBlock(block_id); if ((bb != nullptr) && (bb->start_offset == code_offset)) { @@ -327,19 +310,46 @@ BasicBlock* MIRGraph::FindBlock(DexOffset code_offset, bool create, if (bb != nullptr) { // The target exists somewhere in an existing block. - return SplitBlock(code_offset, bb, bb == *immed_pred_block_p ? immed_pred_block_p : nullptr); + BasicBlock* bottom_block = SplitBlock(code_offset, bb, bb == *immed_pred_block_p ? immed_pred_block_p : nullptr); + DCHECK(bottom_block != nullptr); + MIR* p = bottom_block->first_mir_insn; + BasicBlock* orig_block = bb; + DCHECK_EQ((*dex_pc_to_block_map)[p->offset], orig_block->id); + // Scan the "bottom" instructions, remapping them to the + // newly created "bottom" block. + (*dex_pc_to_block_map)[p->offset] = bottom_block->id; + while (p != bottom_block->last_mir_insn) { + p = p->next; + DCHECK(p != nullptr); + int opcode = p->dalvikInsn.opcode; + /* + * Some messiness here to ensure that we only enter real opcodes and only the + * first half of a potentially throwing instruction that has been split into + * CHECK and work portions. Since the 2nd half of a split operation is always + * the first in a BasicBlock, we can't hit it here. + */ + if ((opcode == kMirOpCheck) || !MIR::DecodedInstruction::IsPseudoMirOp(opcode)) { + BasicBlockId mapped_id = (*dex_pc_to_block_map)[p->offset]; + // At first glance the instructions should all be mapped to orig_block. + // However, multiple instructions may correspond to the same dex, hence an earlier + // instruction may have already moved the mapping for dex to bottom_block. + DCHECK((mapped_id == orig_block->id) || (mapped_id == bottom_block->id)); + (*dex_pc_to_block_map)[p->offset] = bottom_block->id; + } + } + return bottom_block; } // Create a new block. 
bb = CreateNewBB(kDalvikByteCode); bb->start_offset = code_offset; - dex_pc_to_block_map_[bb->start_offset] = bb->id; + (*dex_pc_to_block_map)[bb->start_offset] = bb->id; return bb; } /* Identify code range in try blocks and set up the empty catch blocks */ -void MIRGraph::ProcessTryCatchBlocks() { +void MIRGraph::ProcessTryCatchBlocks(ScopedArenaVector<uint16_t>* dex_pc_to_block_map) { int tries_size = current_code_item_->tries_size_; DexOffset offset; @@ -364,7 +374,7 @@ void MIRGraph::ProcessTryCatchBlocks() { CatchHandlerIterator iterator(handlers_ptr); for (; iterator.HasNext(); iterator.Next()) { uint32_t address = iterator.GetHandlerAddress(); - FindBlock(address, true /*create*/, /* immed_pred_block_p */ nullptr); + FindBlock(address, true /*create*/, /* immed_pred_block_p */ nullptr, dex_pc_to_block_map); } handlers_ptr = iterator.EndDataPointer(); } @@ -439,7 +449,8 @@ bool MIRGraph::IsBadMonitorExitCatch(NarrowDexOffset monitor_exit_offset, /* Process instructions with the kBranch flag */ BasicBlock* MIRGraph::ProcessCanBranch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width, int flags, const uint16_t* code_ptr, - const uint16_t* code_end) { + const uint16_t* code_end, + ScopedArenaVector<uint16_t>* dex_pc_to_block_map) { DexOffset target = cur_offset; switch (insn->dalvikInsn.opcode) { case Instruction::GOTO: @@ -470,7 +481,8 @@ BasicBlock* MIRGraph::ProcessCanBranch(BasicBlock* cur_block, MIR* insn, DexOffs } CountBranch(target); BasicBlock* taken_block = FindBlock(target, /* create */ true, - /* immed_pred_block_p */ &cur_block); + /* immed_pred_block_p */ &cur_block, + dex_pc_to_block_map); cur_block->taken = taken_block->id; taken_block->predecessors.push_back(cur_block->id); @@ -480,18 +492,20 @@ BasicBlock* MIRGraph::ProcessCanBranch(BasicBlock* cur_block, MIR* insn, DexOffs /* create */ true, /* immed_pred_block_p */ - &cur_block); + &cur_block, + dex_pc_to_block_map); cur_block->fall_through = fallthrough_block->id; fallthrough_block->predecessors.push_back(cur_block->id); } else if (code_ptr < code_end) { - FindBlock(cur_offset + width, /* create */ true, /* immed_pred_block_p */ nullptr); + FindBlock(cur_offset + width, /* create */ true, /* immed_pred_block_p */ nullptr, dex_pc_to_block_map); } return cur_block; } /* Process instructions with the kSwitch flag */ BasicBlock* MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, - int width, int flags) { + int width, int flags, + ScopedArenaVector<uint16_t>* dex_pc_to_block_map) { UNUSED(flags); const uint16_t* switch_data = reinterpret_cast<const uint16_t*>(GetCurrentInsns() + cur_offset + insn->dalvikInsn.vB); @@ -545,7 +559,8 @@ BasicBlock* MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffs for (i = 0; i < size; i++) { BasicBlock* case_block = FindBlock(cur_offset + target_table[i], /* create */ true, - /* immed_pred_block_p */ &cur_block); + /* immed_pred_block_p */ &cur_block, + dex_pc_to_block_map); SuccessorBlockInfo* successor_block_info = static_cast<SuccessorBlockInfo*>(arena_->Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessor)); @@ -559,7 +574,8 @@ BasicBlock* MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffs /* Fall-through case */ BasicBlock* fallthrough_block = FindBlock(cur_offset + width, /* create */ true, - /* immed_pred_block_p */ nullptr); + /* immed_pred_block_p */ nullptr, + dex_pc_to_block_map); cur_block->fall_through = fallthrough_block->id; fallthrough_block->predecessors.push_back(cur_block->id); return 
cur_block; @@ -568,7 +584,8 @@ BasicBlock* MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffs /* Process instructions with the kThrow flag */ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width, int flags, ArenaBitVector* try_block_addr, - const uint16_t* code_ptr, const uint16_t* code_end) { + const uint16_t* code_ptr, const uint16_t* code_end, + ScopedArenaVector<uint16_t>* dex_pc_to_block_map) { UNUSED(flags); bool in_try_block = try_block_addr->IsBitSet(cur_offset); bool is_throw = (insn->dalvikInsn.opcode == Instruction::THROW); @@ -585,7 +602,8 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse for (; iterator.HasNext(); iterator.Next()) { BasicBlock* catch_block = FindBlock(iterator.GetHandlerAddress(), false /* create */, - nullptr /* immed_pred_block_p */); + nullptr /* immed_pred_block_p */, + dex_pc_to_block_map); if (insn->dalvikInsn.opcode == Instruction::MONITOR_EXIT && IsBadMonitorExitCatch(insn->offset, catch_block->start_offset)) { // Don't allow monitor-exit to catch its own exception, http://b/15745363 . @@ -620,7 +638,7 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse cur_block->explicit_throw = true; if (code_ptr < code_end) { // Force creation of new block following THROW via side-effect. - FindBlock(cur_offset + width, /* create */ true, /* immed_pred_block_p */ nullptr); + FindBlock(cur_offset + width, /* create */ true, /* immed_pred_block_p */ nullptr, dex_pc_to_block_map); } if (!in_try_block) { // Don't split a THROW that can't rethrow - we're done. @@ -652,7 +670,7 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse * not automatically terminated after the work portion, and may * contain following instructions. * - * Note also that the dex_pc_to_block_map_ entry for the potentially + * Note also that the dex_pc_to_block_map entry for the potentially * throwing instruction will refer to the original basic block. */ BasicBlock* new_block = CreateNewBB(kDalvikByteCode); @@ -687,7 +705,11 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ // TODO: need to rework expansion of block list & try_block_addr when inlining activated. // TUNING: use better estimate of basic blocks for following resize. block_list_.reserve(block_list_.size() + current_code_item_->insns_size_in_code_units_); - dex_pc_to_block_map_.resize(dex_pc_to_block_map_.size() + current_code_item_->insns_size_in_code_units_); + // FindBlock lookup cache. + ScopedArenaAllocator allocator(&cu_->arena_stack); + ScopedArenaVector<uint16_t> dex_pc_to_block_map(allocator.Adapter()); + dex_pc_to_block_map.resize(dex_pc_to_block_map.size() + + current_code_item_->insns_size_in_code_units_); // TODO: replace with explicit resize routine. Using automatic extension side effect for now. try_block_addr_->SetBit(current_code_item_->insns_size_in_code_units_); @@ -728,7 +750,7 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ cur_block->predecessors.push_back(entry_block_->id); /* Identify code range in try blocks and set up the empty catch blocks */ - ProcessTryCatchBlocks(); + ProcessTryCatchBlocks(&dex_pc_to_block_map); uint64_t merged_df_flags = 0u; @@ -777,20 +799,21 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ DCHECK(cur_block->taken == NullBasicBlockId); // Unreachable instruction, mark for no continuation and end basic block. 
flags &= ~Instruction::kContinue; - FindBlock(current_offset_ + width, /* create */ true, /* immed_pred_block_p */ nullptr); + FindBlock(current_offset_ + width, /* create */ true, + /* immed_pred_block_p */ nullptr, &dex_pc_to_block_map); } } else { cur_block->AppendMIR(insn); } // Associate the starting dex_pc for this opcode with its containing basic block. - dex_pc_to_block_map_[insn->offset] = cur_block->id; + dex_pc_to_block_map[insn->offset] = cur_block->id; code_ptr += width; if (flags & Instruction::kBranch) { cur_block = ProcessCanBranch(cur_block, insn, current_offset_, - width, flags, code_ptr, code_end); + width, flags, code_ptr, code_end, &dex_pc_to_block_map); } else if (flags & Instruction::kReturn) { cur_block->terminated_by_return = true; cur_block->fall_through = exit_block_->id; @@ -804,13 +827,15 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ * Create a fallthrough block for real instructions * (incl. NOP). */ - FindBlock(current_offset_ + width, /* create */ true, /* immed_pred_block_p */ nullptr); + FindBlock(current_offset_ + width, /* create */ true, + /* immed_pred_block_p */ nullptr, &dex_pc_to_block_map); } } else if (flags & Instruction::kThrow) { cur_block = ProcessCanThrow(cur_block, insn, current_offset_, width, flags, try_block_addr_, - code_ptr, code_end); + code_ptr, code_end, &dex_pc_to_block_map); } else if (flags & Instruction::kSwitch) { - cur_block = ProcessCanSwitch(cur_block, insn, current_offset_, width, flags); + cur_block = ProcessCanSwitch(cur_block, insn, current_offset_, width, + flags, &dex_pc_to_block_map); } if (verify_flags & Instruction::kVerifyVarArgRange || verify_flags & Instruction::kVerifyVarArgRangeNonZero) { @@ -828,7 +853,8 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ } current_offset_ += width; BasicBlock* next_block = FindBlock(current_offset_, /* create */ false, - /* immed_pred_block_p */ nullptr); + /* immed_pred_block_p */ nullptr, + &dex_pc_to_block_map); if (next_block) { /* * The next instruction could be the target of a previously parsed @@ -1573,7 +1599,7 @@ char* MIRGraph::GetDalvikDisassembly(const MIR* mir) { } } int length = str.length() + 1; - ret = static_cast<char*>(arena_->Alloc(length, kArenaAllocDFInfo)); + ret = arena_->AllocArray<char>(length, kArenaAllocDFInfo); strncpy(ret, str.c_str(), length); return ret; } @@ -1710,9 +1736,9 @@ CallInfo* MIRGraph::NewMemCallInfo(BasicBlock* bb, MIR* mir, InvokeType type, move_result_mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop); } info->num_arg_words = mir->ssa_rep->num_uses; - info->args = (info->num_arg_words == 0) ? NULL : static_cast<RegLocation*> - (arena_->Alloc(sizeof(RegLocation) * info->num_arg_words, kArenaAllocMisc)); - for (int i = 0; i < info->num_arg_words; i++) { + info->args = (info->num_arg_words == 0) ? 
nullptr : + arena_->AllocArray<RegLocation>(info->num_arg_words, kArenaAllocMisc); + for (size_t i = 0; i < info->num_arg_words; i++) { info->args[i] = GetRawSrc(mir, i); } info->opt_flags = mir->optimization_flags; @@ -1742,7 +1768,7 @@ BasicBlock* MIRGraph::NewMemBB(BBType block_type, int block_id) { void MIRGraph::InitializeConstantPropagation() { is_constant_v_ = new (arena_) ArenaBitVector(arena_, GetNumSSARegs(), false); - constant_values_ = static_cast<int*>(arena_->Alloc(sizeof(int) * GetNumSSARegs(), kArenaAllocDFInfo)); + constant_values_ = arena_->AllocArray<int>(GetNumSSARegs(), kArenaAllocDFInfo); } void MIRGraph::InitializeMethodUses() { @@ -1772,7 +1798,8 @@ void MIRGraph::SSATransformationEnd() { temp_.ssa.num_vregs = 0u; temp_.ssa.work_live_vregs = nullptr; - temp_.ssa.def_block_matrix = nullptr; + DCHECK(temp_.ssa.def_block_matrix == nullptr); + temp_.ssa.phi_node_blocks = nullptr; DCHECK(temp_scoped_alloc_.get() != nullptr); temp_scoped_alloc_.reset(); @@ -2532,4 +2559,13 @@ const uint16_t* MIRGraph::GetInsns(int m_unit_index) const { return m_units_[m_unit_index]->GetCodeItem()->insns_; } +void MIRGraph::SetPuntToInterpreter(bool val) { + punt_to_interpreter_ = val; + if (val) { + // Disable all subsequent optimizations. They may not be safe to run. (For example, + // LVN/GVN assumes there are no conflicts found by the type inference pass.) + cu_->disable_opt = ~static_cast<decltype(cu_->disable_opt)>(0); + } +} + } // namespace art diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index 5def19128c..e5abd3be51 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -19,17 +19,17 @@ #include <stdint.h> +#include "base/arena_containers.h" +#include "base/scoped_arena_containers.h" #include "dex_file.h" #include "dex_instruction.h" #include "dex_types.h" #include "invoke_type.h" #include "mir_field_info.h" #include "mir_method_info.h" -#include "utils/arena_bit_vector.h" -#include "utils/arena_containers.h" -#include "utils/scoped_arena_containers.h" #include "reg_location.h" #include "reg_storage.h" +#include "utils/arena_bit_vector.h" namespace art { @@ -37,6 +37,7 @@ struct CompilationUnit; class DexCompilationUnit; class DexFileMethodInliner; class GlobalValueNumbering; +class GvnDeadCodeElimination; // Forward declaration. class MIRGraph; @@ -497,19 +498,19 @@ class ChildBlockIterator { * more efficient invoke code generation. */ struct CallInfo { - int num_arg_words; // Note: word count, not arg count. - RegLocation* args; // One for each word of arguments. - RegLocation result; // Eventual target of MOVE_RESULT. + size_t num_arg_words; // Note: word count, not arg count. + RegLocation* args; // One for each word of arguments. + RegLocation result; // Eventual target of MOVE_RESULT. int opt_flags; InvokeType type; uint32_t dex_idx; - uint32_t index; // Method idx for invokes, type idx for FilledNewArray. + uint32_t index; // Method idx for invokes, type idx for FilledNewArray. uintptr_t direct_code; uintptr_t direct_method; - RegLocation target; // Target of following move_result. + RegLocation target; // Target of following move_result. bool skip_this; bool is_range; - DexOffset offset; // Offset in code units. + DexOffset offset; // Offset in code units. 
MIR* mir; }; @@ -542,8 +543,9 @@ class MIRGraph { uint32_t method_idx, jobject class_loader, const DexFile& dex_file); /* Find existing block */ - BasicBlock* FindBlock(DexOffset code_offset) { - return FindBlock(code_offset, false, NULL); + BasicBlock* FindBlock(DexOffset code_offset, + ScopedArenaVector<uint16_t>* dex_pc_to_block_map) { + return FindBlock(code_offset, false, nullptr, dex_pc_to_block_map); } const uint16_t* GetCurrentInsns() const { @@ -625,8 +627,7 @@ class MIRGraph { } void EnableOpcodeCounting() { - opcode_count_ = static_cast<int*>(arena_->Alloc(kNumPackedOpcodes * sizeof(int), - kArenaAllocMisc)); + opcode_count_ = arena_->AllocArray<int>(kNumPackedOpcodes, kArenaAllocMisc); } void ShowOpcodeStats(); @@ -1052,7 +1053,12 @@ class MIRGraph { void DumpCheckStats(); MIR* FindMoveResult(BasicBlock* bb, MIR* mir); - int SRegToVReg(int ssa_reg) const; + + /* Return the base virtual register for a SSA name */ + int SRegToVReg(int ssa_reg) const { + return ssa_base_vregs_[ssa_reg]; + } + void VerifyDataflow(); void CheckForDominanceFrontier(BasicBlock* dom_bb, const BasicBlock* succ_bb); bool EliminateNullChecksGate(); @@ -1065,6 +1071,9 @@ class MIRGraph { bool ApplyGlobalValueNumberingGate(); bool ApplyGlobalValueNumbering(BasicBlock* bb); void ApplyGlobalValueNumberingEnd(); + bool EliminateDeadCodeGate(); + bool EliminateDeadCode(BasicBlock* bb); + void EliminateDeadCodeEnd(); bool EliminateSuspendChecksGate(); bool EliminateSuspendChecks(BasicBlock* bb); void EliminateSuspendChecksEnd(); @@ -1072,15 +1081,15 @@ class MIRGraph { uint16_t GetGvnIFieldId(MIR* mir) const { DCHECK(IsInstructionIGetOrIPut(mir->dalvikInsn.opcode)); DCHECK_LT(mir->meta.ifield_lowering_info, ifield_lowering_infos_.size()); - DCHECK(temp_.gvn.ifield_ids_ != nullptr); - return temp_.gvn.ifield_ids_[mir->meta.ifield_lowering_info]; + DCHECK(temp_.gvn.ifield_ids != nullptr); + return temp_.gvn.ifield_ids[mir->meta.ifield_lowering_info]; } uint16_t GetGvnSFieldId(MIR* mir) const { DCHECK(IsInstructionSGetOrSPut(mir->dalvikInsn.opcode)); DCHECK_LT(mir->meta.sfield_lowering_info, sfield_lowering_infos_.size()); - DCHECK(temp_.gvn.sfield_ids_ != nullptr); - return temp_.gvn.sfield_ids_[mir->meta.sfield_lowering_info]; + DCHECK(temp_.gvn.sfield_ids != nullptr); + return temp_.gvn.sfield_ids[mir->meta.sfield_lowering_info]; } /* @@ -1115,9 +1124,7 @@ class MIRGraph { return punt_to_interpreter_; } - void SetPuntToInterpreter(bool val) { - punt_to_interpreter_ = val; - } + void SetPuntToInterpreter(bool val); void DisassembleExtendedInstr(const MIR* mir, std::string* decoded_mir); char* GetDalvikDisassembly(const MIR* mir); @@ -1200,7 +1207,7 @@ class MIRGraph { void ComputeDominators(); void CompilerInitializeSSAConversion(); virtual void InitializeBasicBlockDataFlow(); - void InsertPhiNodes(); + void FindPhiNodeBlocks(); void DoDFSPreOrderSSARename(BasicBlock* block); bool DfsOrdersUpToDate() const { @@ -1249,16 +1256,20 @@ class MIRGraph { bool ContentIsInsn(const uint16_t* code_ptr); BasicBlock* SplitBlock(DexOffset code_offset, BasicBlock* orig_block, BasicBlock** immed_pred_block_p); - BasicBlock* FindBlock(DexOffset code_offset, bool create, BasicBlock** immed_pred_block_p); - void ProcessTryCatchBlocks(); + BasicBlock* FindBlock(DexOffset code_offset, bool create, BasicBlock** immed_pred_block_p, + ScopedArenaVector<uint16_t>* dex_pc_to_block_map); + void ProcessTryCatchBlocks(ScopedArenaVector<uint16_t>* dex_pc_to_block_map); bool IsBadMonitorExitCatch(NarrowDexOffset monitor_exit_offset, 
NarrowDexOffset catch_offset); BasicBlock* ProcessCanBranch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width, - int flags, const uint16_t* code_ptr, const uint16_t* code_end); + int flags, const uint16_t* code_ptr, const uint16_t* code_end, + ScopedArenaVector<uint16_t>* dex_pc_to_block_map); BasicBlock* ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width, - int flags); + int flags, + ScopedArenaVector<uint16_t>* dex_pc_to_block_map); BasicBlock* ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width, int flags, ArenaBitVector* try_block_addr, const uint16_t* code_ptr, - const uint16_t* code_end); + const uint16_t* code_end, + ScopedArenaVector<uint16_t>* dex_pc_to_block_map); int AddNewSReg(int v_reg); void HandleSSAUse(int* uses, int dalvik_reg, int reg_index); void DataFlowSSAFormat35C(MIR* mir); @@ -1280,7 +1291,8 @@ class MIRGraph { * @param mir The mir to check. * @return Returns 'true' if the given MIR might throw an exception. */ - bool CanThrow(MIR* mir); + bool CanThrow(MIR* mir) const; + /** * @brief Combine multiply and add/sub MIRs into corresponding extended MAC MIR. * @param mul_mir The multiply MIR to be combined. @@ -1319,7 +1331,7 @@ class MIRGraph { ArenaVector<int> ssa_base_vregs_; ArenaVector<int> ssa_subscripts_; // Map original Dalvik virtual reg i to the current SSA name. - int* vreg_to_ssa_map_; // length == method->registers_size + int32_t* vreg_to_ssa_map_; // length == method->registers_size int* ssa_last_defs_; // length == method->registers_size ArenaBitVector* is_constant_v_; // length == num_ssa_reg int* constant_values_; // length == num_ssa_reg @@ -1373,12 +1385,14 @@ class MIRGraph { size_t num_vregs; ArenaBitVector* work_live_vregs; ArenaBitVector** def_block_matrix; // num_vregs x num_blocks_. + ArenaBitVector** phi_node_blocks; // num_vregs x num_blocks_. } ssa; // Global value numbering. struct { GlobalValueNumbering* gvn; - uint16_t* ifield_ids_; // Part of GVN/LVN but cached here for LVN to avoid recalculation. - uint16_t* sfield_ids_; // Ditto. + uint16_t* ifield_ids; // Part of GVN/LVN but cached here for LVN to avoid recalculation. + uint16_t* sfield_ids; // Ditto. + GvnDeadCodeElimination* dce; } gvn; // Suspend check elimination. struct { @@ -1391,7 +1405,6 @@ class MIRGraph { BasicBlock* entry_block_; BasicBlock* exit_block_; const DexFile::CodeItem* current_code_item_; - ArenaVector<uint16_t> dex_pc_to_block_map_; // FindBlock lookup cache. 
ArenaVector<DexCompilationUnit*> m_units_; // List of methods included in this graph typedef std::pair<int, int> MIRLocation; // Insert point, (m_unit_ index, offset) ArenaVector<MIRLocation> method_stack_; // Include stack @@ -1433,6 +1446,7 @@ class MIRGraph { friend class SuspendCheckEliminationTest; friend class NullCheckEliminationTest; friend class GlobalValueNumberingTest; + friend class GvnDeadCodeEliminationTest; friend class LocalValueNumberingTest; friend class TopologicalSortOrderTest; }; diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc index 8718191069..fd67d4ebec 100644 --- a/compiler/dex/mir_optimization.cc +++ b/compiler/dex/mir_optimization.cc @@ -16,17 +16,18 @@ #include "base/bit_vector-inl.h" #include "base/logging.h" +#include "base/scoped_arena_containers.h" #include "dataflow_iterator-inl.h" #include "dex_flags.h" #include "driver/compiler_driver.h" #include "driver/dex_compilation_unit.h" #include "global_value_numbering.h" +#include "gvn_dead_code_elimination.h" #include "local_value_numbering.h" #include "mir_field_info.h" #include "quick/dex_file_method_inliner.h" #include "quick/dex_file_to_method_inliner_map.h" #include "stack.h" -#include "utils/scoped_arena_containers.h" namespace art { @@ -632,8 +633,7 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) { } else { DCHECK_EQ(SelectKind(if_true), kSelectMove); DCHECK_EQ(SelectKind(if_false), kSelectMove); - int* src_ssa = - static_cast<int*>(arena_->Alloc(sizeof(int) * 3, kArenaAllocDFInfo)); + int32_t* src_ssa = arena_->AllocArray<int32_t>(3, kArenaAllocDFInfo); src_ssa[0] = mir->ssa_rep->uses[0]; src_ssa[1] = if_true->ssa_rep->uses[0]; src_ssa[2] = if_false->ssa_rep->uses[0]; @@ -641,15 +641,12 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) { mir->ssa_rep->num_uses = 3; } mir->ssa_rep->num_defs = 1; - mir->ssa_rep->defs = - static_cast<int*>(arena_->Alloc(sizeof(int) * 1, kArenaAllocDFInfo)); - mir->ssa_rep->fp_def = - static_cast<bool*>(arena_->Alloc(sizeof(bool) * 1, kArenaAllocDFInfo)); + mir->ssa_rep->defs = arena_->AllocArray<int32_t>(1, kArenaAllocDFInfo); + mir->ssa_rep->fp_def = arena_->AllocArray<bool>(1, kArenaAllocDFInfo); mir->ssa_rep->fp_def[0] = if_true->ssa_rep->fp_def[0]; // Match type of uses to def. - mir->ssa_rep->fp_use = - static_cast<bool*>(arena_->Alloc(sizeof(bool) * mir->ssa_rep->num_uses, - kArenaAllocDFInfo)); + mir->ssa_rep->fp_use = arena_->AllocArray<bool>(mir->ssa_rep->num_uses, + kArenaAllocDFInfo); for (int i = 0; i < mir->ssa_rep->num_uses; i++) { mir->ssa_rep->fp_use[i] = mir->ssa_rep->fp_def[0]; } @@ -900,8 +897,8 @@ bool MIRGraph::EliminateNullChecksGate() { temp_.nce.num_vregs = GetNumOfCodeAndTempVRs(); temp_.nce.work_vregs_to_check = new (temp_scoped_alloc_.get()) ArenaBitVector( temp_scoped_alloc_.get(), temp_.nce.num_vregs, false, kBitMapNullCheck); - temp_.nce.ending_vregs_to_check_matrix = static_cast<ArenaBitVector**>( - temp_scoped_alloc_->Alloc(sizeof(ArenaBitVector*) * GetNumBlocks(), kArenaAllocMisc)); + temp_.nce.ending_vregs_to_check_matrix = + temp_scoped_alloc_->AllocArray<ArenaBitVector*>(GetNumBlocks(), kArenaAllocMisc); std::fill_n(temp_.nce.ending_vregs_to_check_matrix, GetNumBlocks(), nullptr); // reset MIR_MARK @@ -1133,8 +1130,7 @@ bool MIRGraph::EliminateClassInitChecksGate() { // Each insn we use here has at least 2 code units, offset/2 will be a unique index. 
const size_t end = (GetNumDalvikInsns() + 1u) / 2u; - temp_.cice.indexes = static_cast<uint16_t*>( - temp_scoped_alloc_->Alloc(end * sizeof(*temp_.cice.indexes), kArenaAllocGrowableArray)); + temp_.cice.indexes = temp_scoped_alloc_->AllocArray<uint16_t>(end, kArenaAllocGrowableArray); std::fill_n(temp_.cice.indexes, end, 0xffffu); uint32_t unique_class_count = 0u; @@ -1215,8 +1211,8 @@ bool MIRGraph::EliminateClassInitChecksGate() { temp_.cice.num_class_bits = 2u * unique_class_count; temp_.cice.work_classes_to_check = new (temp_scoped_alloc_.get()) ArenaBitVector( temp_scoped_alloc_.get(), temp_.cice.num_class_bits, false, kBitMapClInitCheck); - temp_.cice.ending_classes_to_check_matrix = static_cast<ArenaBitVector**>( - temp_scoped_alloc_->Alloc(sizeof(ArenaBitVector*) * GetNumBlocks(), kArenaAllocMisc)); + temp_.cice.ending_classes_to_check_matrix = + temp_scoped_alloc_->AllocArray<ArenaBitVector*>(GetNumBlocks(), kArenaAllocMisc); std::fill_n(temp_.cice.ending_classes_to_check_matrix, GetNumBlocks(), nullptr); DCHECK_GT(temp_.cice.num_class_bits, 0u); return true; @@ -1338,9 +1334,9 @@ bool MIRGraph::ApplyGlobalValueNumberingGate() { DCHECK(temp_scoped_alloc_ == nullptr); temp_scoped_alloc_.reset(ScopedArenaAllocator::Create(&cu_->arena_stack)); - temp_.gvn.ifield_ids_ = + temp_.gvn.ifield_ids = GlobalValueNumbering::PrepareGvnFieldIds(temp_scoped_alloc_.get(), ifield_lowering_infos_); - temp_.gvn.sfield_ids_ = + temp_.gvn.sfield_ids = GlobalValueNumbering::PrepareGvnFieldIds(temp_scoped_alloc_.get(), sfield_lowering_infos_); DCHECK(temp_.gvn.gvn == nullptr); temp_.gvn.gvn = new (temp_scoped_alloc_.get()) GlobalValueNumbering( @@ -1364,8 +1360,8 @@ void MIRGraph::ApplyGlobalValueNumberingEnd() { // Perform modifications. DCHECK(temp_.gvn.gvn != nullptr); if (temp_.gvn.gvn->Good()) { + temp_.gvn.gvn->StartPostProcessing(); if (max_nested_loops_ != 0u) { - temp_.gvn.gvn->StartPostProcessing(); TopologicalSortIterator iter(this); for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) { ScopedArenaAllocator allocator(&cu_->arena_stack); // Reclaim memory after each LVN. @@ -1383,12 +1379,45 @@ void MIRGraph::ApplyGlobalValueNumberingEnd() { cu_->disable_opt |= (1u << kLocalValueNumbering); } else { LOG(WARNING) << "GVN failed for " << PrettyMethod(cu_->method_idx, *cu_->dex_file); + cu_->disable_opt |= (1u << kGvnDeadCodeElimination); } + if ((cu_->disable_opt & (1 << kGvnDeadCodeElimination)) != 0) { + EliminateDeadCodeEnd(); + } // else preserve GVN data for CSE. +} + +bool MIRGraph::EliminateDeadCodeGate() { + if ((cu_->disable_opt & (1 << kGvnDeadCodeElimination)) != 0) { + return false; + } + DCHECK(temp_scoped_alloc_ != nullptr); + temp_.gvn.dce = new (temp_scoped_alloc_.get()) GvnDeadCodeElimination(temp_.gvn.gvn, + temp_scoped_alloc_.get()); + return true; +} + +bool MIRGraph::EliminateDeadCode(BasicBlock* bb) { + DCHECK(temp_scoped_alloc_ != nullptr); + DCHECK(temp_.gvn.gvn != nullptr); + if (bb->block_type != kDalvikByteCode) { + return false; + } + DCHECK(temp_.gvn.dce != nullptr); + temp_.gvn.dce->Apply(bb); + return false; // No need to repeat. 
+} + +void MIRGraph::EliminateDeadCodeEnd() { + DCHECK_EQ(temp_.gvn.dce != nullptr, (cu_->disable_opt & (1 << kGvnDeadCodeElimination)) == 0); + if (temp_.gvn.dce != nullptr) { + delete temp_.gvn.dce; + temp_.gvn.dce = nullptr; + } delete temp_.gvn.gvn; temp_.gvn.gvn = nullptr; - temp_.gvn.ifield_ids_ = nullptr; - temp_.gvn.sfield_ids_ = nullptr; + temp_.gvn.ifield_ids = nullptr; + temp_.gvn.sfield_ids = nullptr; DCHECK(temp_scoped_alloc_ != nullptr); temp_scoped_alloc_.reset(); } @@ -1441,8 +1470,8 @@ void MIRGraph::InlineSpecialMethodsStart() { temp_.smi.processed_indexes = new (temp_scoped_alloc_.get()) ArenaBitVector( temp_scoped_alloc_.get(), temp_.smi.num_indexes, false, kBitMapMisc); temp_.smi.processed_indexes->ClearAllBits(); - temp_.smi.lowering_infos = static_cast<uint16_t*>(temp_scoped_alloc_->Alloc( - temp_.smi.num_indexes * sizeof(*temp_.smi.lowering_infos), kArenaAllocGrowableArray)); + temp_.smi.lowering_infos = + temp_scoped_alloc_->AllocArray<uint16_t>(temp_.smi.num_indexes, kArenaAllocGrowableArray); } void MIRGraph::InlineSpecialMethods(BasicBlock* bb) { @@ -1558,9 +1587,9 @@ bool MIRGraph::BuildExtendedBBList(class BasicBlock* bb) { void MIRGraph::BasicBlockOptimizationStart() { if ((cu_->disable_opt & (1 << kLocalValueNumbering)) == 0) { temp_scoped_alloc_.reset(ScopedArenaAllocator::Create(&cu_->arena_stack)); - temp_.gvn.ifield_ids_ = + temp_.gvn.ifield_ids = GlobalValueNumbering::PrepareGvnFieldIds(temp_scoped_alloc_.get(), ifield_lowering_infos_); - temp_.gvn.sfield_ids_ = + temp_.gvn.sfield_ids = GlobalValueNumbering::PrepareGvnFieldIds(temp_scoped_alloc_.get(), sfield_lowering_infos_); } } @@ -1586,8 +1615,8 @@ void MIRGraph::BasicBlockOptimization() { void MIRGraph::BasicBlockOptimizationEnd() { // Clean up after LVN. - temp_.gvn.ifield_ids_ = nullptr; - temp_.gvn.sfield_ids_ = nullptr; + temp_.gvn.ifield_ids = nullptr; + temp_.gvn.sfield_ids = nullptr; temp_scoped_alloc_.reset(); } @@ -1603,8 +1632,7 @@ bool MIRGraph::EliminateSuspendChecksGate() { temp_.sce.inliner = cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file); } - suspend_checks_in_loops_ = static_cast<uint32_t*>( - arena_->Alloc(GetNumBlocks() * sizeof(*suspend_checks_in_loops_), kArenaAllocMisc)); + suspend_checks_in_loops_ = arena_->AllocArray<uint32_t>(GetNumBlocks(), kArenaAllocMisc); return true; } @@ -1690,7 +1718,7 @@ void MIRGraph::EliminateSuspendChecksEnd() { temp_.sce.inliner = nullptr; } -bool MIRGraph::CanThrow(MIR* mir) { +bool MIRGraph::CanThrow(MIR* mir) const { if ((mir->dalvikInsn.FlagsOf() & Instruction::kThrow) == 0) { return false; } @@ -1724,7 +1752,6 @@ bool MIRGraph::CanThrow(MIR* mir) { // Non-throwing only if range check has been eliminated. return ((opt_flags & MIR_IGNORE_RANGE_CHECK) == 0); } else if (mir->dalvikInsn.opcode == Instruction::ARRAY_LENGTH || - mir->dalvikInsn.opcode == Instruction::FILL_ARRAY_DATA || static_cast<int>(mir->dalvikInsn.opcode) == kMirOpNullCheck) { // No more checks for these (null check was processed above). 
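// FILL_ARRAY_DATA is not part of this non-throwing list: even with the null check
// eliminated the fill itself can presumably still throw (e.g. if the data is longer
// than the array), so it no longer takes this early non-throwing exit.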
return false; diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc index 199bc27481..be05b80d83 100644 --- a/compiler/dex/mir_optimization_test.cc +++ b/compiler/dex/mir_optimization_test.cc @@ -280,7 +280,7 @@ class MirOptimizationTest : public testing::Test { void DoPrepareMIRs(const MIRDef* defs, size_t count) { mir_count_ = count; - mirs_ = reinterpret_cast<MIR*>(cu_.arena.Alloc(sizeof(MIR) * count, kArenaAllocMIR)); + mirs_ = cu_.arena.AllocArray<MIR>(count, kArenaAllocMIR); uint64_t merged_df_flags = 0u; for (size_t i = 0u; i != count; ++i) { const MIRDef* def = &defs[i]; diff --git a/compiler/dex/pass_driver_me_opts.cc b/compiler/dex/pass_driver_me_opts.cc index 8c8bde63ea..320d06aa06 100644 --- a/compiler/dex/pass_driver_me_opts.cc +++ b/compiler/dex/pass_driver_me_opts.cc @@ -45,6 +45,7 @@ void PassDriverMEOpts::SetupPasses(PassManager* pass_manager) { pass_manager->AddPass(new BBCombine); pass_manager->AddPass(new CodeLayout); pass_manager->AddPass(new GlobalValueNumberingPass); + pass_manager->AddPass(new DeadCodeEliminationPass); pass_manager->AddPass(new ConstantPropagation); pass_manager->AddPass(new MethodUseCount); pass_manager->AddPass(new BBOptimizations); diff --git a/compiler/dex/pass_driver_me_post_opt.cc b/compiler/dex/pass_driver_me_post_opt.cc index 4e1322702f..a8b8a54033 100644 --- a/compiler/dex/pass_driver_me_post_opt.cc +++ b/compiler/dex/pass_driver_me_post_opt.cc @@ -37,7 +37,7 @@ void PassDriverMEPostOpt::SetupPasses(PassManager* pass_manager) { pass_manager->AddPass(new InitializeSSATransformation); pass_manager->AddPass(new ClearPhiInstructions); pass_manager->AddPass(new DefBlockMatrix); - pass_manager->AddPass(new CreatePhiNodes); + pass_manager->AddPass(new FindPhiNodeBlocksPass); pass_manager->AddPass(new SSAConversion); pass_manager->AddPass(new PhiNodeOperands); pass_manager->AddPass(new PerformInitRegLocations); diff --git a/compiler/dex/post_opt_passes.h b/compiler/dex/post_opt_passes.h index a3dbc5a273..1ab862503b 100644 --- a/compiler/dex/post_opt_passes.h +++ b/compiler/dex/post_opt_passes.h @@ -189,19 +189,19 @@ class DefBlockMatrix : public PassMEMirSsaRep { }; /** - * @class CreatePhiNodes - * @brief Pass to create the phi nodes after SSA calculation + * @class FindPhiNodeBlocksPass + * @brief Pass to find out where we need to insert the phi nodes for the SSA conversion. 
*/ -class CreatePhiNodes : public PassMEMirSsaRep { +class FindPhiNodeBlocksPass : public PassMEMirSsaRep { public: - CreatePhiNodes() : PassMEMirSsaRep("CreatePhiNodes", kNoNodes) { + FindPhiNodeBlocksPass() : PassMEMirSsaRep("FindPhiNodeBlocks", kNoNodes) { } void Start(PassDataHolder* data) const { DCHECK(data != nullptr); CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(c_unit != nullptr); - c_unit->mir_graph.get()->InsertPhiNodes(); + c_unit->mir_graph.get()->FindPhiNodeBlocks(); } }; diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc index f15b727857..9cf005bc48 100644 --- a/compiler/dex/quick/arm/call_arm.cc +++ b/compiler/dex/quick/arm/call_arm.cc @@ -52,16 +52,13 @@ namespace art { */ void ArmMir2Lir::GenLargeSparseSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src) { const uint16_t* table = mir_graph_->GetTable(mir, table_offset); - if (cu_->verbose) { - DumpSparseSwitchTable(table); - } // Add the table to the list - we'll process it later SwitchTable *tab_rec = static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData)); + tab_rec->switch_mir = mir; tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; uint32_t size = table[1]; - tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR)); switch_tables_.push_back(tab_rec); // Get the switch value @@ -100,17 +97,13 @@ void ArmMir2Lir::GenLargeSparseSwitch(MIR* mir, uint32_t table_offset, RegLocati void ArmMir2Lir::GenLargePackedSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src) { const uint16_t* table = mir_graph_->GetTable(mir, table_offset); - if (cu_->verbose) { - DumpPackedSwitchTable(table); - } // Add the table to the list - we'll process it later SwitchTable *tab_rec = static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData)); + tab_rec->switch_mir = mir; tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; uint32_t size = table[1]; - tab_rec->targets = - static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR)); switch_tables_.push_back(tab_rec); // Get the switch value @@ -491,6 +484,28 @@ void ArmMir2Lir::GenSpecialExitSequence() { NewLIR1(kThumbBx, rs_rARM_LR.GetReg()); } +void ArmMir2Lir::GenSpecialEntryForSuspend() { + // Keep 16-byte stack alignment - push r0, i.e. ArtMethod*, r5, r6, lr. + DCHECK(!IsTemp(rs_r5)); + DCHECK(!IsTemp(rs_r6)); + core_spill_mask_ = + (1u << rs_r5.GetRegNum()) | (1u << rs_r6.GetRegNum()) | (1u << rs_rARM_LR.GetRegNum()); + num_core_spills_ = 3u; + fp_spill_mask_ = 0u; + num_fp_spills_ = 0u; + frame_size_ = 16u; + core_vmap_table_.clear(); + fp_vmap_table_.clear(); + NewLIR1(kThumbPush, (1u << rs_r0.GetRegNum()) | // ArtMethod* + (core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) | // Spills other than LR. + (1u << 8)); // LR encoded for 16-bit push. +} + +void ArmMir2Lir::GenSpecialExitForSuspend() { + // Pop the frame. (ArtMethod* no longer needed but restore it anyway.) + NewLIR1(kThumb2Pop, (1u << rs_r0.GetRegNum()) | core_spill_mask_); // 32-bit because of LR. +} + static bool ArmUseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) { // Emit relative calls only within a dex file due to the limited range of the BL insn. 
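// (For reference, the Thumb-2 BL immediate reaches only about +/-16 MiB.)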
return cu->dex_file == target_method.dex_file; diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h index 025e69f0ba..67fabbddfe 100644 --- a/compiler/dex/quick/arm/codegen_arm.h +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -18,9 +18,9 @@ #define ART_COMPILER_DEX_QUICK_ARM_CODEGEN_ARM_H_ #include "arm_lir.h" +#include "base/arena_containers.h" #include "base/logging.h" #include "dex/quick/mir_to_lir.h" -#include "utils/arena_containers.h" namespace art { @@ -167,7 +167,9 @@ class ArmMir2Lir FINAL : public Mir2Lir { void GenDivZeroCheckWide(RegStorage reg); void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method); void GenExitSequence(); - void GenSpecialExitSequence(); + void GenSpecialExitSequence() OVERRIDE; + void GenSpecialEntryForSuspend() OVERRIDE; + void GenSpecialExitForSuspend() OVERRIDE; void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double); void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir); void GenSelect(BasicBlock* bb, MIR* mir); diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc index 6492442b94..24e8fdff80 100644 --- a/compiler/dex/quick/arm64/call_arm64.cc +++ b/compiler/dex/quick/arm64/call_arm64.cc @@ -51,16 +51,13 @@ namespace art { */ void Arm64Mir2Lir::GenLargeSparseSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src) { const uint16_t* table = mir_graph_->GetTable(mir, table_offset); - if (cu_->verbose) { - DumpSparseSwitchTable(table); - } // Add the table to the list - we'll process it later SwitchTable *tab_rec = static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData)); + tab_rec->switch_mir = mir; tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; uint32_t size = table[1]; - tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR)); switch_tables_.push_back(tab_rec); // Get the switch value @@ -103,17 +100,13 @@ void Arm64Mir2Lir::GenLargeSparseSwitch(MIR* mir, uint32_t table_offset, RegLoca void Arm64Mir2Lir::GenLargePackedSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src) { const uint16_t* table = mir_graph_->GetTable(mir, table_offset); - if (cu_->verbose) { - DumpPackedSwitchTable(table); - } // Add the table to the list - we'll process it later SwitchTable *tab_rec = static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData)); + tab_rec->switch_mir = mir; tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; uint32_t size = table[1]; - tab_rec->targets = - static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR)); switch_tables_.push_back(tab_rec); // Get the switch value @@ -399,10 +392,26 @@ void Arm64Mir2Lir::GenSpecialExitSequence() { NewLIR0(kA64Ret); } +void Arm64Mir2Lir::GenSpecialEntryForSuspend() { + // Keep 16-byte stack alignment - push x0, i.e. ArtMethod*, lr. + core_spill_mask_ = (1u << rs_xLR.GetRegNum()); + num_core_spills_ = 1u; + fp_spill_mask_ = 0u; + num_fp_spills_ = 0u; + frame_size_ = 16u; + core_vmap_table_.clear(); + fp_vmap_table_.clear(); + NewLIR4(WIDE(kA64StpPre4rrXD), rs_x0.GetReg(), rs_xLR.GetReg(), rs_sp.GetReg(), -frame_size_ / 8); +} + +void Arm64Mir2Lir::GenSpecialExitForSuspend() { + // Pop the frame. (ArtMethod* no longer needed but restore it anyway.) 
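// The scaled immediate of 64-bit STP/LDP pre-/post-indexing is encoded in 8-byte units,
// hence frame_size_ / 8 below and in the matching entry sequence above.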
+ NewLIR4(WIDE(kA64LdpPost4rrXD), rs_x0.GetReg(), rs_xLR.GetReg(), rs_sp.GetReg(), frame_size_ / 8); +} + static bool Arm64UseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) { - UNUSED(cu, target_method); - // Always emit relative calls. - return true; + // Emit relative calls anywhere in the image or within a dex file otherwise. + return cu->compiler_driver->IsImage() || cu->dex_file == target_method.dex_file; } /* diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h index 49ca625096..d5f0536691 100644 --- a/compiler/dex/quick/arm64/codegen_arm64.h +++ b/compiler/dex/quick/arm64/codegen_arm64.h @@ -169,6 +169,8 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) OVERRIDE; void GenExitSequence() OVERRIDE; void GenSpecialExitSequence() OVERRIDE; + void GenSpecialEntryForSuspend() OVERRIDE; + void GenSpecialExitForSuspend() OVERRIDE; void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double) OVERRIDE; void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) OVERRIDE; void GenSelect(BasicBlock* bb, MIR* mir) OVERRIDE; diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index 04113dba81..0be9fd4781 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -456,37 +456,29 @@ LIR* Mir2Lir::AddWideData(LIR* *constant_list_p, int val_lo, int val_hi) { return AddWordData(constant_list_p, val_lo); } -static void Push32(std::vector<uint8_t>&buf, int data) { - buf.push_back(data & 0xff); - buf.push_back((data >> 8) & 0xff); - buf.push_back((data >> 16) & 0xff); - buf.push_back((data >> 24) & 0xff); -} - /** * @brief Push a compressed reference which needs patching at link/patchoat-time. * @details This needs to be kept consistent with the code which actually does the patching in * oat_writer.cc and in the patchoat tool. */ -static void PushUnpatchedReference(std::vector<uint8_t>&buf) { +static void PushUnpatchedReference(CodeBuffer* buf) { // Note that we can safely initialize the patches to zero. The code deduplication mechanism takes // the patches into account when determining whether two pieces of codes are functionally // equivalent. Push32(buf, UINT32_C(0)); } -static void AlignBuffer(std::vector<uint8_t>&buf, size_t offset) { - while (buf.size() < offset) { - buf.push_back(0); - } +static void AlignBuffer(CodeBuffer* buf, size_t offset) { + DCHECK_LE(buf->size(), offset); + buf->insert(buf->end(), offset - buf->size(), 0u); } /* Write the literal pool to the output stream */ void Mir2Lir::InstallLiteralPools() { - AlignBuffer(code_buffer_, data_offset_); + AlignBuffer(&code_buffer_, data_offset_); LIR* data_lir = literal_list_; while (data_lir != nullptr) { - Push32(code_buffer_, data_lir->operands[0]); + Push32(&code_buffer_, data_lir->operands[0]); data_lir = NEXT_LIR(data_lir); } // TODO: patches_.reserve() as needed. 
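As a rough sketch (not the ART sources; the names only mirror the helpers above, and CodeBuffer is assumed to be an arena-backed byte vector), Push32 appends a 32-bit value in little-endian byte order and AlignBuffer zero-pads the buffer forward to a target offset:

  #include <cstddef>
  #include <cstdint>
  #include <vector>

  static void Push32(std::vector<uint8_t>* buf, uint32_t data) {
    buf->push_back(data & 0xff);           // Least significant byte first.
    buf->push_back((data >> 8) & 0xff);
    buf->push_back((data >> 16) & 0xff);
    buf->push_back((data >> 24) & 0xff);
  }

  static void AlignBuffer(std::vector<uint8_t>* buf, size_t offset) {
    // Alignment only ever grows the buffer; the real code DCHECKs size() <= offset.
    buf->insert(buf->end(), offset - buf->size(), 0u);
  }

Writing the literal pool, switch tables and fill-array data then reduces to an AlignBuffer call followed by a sequence of Push32/push_back calls, as in the hunks above and below.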
@@ -498,7 +490,7 @@ void Mir2Lir::InstallLiteralPools() { reinterpret_cast<const DexFile*>(UnwrapPointer(data_lir->operands[1])); patches_.push_back(LinkerPatch::CodePatch(code_buffer_.size(), target_dex_file, target_method_idx)); - PushUnpatchedReference(code_buffer_); + PushUnpatchedReference(&code_buffer_); data_lir = NEXT_LIR(data_lir); } data_lir = method_literal_list_; @@ -508,7 +500,7 @@ void Mir2Lir::InstallLiteralPools() { reinterpret_cast<const DexFile*>(UnwrapPointer(data_lir->operands[1])); patches_.push_back(LinkerPatch::MethodPatch(code_buffer_.size(), target_dex_file, target_method_idx)); - PushUnpatchedReference(code_buffer_); + PushUnpatchedReference(&code_buffer_); data_lir = NEXT_LIR(data_lir); } // Push class literals. @@ -519,7 +511,7 @@ void Mir2Lir::InstallLiteralPools() { reinterpret_cast<const DexFile*>(UnwrapPointer(data_lir->operands[1])); patches_.push_back(LinkerPatch::TypePatch(code_buffer_.size(), class_dex_file, target_type_idx)); - PushUnpatchedReference(code_buffer_); + PushUnpatchedReference(&code_buffer_); data_lir = NEXT_LIR(data_lir); } } @@ -527,7 +519,7 @@ void Mir2Lir::InstallLiteralPools() { /* Write the switch tables to the output stream */ void Mir2Lir::InstallSwitchTables() { for (Mir2Lir::SwitchTable* tab_rec : switch_tables_) { - AlignBuffer(code_buffer_, tab_rec->offset); + AlignBuffer(&code_buffer_, tab_rec->offset); /* * For Arm, our reference point is the address of the bx * instruction that does the launch, so we have to subtract @@ -557,29 +549,49 @@ void Mir2Lir::InstallSwitchTables() { LOG(INFO) << "Switch table for offset 0x" << std::hex << bx_offset; } if (tab_rec->table[0] == Instruction::kSparseSwitchSignature) { - const int32_t* keys = reinterpret_cast<const int32_t*>(&(tab_rec->table[2])); - for (int elems = 0; elems < tab_rec->table[1]; elems++) { - int disp = tab_rec->targets[elems]->offset - bx_offset; + DCHECK(tab_rec->switch_mir != nullptr); + BasicBlock* bb = mir_graph_->GetBasicBlock(tab_rec->switch_mir->bb); + DCHECK(bb != nullptr); + int elems = 0; + for (SuccessorBlockInfo* successor_block_info : bb->successor_blocks) { + int key = successor_block_info->key; + int target = successor_block_info->block; + LIR* boundary_lir = InsertCaseLabel(target, key); + DCHECK(boundary_lir != nullptr); + int disp = boundary_lir->offset - bx_offset; + Push32(&code_buffer_, key); + Push32(&code_buffer_, disp); if (cu_->verbose) { LOG(INFO) << " Case[" << elems << "] key: 0x" - << std::hex << keys[elems] << ", disp: 0x" + << std::hex << key << ", disp: 0x" << std::hex << disp; } - Push32(code_buffer_, keys[elems]); - Push32(code_buffer_, - tab_rec->targets[elems]->offset - bx_offset); + elems++; } + DCHECK_EQ(elems, tab_rec->table[1]); } else { DCHECK_EQ(static_cast<int>(tab_rec->table[0]), static_cast<int>(Instruction::kPackedSwitchSignature)); - for (int elems = 0; elems < tab_rec->table[1]; elems++) { - int disp = tab_rec->targets[elems]->offset - bx_offset; + DCHECK(tab_rec->switch_mir != nullptr); + BasicBlock* bb = mir_graph_->GetBasicBlock(tab_rec->switch_mir->bb); + DCHECK(bb != nullptr); + int elems = 0; + int low_key = s4FromSwitchData(&tab_rec->table[2]); + for (SuccessorBlockInfo* successor_block_info : bb->successor_blocks) { + int key = successor_block_info->key; + DCHECK_EQ(elems + low_key, key); + int target = successor_block_info->block; + LIR* boundary_lir = InsertCaseLabel(target, key); + DCHECK(boundary_lir != nullptr); + int disp = boundary_lir->offset - bx_offset; + Push32(&code_buffer_, disp); if (cu_->verbose) { 
LOG(INFO) << " Case[" << elems << "] disp: 0x" << std::hex << disp; } - Push32(code_buffer_, tab_rec->targets[elems]->offset - bx_offset); + elems++; } + DCHECK_EQ(elems, tab_rec->table[1]); } } } @@ -587,7 +599,7 @@ void Mir2Lir::InstallSwitchTables() { /* Write the fill array dta to the output stream */ void Mir2Lir::InstallFillArrayData() { for (Mir2Lir::FillArrayData* tab_rec : fill_array_data_) { - AlignBuffer(code_buffer_, tab_rec->offset); + AlignBuffer(&code_buffer_, tab_rec->offset); for (int i = 0; i < (tab_rec->size + 1) / 2; i++) { code_buffer_.push_back(tab_rec->table[i] & 0xFF); code_buffer_.push_back((tab_rec->table[i] >> 8) & 0xFF); @@ -830,58 +842,25 @@ int Mir2Lir::AssignFillArrayDataOffset(CodeOffset offset) { * branch table during the assembly phase. All resource flags * are set to prevent code motion. KeyVal is just there for debugging. */ -LIR* Mir2Lir::InsertCaseLabel(DexOffset vaddr, int keyVal) { - LIR* boundary_lir = &block_label_list_[mir_graph_->FindBlock(vaddr)->id]; +LIR* Mir2Lir::InsertCaseLabel(uint32_t bbid, int keyVal) { + LIR* boundary_lir = &block_label_list_[bbid]; LIR* res = boundary_lir; if (cu_->verbose) { // Only pay the expense if we're pretty-printing. LIR* new_label = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocLIR)); - new_label->dalvik_offset = vaddr; + BasicBlock* bb = mir_graph_->GetBasicBlock(bbid); + DCHECK(bb != nullptr); + new_label->dalvik_offset = bb->start_offset; new_label->opcode = kPseudoCaseLabel; new_label->operands[0] = keyVal; new_label->flags.fixup = kFixupLabel; DCHECK(!new_label->flags.use_def_invalid); new_label->u.m.def_mask = &kEncodeAll; InsertLIRAfter(boundary_lir, new_label); - res = new_label; } return res; } -void Mir2Lir::MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec) { - const uint16_t* table = tab_rec->table; - DexOffset base_vaddr = tab_rec->vaddr; - const int32_t *targets = reinterpret_cast<const int32_t*>(&table[4]); - int entries = table[1]; - int low_key = s4FromSwitchData(&table[2]); - for (int i = 0; i < entries; i++) { - tab_rec->targets[i] = InsertCaseLabel(base_vaddr + targets[i], i + low_key); - } -} - -void Mir2Lir::MarkSparseCaseLabels(Mir2Lir::SwitchTable* tab_rec) { - const uint16_t* table = tab_rec->table; - DexOffset base_vaddr = tab_rec->vaddr; - int entries = table[1]; - const int32_t* keys = reinterpret_cast<const int32_t*>(&table[2]); - const int32_t* targets = &keys[entries]; - for (int i = 0; i < entries; i++) { - tab_rec->targets[i] = InsertCaseLabel(base_vaddr + targets[i], keys[i]); - } -} - -void Mir2Lir::ProcessSwitchTables() { - for (Mir2Lir::SwitchTable* tab_rec : switch_tables_) { - if (tab_rec->table[0] == Instruction::kPackedSwitchSignature) { - MarkPackedCaseLabels(tab_rec); - } else if (tab_rec->table[0] == Instruction::kSparseSwitchSignature) { - MarkSparseCaseLabels(tab_rec); - } else { - LOG(FATAL) << "Invalid switch table"; - } - } -} - void Mir2Lir::DumpSparseSwitchTable(const uint16_t* table) { /* * Sparse switch data format: @@ -988,8 +967,11 @@ Mir2Lir::Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena estimated_native_code_size_(0), reg_pool_(nullptr), live_sreg_(0), + code_buffer_(mir_graph->GetArena()->Adapter()), + encoded_mapping_table_(mir_graph->GetArena()->Adapter()), core_vmap_table_(mir_graph->GetArena()->Adapter()), fp_vmap_table_(mir_graph->GetArena()->Adapter()), + native_gc_map_(mir_graph->GetArena()->Adapter()), patches_(mir_graph->GetArena()->Adapter()), num_core_spills_(0), num_fp_spills_(0), @@ -1032,9 +1014,6 @@ 
void Mir2Lir::Materialize() { /* Method is not empty */ if (first_lir_insn_) { - // mark the targets of switch statement case labels - ProcessSwitchTables(); - /* Convert LIR into machine code. */ AssembleLIR(); diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index 9f53b89186..3c9b7a3ed3 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -413,7 +413,7 @@ void Mir2Lir::GenNewArray(uint32_t type_idx, RegLocation rl_dest, * Current code also throws internal unimp if not 'L', '[' or 'I'. */ void Mir2Lir::GenFilledNewArray(CallInfo* info) { - int elems = info->num_arg_words; + size_t elems = info->num_arg_words; int type_idx = info->index; FlushAllRegs(); /* Everything to home location */ QuickEntrypointEnum target; @@ -450,7 +450,7 @@ void Mir2Lir::GenFilledNewArray(CallInfo* info) { * of any regs in the source range that have been promoted to * home location. */ - for (int i = 0; i < elems; i++) { + for (size_t i = 0; i < elems; i++) { RegLocation loc = UpdateLoc(info->args[i]); if (loc.location == kLocPhysReg) { ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); @@ -493,7 +493,7 @@ void Mir2Lir::GenFilledNewArray(CallInfo* info) { OpRegRegImm(kOpAdd, r_dst, ref_reg, mirror::Array::DataOffset(component_size).Int32Value()); // Set up the loop counter (known to be > 0) - LoadConstant(r_idx, elems - 1); + LoadConstant(r_idx, static_cast<int>(elems - 1)); // Generate the copy loop. Going backwards for convenience LIR* loop_head_target = NewLIR0(kPseudoTargetLabel); // Copy next element @@ -515,9 +515,9 @@ void Mir2Lir::GenFilledNewArray(CallInfo* info) { FreeTemp(r_dst); FreeTemp(r_src); } else { - DCHECK_LE(elems, 5); // Usually but not necessarily non-range. + DCHECK_LE(elems, 5u); // Usually but not necessarily non-range. // TUNING: interleave - for (int i = 0; i < elems; i++) { + for (size_t i = 0; i < elems; i++) { RegLocation rl_arg; if (info->args[i].ref) { rl_arg = LoadValue(info->args[i], kRefReg); @@ -537,7 +537,7 @@ void Mir2Lir::GenFilledNewArray(CallInfo* info) { } if (elems != 0 && info->args[0].ref) { // If there is at least one potentially non-null value, unconditionally mark the GC card. - for (int i = 0; i < elems; i++) { + for (size_t i = 0; i < elems; i++) { if (!mir_graph_->IsConstantNullRef(info->args[i])) { UnconditionallyMarkGCCard(ref_reg); break; @@ -2158,7 +2158,7 @@ void Mir2Lir::GenConversionCall(QuickEntrypointEnum trampoline, RegLocation rl_d } } -class SuspendCheckSlowPath : public Mir2Lir::LIRSlowPath { +class Mir2Lir::SuspendCheckSlowPath : public Mir2Lir::LIRSlowPath { public: SuspendCheckSlowPath(Mir2Lir* m2l, LIR* branch, LIR* cont) : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, cont) { diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index bb5b0cdd22..8e3df7c7a2 100755 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -405,9 +405,10 @@ void Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { */ ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); RegLocation* t_loc = nullptr; + EnsureInitializedArgMappingToPhysicalReg(); for (uint32_t i = 0; i < mir_graph_->GetNumOfInVRs(); i += t_loc->wide ? 
2 : 1) { // get reg corresponding to input - RegStorage reg = GetArgMappingToPhysicalReg(i); + RegStorage reg = in_to_reg_storage_mapping_.GetReg(i); t_loc = &ArgLocs[i]; // If the wide input appeared as single, flush it and go @@ -661,7 +662,7 @@ void Mir2Lir::GenDalvikArgsFlushPromoted(CallInfo* info, int start) { } ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); // Scan the rest of the args - if in phys_reg flush to memory - for (int next_arg = start; next_arg < info->num_arg_words;) { + for (size_t next_arg = start; next_arg < info->num_arg_words;) { RegLocation loc = info->args[next_arg]; if (loc.wide) { loc = UpdateLocWide(loc); @@ -719,10 +720,10 @@ int Mir2Lir::GenDalvikArgs(CallInfo* info, int call_state, uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method, InvokeType type, bool skip_this) { // If no arguments, just return. - if (info->num_arg_words == 0) + if (info->num_arg_words == 0u) return call_state; - const int start_index = skip_this ? 1 : 0; + const size_t start_index = skip_this ? 1 : 0; // Get architecture dependent mapping between output VRs and physical registers // basing on shorty of method to call. @@ -733,13 +734,13 @@ int Mir2Lir::GenDalvikArgs(CallInfo* info, int call_state, in_to_reg_storage_mapping.Initialize(&shorty_iterator, GetResetedInToRegStorageMapper()); } - int stack_map_start = std::max(in_to_reg_storage_mapping.GetMaxMappedIn() + 1, start_index); + size_t stack_map_start = std::max(in_to_reg_storage_mapping.GetEndMappedIn(), start_index); if ((stack_map_start < info->num_arg_words) && info->args[stack_map_start].high_word) { // It is possible that the last mapped reg is 32 bit while arg is 64-bit. // It will be handled together with low part mapped to register. stack_map_start++; } - int regs_left_to_pass_via_stack = info->num_arg_words - stack_map_start; + size_t regs_left_to_pass_via_stack = info->num_arg_words - stack_map_start; // If it is a range case we can try to copy remaining VRs (not mapped to physical registers) // using more optimal algorithm. @@ -755,11 +756,10 @@ int Mir2Lir::GenDalvikArgs(CallInfo* info, int call_state, RegStorage regRef = TargetReg(kArg3, kRef); RegStorage regSingle = TargetReg(kArg3, kNotWide); RegStorage regWide = TargetReg(kArg2, kWide); - for (int i = start_index; - i < stack_map_start + regs_left_to_pass_via_stack; i++) { + for (size_t i = start_index; i < stack_map_start + regs_left_to_pass_via_stack; i++) { RegLocation rl_arg = info->args[i]; rl_arg = UpdateRawLoc(rl_arg); - RegStorage reg = in_to_reg_storage_mapping.Get(i); + RegStorage reg = in_to_reg_storage_mapping.GetReg(i); if (!reg.Valid()) { int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set); { @@ -799,10 +799,10 @@ int Mir2Lir::GenDalvikArgs(CallInfo* info, int call_state, } // Finish with VRs mapped to physical registers. - for (int i = start_index; i < stack_map_start; i++) { + for (size_t i = start_index; i < stack_map_start; i++) { RegLocation rl_arg = info->args[i]; rl_arg = UpdateRawLoc(rl_arg); - RegStorage reg = in_to_reg_storage_mapping.Get(i); + RegStorage reg = in_to_reg_storage_mapping.GetReg(i); if (reg.Valid()) { if (rl_arg.wide) { // if reg is not 64-bit (it is half of 64-bit) then handle it separately. 
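A small illustrative sketch (hypothetical helper, not part of the Mir2Lir API) of how the hunks above split the outgoing argument words: words below the end of the in-to-register mapping stay in physical registers, the rest are flushed to the outs area on the stack, and a wide pair straddling the boundary is kept together with its register-mapped low half:

  #include <algorithm>
  #include <cstddef>

  // Returns the index of the first argument word that is passed via the stack.
  //   num_arg_words      - plays the role of info->num_arg_words
  //   skip_this          - the first word is handled separately when set
  //   end_mapped_in      - end of the register-mapped range (cf. GetEndMappedIn() above)
  //   high_word_at_split - whether the word at the split is the high half of a wide pair
  size_t FirstStackArgWord(size_t num_arg_words, bool skip_this, size_t end_mapped_in,
                           bool high_word_at_split) {
    const size_t start_index = skip_this ? 1u : 0u;
    size_t stack_map_start = std::max(end_mapped_in, start_index);
    if (stack_map_start < num_arg_words && high_word_at_split) {
      // The low half got the last register, so the high half is handled with it.
      ++stack_map_start;
    }
    return stack_map_start;  // Words [stack_map_start, num_arg_words) go via the stack.
  }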
@@ -852,12 +852,11 @@ int Mir2Lir::GenDalvikArgs(CallInfo* info, int call_state, return call_state; } -RegStorage Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) { +void Mir2Lir::EnsureInitializedArgMappingToPhysicalReg() { if (!in_to_reg_storage_mapping_.IsInitialized()) { ShortyIterator shorty_iterator(cu_->shorty, cu_->invoke_type == kStatic); in_to_reg_storage_mapping_.Initialize(&shorty_iterator, GetResetedInToRegStorageMapper()); } - return in_to_reg_storage_mapping_.Get(arg_num); } RegLocation Mir2Lir::InlineTarget(CallInfo* info) { diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc index 9f36e35f5e..db844bcde9 100644 --- a/compiler/dex/quick/gen_loadstore.cc +++ b/compiler/dex/quick/gen_loadstore.cc @@ -44,7 +44,9 @@ LIR* Mir2Lir::LoadConstant(RegStorage r_dest, int value) { void Mir2Lir::Workaround7250540(RegLocation rl_dest, RegStorage zero_reg) { if (rl_dest.fp) { int pmap_index = SRegToPMap(rl_dest.s_reg_low); - if (promotion_map_[pmap_index].fp_location == kLocPhysReg) { + const bool is_fp_promoted = promotion_map_[pmap_index].fp_location == kLocPhysReg; + const bool is_core_promoted = promotion_map_[pmap_index].core_location == kLocPhysReg; + if (is_fp_promoted || is_core_promoted) { // Now, determine if this vreg is ever used as a reference. If not, we're done. bool used_as_reference = false; int base_vreg = mir_graph_->SRegToVReg(rl_dest.s_reg_low); @@ -61,7 +63,7 @@ void Mir2Lir::Workaround7250540(RegLocation rl_dest, RegStorage zero_reg) { temp_reg = AllocTemp(); LoadConstant(temp_reg, 0); } - if (promotion_map_[pmap_index].core_location == kLocPhysReg) { + if (is_core_promoted) { // Promoted - just copy in a zero OpRegCopy(RegStorage::Solo32(promotion_map_[pmap_index].core_reg), temp_reg); } else { diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc index ccfdaf60bb..d9471f6fd1 100644 --- a/compiler/dex/quick/mips/call_mips.cc +++ b/compiler/dex/quick/mips/call_mips.cc @@ -68,17 +68,13 @@ bool MipsMir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& s */ void MipsMir2Lir::GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) { const uint16_t* table = mir_graph_->GetTable(mir, table_offset); - if (cu_->verbose) { - DumpSparseSwitchTable(table); - } // Add the table to the list - we'll process it later SwitchTable* tab_rec = static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData)); + tab_rec->switch_mir = mir; tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; int elements = table[1]; - tab_rec->targets = - static_cast<LIR**>(arena_->Alloc(elements * sizeof(LIR*), kArenaAllocLIR)); switch_tables_.push_back(tab_rec); // The table is composed of 8-byte key/disp pairs @@ -145,17 +141,13 @@ void MipsMir2Lir::GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLoca */ void MipsMir2Lir::GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) { const uint16_t* table = mir_graph_->GetTable(mir, table_offset); - if (cu_->verbose) { - DumpPackedSwitchTable(table); - } // Add the table to the list - we'll process it later SwitchTable* tab_rec = static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData)); + tab_rec->switch_mir = mir; tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; int size = table[1]; - tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), - kArenaAllocLIR)); switch_tables_.push_back(tab_rec); // Get the switch value @@ -323,6 +315,26 @@ void 
MipsMir2Lir::GenSpecialExitSequence() { OpReg(kOpBx, rs_rRA); } +void MipsMir2Lir::GenSpecialEntryForSuspend() { + // Keep 16-byte stack alignment - push A0, i.e. ArtMethod*, 2 filler words and RA. + core_spill_mask_ = (1u << rs_rRA.GetRegNum()); + num_core_spills_ = 1u; + fp_spill_mask_ = 0u; + num_fp_spills_ = 0u; + frame_size_ = 16u; + core_vmap_table_.clear(); + fp_vmap_table_.clear(); + OpRegImm(kOpSub, rs_rMIPS_SP, frame_size_); + Store32Disp(rs_rMIPS_SP, frame_size_ - 4, rs_rRA); + Store32Disp(rs_rMIPS_SP, 0, rs_rA0); +} + +void MipsMir2Lir::GenSpecialExitForSuspend() { + // Pop the frame. Don't pop ArtMethod*, it's no longer needed. + Load32Disp(rs_rMIPS_SP, frame_size_ - 4, rs_rRA); + OpRegImm(kOpAdd, rs_rMIPS_SP, frame_size_); +} + /* * Bit of a hack here - in the absence of a real scheduling pass, * emit the next instruction in static & direct invoke sequences. diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h index a37fe40cfa..e1b43ca848 100644 --- a/compiler/dex/quick/mips/codegen_mips.h +++ b/compiler/dex/quick/mips/codegen_mips.h @@ -141,7 +141,9 @@ class MipsMir2Lir FINAL : public Mir2Lir { void GenDivZeroCheckWide(RegStorage reg); void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method); void GenExitSequence(); - void GenSpecialExitSequence(); + void GenSpecialExitSequence() OVERRIDE; + void GenSpecialEntryForSuspend() OVERRIDE; + void GenSpecialExitForSuspend() OVERRIDE; void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double); void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir); void GenSelect(BasicBlock* bb, MIR* mir); diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc index 6f6bf68fea..ec6edabdbd 100644 --- a/compiler/dex/quick/mips/utility_mips.cc +++ b/compiler/dex/quick/mips/utility_mips.cc @@ -56,7 +56,8 @@ LIR* MipsMir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) { } bool MipsMir2Lir::InexpensiveConstantInt(int32_t value) { - return ((value == 0) || IsUint(16, value) || ((value < 0) && (value >= -32768))); + // For encodings, see LoadConstantNoClobber below. + return ((value == 0) || IsUint<16>(value) || IsInt<16>(value)); } bool MipsMir2Lir::InexpensiveConstantFloat(int32_t value) { @@ -96,9 +97,11 @@ LIR* MipsMir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) { /* See if the value can be constructed cheaply */ if (value == 0) { res = NewLIR2(kMipsMove, r_dest.GetReg(), rZERO); - } else if ((value > 0) && (value <= 65535)) { + } else if (IsUint<16>(value)) { + // Use OR with (unsigned) immediate to encode 16b unsigned int. res = NewLIR3(kMipsOri, r_dest.GetReg(), rZERO, value); - } else if ((value < 0) && (value >= -32768)) { + } else if (IsInt<16>(value)) { + // Use ADD with (signed) immediate to encode 16b signed int. res = NewLIR3(kMipsAddiu, r_dest.GetReg(), rZERO, value); } else { res = NewLIR2(kMipsLui, r_dest.GetReg(), value >> 16); diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index 274e078399..34e5e25efe 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -24,6 +24,69 @@ namespace art { +class Mir2Lir::SpecialSuspendCheckSlowPath : public Mir2Lir::LIRSlowPath { + public: + SpecialSuspendCheckSlowPath(Mir2Lir* m2l, LIR* branch, LIR* cont) + : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, cont), + num_used_args_(0u) { + } + + void PreserveArg(int in_position) { + // Avoid duplicates. 
+ for (size_t i = 0; i != num_used_args_; ++i) { + if (used_args_[i] == in_position) { + return; + } + } + DCHECK_LT(num_used_args_, kMaxArgsToPreserve); + used_args_[num_used_args_] = in_position; + ++num_used_args_; + } + + void Compile() OVERRIDE { + m2l_->ResetRegPool(); + m2l_->ResetDefTracking(); + GenerateTargetLabel(kPseudoSuspendTarget); + + m2l_->LockCallTemps(); + + // Generate frame. + m2l_->GenSpecialEntryForSuspend(); + + // Spill all args. + for (size_t i = 0, end = m2l_->in_to_reg_storage_mapping_.GetEndMappedIn(); i < end; + i += m2l_->in_to_reg_storage_mapping_.GetShorty(i).IsWide() ? 2u : 1u) { + m2l_->SpillArg(i); + } + + m2l_->FreeCallTemps(); + + // Do the actual suspend call to runtime. + m2l_->CallRuntimeHelper(kQuickTestSuspend, true); + + m2l_->LockCallTemps(); + + // Unspill used regs. (Don't unspill unused args.) + for (size_t i = 0; i != num_used_args_; ++i) { + m2l_->UnspillArg(used_args_[i]); + } + + // Pop the frame. + m2l_->GenSpecialExitForSuspend(); + + // Branch to the continue label. + DCHECK(cont_ != nullptr); + m2l_->OpUnconditionalBranch(cont_); + + m2l_->FreeCallTemps(); + } + + private: + static constexpr size_t kMaxArgsToPreserve = 2u; + size_t num_used_args_; + int used_args_[kMaxArgsToPreserve]; +}; + RegisterClass Mir2Lir::ShortyToRegClass(char shorty_type) { RegisterClass res; switch (shorty_type) { @@ -54,15 +117,15 @@ RegisterClass Mir2Lir::LocToRegClass(RegLocation loc) { return res; } -void Mir2Lir::LockArg(int in_position, bool) { - RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position); +void Mir2Lir::LockArg(size_t in_position) { + RegStorage reg_arg = in_to_reg_storage_mapping_.GetReg(in_position); if (reg_arg.Valid()) { LockTemp(reg_arg); } } -RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) { +RegStorage Mir2Lir::LoadArg(size_t in_position, RegisterClass reg_class, bool wide) { ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set); @@ -82,7 +145,7 @@ RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) offset += sizeof(uint64_t); } - RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position); + RegStorage reg_arg = in_to_reg_storage_mapping_.GetReg(in_position); // TODO: REVISIT: This adds a spill of low part while we could just copy it. if (reg_arg.Valid() && wide && (reg_arg.GetWideKind() == kNotWide)) { @@ -112,7 +175,7 @@ RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) return reg_arg; } -void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) { +void Mir2Lir::LoadArgDirect(size_t in_position, RegLocation rl_dest) { DCHECK_EQ(rl_dest.location, kLocPhysReg); ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set); @@ -132,7 +195,7 @@ void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) { offset += sizeof(uint64_t); } - RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position); + RegStorage reg_arg = in_to_reg_storage_mapping_.GetReg(in_position); // TODO: REVISIT: This adds a spill of low part while we could just copy it. 
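For orientation, a sketch (not from the patch) of the offset convention shared by these helpers: LoadArg/LoadArgDirect read an incoming VR at StackVisitor::GetOutVROffset(...) from SP, while the SpillArg/UnspillArg pair introduced below runs after GenSpecialEntryForSuspend has pushed a frame and therefore adds frame_size_ to the same per-argument offset. The stand-in OutVROffset() here is illustrative only; the real computation is ISA-specific.

#include <cstddef>

// Illustrative stand-in: byte offset of incoming VR `in_position` in the out area.
// The real value comes from StackVisitor::GetOutVROffset() and is ISA-specific.
int OutVROffset(size_t in_position) {
  return static_cast<int>(in_position) * 4;  // assume 4-byte VR slots
}

// LoadArg/LoadArgDirect: special methods normally run frameless, so the
// incoming VRs sit directly at their out-area offsets from SP.
int ArgOffsetOnEntry(size_t in_position) {
  return OutVROffset(in_position);
}

// SpillArg/UnspillArg: the suspend path has pushed a frame of `frame_size`
// bytes, so the same VRs are now `frame_size` bytes further from SP.
int ArgOffsetAfterSuspendEntry(size_t in_position, int frame_size) {
  return frame_size + OutVROffset(in_position);
}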
if (reg_arg.Valid() && rl_dest.wide && (reg_arg.GetWideKind() == kNotWide)) { @@ -153,6 +216,41 @@ void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) { } } +void Mir2Lir::SpillArg(size_t in_position) { + RegStorage reg_arg = in_to_reg_storage_mapping_.GetReg(in_position); + + if (reg_arg.Valid()) { + int offset = frame_size_ + StackVisitor::GetOutVROffset(in_position, cu_->instruction_set); + ShortyArg arg = in_to_reg_storage_mapping_.GetShorty(in_position); + OpSize size = arg.IsRef() ? kReference : + (arg.IsWide() && reg_arg.GetWideKind() == kWide) ? k64 : k32; + StoreBaseDisp(TargetPtrReg(kSp), offset, reg_arg, size, kNotVolatile); + } +} + +void Mir2Lir::UnspillArg(size_t in_position) { + RegStorage reg_arg = in_to_reg_storage_mapping_.GetReg(in_position); + + if (reg_arg.Valid()) { + int offset = frame_size_ + StackVisitor::GetOutVROffset(in_position, cu_->instruction_set); + ShortyArg arg = in_to_reg_storage_mapping_.GetShorty(in_position); + OpSize size = arg.IsRef() ? kReference : + (arg.IsWide() && reg_arg.GetWideKind() == kWide) ? k64 : k32; + LoadBaseDisp(TargetPtrReg(kSp), offset, reg_arg, size, kNotVolatile); + } +} + +Mir2Lir::SpecialSuspendCheckSlowPath* Mir2Lir::GenSpecialSuspendTest() { + LockCallTemps(); + LIR* branch = OpTestSuspend(nullptr); + FreeCallTemps(); + LIR* cont = NewLIR0(kPseudoTargetLabel); + SpecialSuspendCheckSlowPath* slow_path = + new (arena_) SpecialSuspendCheckSlowPath(this, branch, cont); + AddSlowPath(slow_path); + return slow_path; +} + bool Mir2Lir::GenSpecialIGet(MIR* mir, const InlineMethod& special) { // FastInstance() already checked by DexFileMethodInliner. const InlineIGetIPutData& data = special.d.ifield_data; @@ -161,13 +259,16 @@ bool Mir2Lir::GenSpecialIGet(MIR* mir, const InlineMethod& special) { return false; } - OpSize size = k32; + OpSize size; switch (data.op_variant) { - case InlineMethodAnalyser::IGetVariant(Instruction::IGET_OBJECT): - size = kReference; + case InlineMethodAnalyser::IGetVariant(Instruction::IGET): + size = in_to_reg_storage_mapping_.GetShorty(data.src_arg).IsFP() ? kSingle : k32; break; case InlineMethodAnalyser::IGetVariant(Instruction::IGET_WIDE): - size = k64; + size = in_to_reg_storage_mapping_.GetShorty(data.src_arg).IsFP() ? 
kDouble : k64; + break; + case InlineMethodAnalyser::IGetVariant(Instruction::IGET_OBJECT): + size = kReference; break; case InlineMethodAnalyser::IGetVariant(Instruction::IGET_SHORT): size = kSignedHalf; @@ -181,11 +282,18 @@ bool Mir2Lir::GenSpecialIGet(MIR* mir, const InlineMethod& special) { case InlineMethodAnalyser::IGetVariant(Instruction::IGET_BOOLEAN): size = kUnsignedByte; break; + default: + LOG(FATAL) << "Unknown variant: " << data.op_variant; + UNREACHABLE(); } // Point of no return - no aborts after this - GenPrintLabel(mir); + if (!kLeafOptimization) { + auto* slow_path = GenSpecialSuspendTest(); + slow_path->PreserveArg(data.object_arg); + } LockArg(data.object_arg); + GenPrintLabel(mir); RegStorage reg_obj = LoadArg(data.object_arg, kRefReg); RegisterClass reg_class = RegClassForFieldLoadStore(size, data.is_volatile); RegisterClass ret_reg_class = ShortyToRegClass(cu_->shorty[0]); @@ -223,13 +331,16 @@ bool Mir2Lir::GenSpecialIPut(MIR* mir, const InlineMethod& special) { return false; } - OpSize size = k32; + OpSize size; switch (data.op_variant) { - case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_OBJECT): - size = kReference; + case InlineMethodAnalyser::IPutVariant(Instruction::IPUT): + size = in_to_reg_storage_mapping_.GetShorty(data.src_arg).IsFP() ? kSingle : k32; break; case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_WIDE): - size = k64; + size = in_to_reg_storage_mapping_.GetShorty(data.src_arg).IsFP() ? kDouble : k64; + break; + case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_OBJECT): + size = kReference; break; case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_SHORT): size = kSignedHalf; @@ -243,12 +354,20 @@ bool Mir2Lir::GenSpecialIPut(MIR* mir, const InlineMethod& special) { case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_BOOLEAN): size = kUnsignedByte; break; + default: + LOG(FATAL) << "Unknown variant: " << data.op_variant; + UNREACHABLE(); } // Point of no return - no aborts after this - GenPrintLabel(mir); + if (!kLeafOptimization) { + auto* slow_path = GenSpecialSuspendTest(); + slow_path->PreserveArg(data.object_arg); + slow_path->PreserveArg(data.src_arg); + } LockArg(data.object_arg); - LockArg(data.src_arg, IsWide(size)); + LockArg(data.src_arg); + GenPrintLabel(mir); RegStorage reg_obj = LoadArg(data.object_arg, kRefReg); RegisterClass reg_class = RegClassForFieldLoadStore(size, data.is_volatile); RegStorage reg_src = LoadArg(data.src_arg, reg_class, IsWide(size)); @@ -269,8 +388,12 @@ bool Mir2Lir::GenSpecialIdentity(MIR* mir, const InlineMethod& special) { bool wide = (data.is_wide != 0u); // Point of no return - no aborts after this + if (!kLeafOptimization) { + auto* slow_path = GenSpecialSuspendTest(); + slow_path->PreserveArg(data.arg); + } + LockArg(data.arg); GenPrintLabel(mir); - LockArg(data.arg, wide); RegisterClass reg_class = ShortyToRegClass(cu_->shorty[0]); RegLocation rl_dest = wide ? 
GetReturnWide(reg_class) : GetReturn(reg_class); LoadArgDirect(data.arg, rl_dest); @@ -285,15 +408,22 @@ bool Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& speci current_dalvik_offset_ = mir->offset; MIR* return_mir = nullptr; bool successful = false; + EnsureInitializedArgMappingToPhysicalReg(); switch (special.opcode) { case kInlineOpNop: successful = true; DCHECK_EQ(mir->dalvikInsn.opcode, Instruction::RETURN_VOID); + if (!kLeafOptimization) { + GenSpecialSuspendTest(); + } return_mir = mir; break; case kInlineOpNonWideConst: { successful = true; + if (!kLeafOptimization) { + GenSpecialSuspendTest(); + } RegLocation rl_dest = GetReturn(ShortyToRegClass(cu_->shorty[0])); GenPrintLabel(mir); LoadConstant(rl_dest.reg, static_cast<int>(special.d.data)); @@ -333,13 +463,17 @@ bool Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& speci } GenSpecialExitSequence(); - core_spill_mask_ = 0; - num_core_spills_ = 0; - fp_spill_mask_ = 0; - num_fp_spills_ = 0; - frame_size_ = 0; - core_vmap_table_.clear(); - fp_vmap_table_.clear(); + if (!kLeafOptimization) { + HandleSlowPaths(); + } else { + core_spill_mask_ = 0; + num_core_spills_ = 0; + fp_spill_mask_ = 0; + num_fp_spills_ = 0; + frame_size_ = 0; + core_vmap_table_.clear(); + fp_vmap_table_.clear(); + } } return successful; @@ -1195,9 +1329,7 @@ void Mir2Lir::MethodMIR2LIR() { cu_->NewTimingSplit("MIR2LIR"); // Hold the labels of each block. - block_label_list_ = - static_cast<LIR*>(arena_->Alloc(sizeof(LIR) * mir_graph_->GetNumBlocks(), - kArenaAllocLIR)); + block_label_list_ = arena_->AllocArray<LIR>(mir_graph_->GetNumBlocks(), kArenaAllocLIR); PreOrderDfsIterator iter(mir_graph_); BasicBlock* curr_bb = iter.Next(); @@ -1289,31 +1421,41 @@ void Mir2Lir::InToRegStorageMapping::Initialize(ShortyIterator* shorty, InToRegStorageMapper* mapper) { DCHECK(mapper != nullptr); DCHECK(shorty != nullptr); - max_mapped_in_ = -1; - has_arguments_on_stack_ = false; + DCHECK(!IsInitialized()); + DCHECK_EQ(end_mapped_in_, 0u); + DCHECK(!has_arguments_on_stack_); while (shorty->Next()) { ShortyArg arg = shorty->GetArg(); RegStorage reg = mapper->GetNextReg(arg); + mapping_.emplace_back(arg, reg); + if (arg.IsWide()) { + mapping_.emplace_back(ShortyArg(kInvalidShorty), RegStorage::InvalidReg()); + } if (reg.Valid()) { - mapping_.Put(count_, reg); - max_mapped_in_ = count_; - // If the VR is wide and was mapped as wide then account for it. - if (arg.IsWide() && reg.Is64Bit()) { - max_mapped_in_++; + end_mapped_in_ = mapping_.size(); + // If the VR is wide but wasn't mapped as wide then account for it. + if (arg.IsWide() && !reg.Is64Bit()) { + --end_mapped_in_; } } else { has_arguments_on_stack_ = true; } - count_ += arg.IsWide() ? 2 : 1; } initialized_ = true; } -RegStorage Mir2Lir::InToRegStorageMapping::Get(int in_position) { +RegStorage Mir2Lir::InToRegStorageMapping::GetReg(size_t in_position) { + DCHECK(IsInitialized()); + DCHECK_LT(in_position, mapping_.size()); + DCHECK_NE(mapping_[in_position].first.GetType(), kInvalidShorty); + return mapping_[in_position].second; +} + +Mir2Lir::ShortyArg Mir2Lir::InToRegStorageMapping::GetShorty(size_t in_position) { DCHECK(IsInitialized()); - DCHECK_LT(in_position, count_); - auto res = mapping_.find(in_position); - return res != mapping_.end() ? 
res->second : RegStorage::InvalidReg(); + DCHECK_LT(static_cast<size_t>(in_position), mapping_.size()); + DCHECK_NE(mapping_[in_position].first.GetType(), kInvalidShorty); + return mapping_[in_position].first; } } // namespace art diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index 888c34eb24..6f3f057038 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -17,6 +17,9 @@ #ifndef ART_COMPILER_DEX_QUICK_MIR_TO_LIR_H_ #define ART_COMPILER_DEX_QUICK_MIR_TO_LIR_H_ +#include "base/arena_allocator.h" +#include "base/arena_containers.h" +#include "base/arena_object.h" #include "compiled_method.h" #include "dex/compiler_enums.h" #include "dex/dex_flags.h" @@ -29,9 +32,6 @@ #include "leb128.h" #include "safe_map.h" #include "utils/array_ref.h" -#include "utils/arena_allocator.h" -#include "utils/arena_containers.h" -#include "utils/arena_object.h" #include "utils/stack_checks.h" namespace art { @@ -146,7 +146,7 @@ typedef int (*NextCallInsn)(CompilationUnit*, CallInfo*, int, uint32_t method_idx, uintptr_t direct_code, uintptr_t direct_method, InvokeType type); -typedef std::vector<uint8_t> CodeBuffer; +typedef ArenaVector<uint8_t> CodeBuffer; typedef uint32_t CodeOffset; // Native code offset in bytes. struct UseDefMasks { @@ -224,7 +224,7 @@ class Mir2Lir { struct SwitchTable : EmbeddedData { LIR* anchor; // Reference instruction for relative offsets. - LIR** targets; // Array of case targets. + MIR* switch_mir; // The switch mir. }; /* Static register use counts */ @@ -515,6 +515,9 @@ class Mir2Lir { LIR* const cont_; }; + class SuspendCheckSlowPath; + class SpecialSuspendCheckSlowPath; + // Helper class for changing mem_ref_type_ until the end of current scope. See mem_ref_type_. class ScopedMemRefType { public: @@ -592,7 +595,7 @@ class Mir2Lir { // strdup(), but allocates from the arena. char* ArenaStrdup(const char* str) { size_t len = strlen(str) + 1; - char* res = reinterpret_cast<char*>(arena_->Alloc(len, kArenaAllocMisc)); + char* res = arena_->AllocArray<char>(len, kArenaAllocMisc); if (res != NULL) { strncpy(res, str, len); } @@ -653,7 +656,6 @@ class Mir2Lir { LIR* ScanLiteralPoolClass(LIR* data_target, const DexFile& dex_file, uint32_t type_idx); LIR* AddWordData(LIR* *constant_list_p, int value); LIR* AddWideData(LIR* *constant_list_p, int val_lo, int val_hi); - void ProcessSwitchTables(); void DumpSparseSwitchTable(const uint16_t* table); void DumpPackedSwitchTable(const uint16_t* table); void MarkBoundary(DexOffset offset, const char* inst_str); @@ -671,9 +673,7 @@ class Mir2Lir { int AssignLiteralOffset(CodeOffset offset); int AssignSwitchTablesOffset(CodeOffset offset); int AssignFillArrayDataOffset(CodeOffset offset); - virtual LIR* InsertCaseLabel(DexOffset vaddr, int keyVal); - virtual void MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec); - void MarkSparseCaseLabels(Mir2Lir::SwitchTable* tab_rec); + LIR* InsertCaseLabel(uint32_t bbid, int keyVal); // Handle bookkeeping to convert a wide RegLocation to a narrow RegLocation. No code generated. 
virtual RegLocation NarrowRegLoc(RegLocation loc); @@ -1206,7 +1206,7 @@ class Mir2Lir { } } - RegStorage GetArgMappingToPhysicalReg(int arg_num); + void EnsureInitializedArgMappingToPhysicalReg(); virtual RegLocation GetReturnAlt() = 0; virtual RegLocation GetReturnWideAlt() = 0; virtual RegLocation LocCReturn() = 0; @@ -1573,6 +1573,16 @@ class Mir2Lir { virtual void GenSpecialExitSequence() = 0; /** + * @brief Used to generate stack frame for suspend path of special methods. + */ + virtual void GenSpecialEntryForSuspend() = 0; + + /** + * @brief Used to pop the stack frame for suspend path of special methods. + */ + virtual void GenSpecialExitForSuspend() = 0; + + /** * @brief Used to generate code for special methods that are known to be * small enough to work in frameless mode. * @param bb The basic block of the first MIR. @@ -1593,9 +1603,8 @@ class Mir2Lir { * @brief Used to lock register if argument at in_position was passed that way. * @details Does nothing if the argument is passed via stack. * @param in_position The argument number whose register to lock. - * @param wide Whether the argument is wide. */ - void LockArg(int in_position, bool wide = false); + void LockArg(size_t in_position); /** * @brief Used to load VR argument to a physical register. @@ -1605,14 +1614,33 @@ class Mir2Lir { * @param wide Whether the argument is 64-bit or not. * @return Returns the register (or register pair) for the loaded argument. */ - RegStorage LoadArg(int in_position, RegisterClass reg_class, bool wide = false); + RegStorage LoadArg(size_t in_position, RegisterClass reg_class, bool wide = false); /** * @brief Used to load a VR argument directly to a specified register location. * @param in_position The argument number to place in register. * @param rl_dest The register location where to place argument. */ - void LoadArgDirect(int in_position, RegLocation rl_dest); + void LoadArgDirect(size_t in_position, RegLocation rl_dest); + + /** + * @brief Used to spill register if argument at in_position was passed that way. + * @details Does nothing if the argument is passed via stack. + * @param in_position The argument number whose register to spill. + */ + void SpillArg(size_t in_position); + + /** + * @brief Used to unspill register if argument at in_position was passed that way. + * @details Does nothing if the argument is passed via stack. + * @param in_position The argument number whose register to spill. + */ + void UnspillArg(size_t in_position); + + /** + * @brief Generate suspend test in a special method. + */ + SpecialSuspendCheckSlowPath* GenSpecialSuspendTest(); /** * @brief Used to generate LIR for special getter method. @@ -1745,10 +1773,10 @@ class Mir2Lir { // The source mapping table data (pc -> dex). More entries than in encoded_mapping_table_ DefaultSrcMap src_mapping_table_; // The encoding mapping table data (dex -> pc offset and pc offset -> dex) with a size prefix. 
- std::vector<uint8_t> encoded_mapping_table_; + ArenaVector<uint8_t> encoded_mapping_table_; ArenaVector<uint32_t> core_vmap_table_; ArenaVector<uint32_t> fp_vmap_table_; - std::vector<uint8_t> native_gc_map_; + ArenaVector<uint8_t> native_gc_map_; ArenaVector<LinkerPatch> patches_; int num_core_spills_; int num_fp_spills_; @@ -1805,21 +1833,22 @@ class Mir2Lir { class InToRegStorageMapping { public: explicit InToRegStorageMapping(ArenaAllocator* arena) - : mapping_(std::less<int>(), arena->Adapter()), count_(0), - max_mapped_in_(0), has_arguments_on_stack_(false), initialized_(false) {} + : mapping_(arena->Adapter()), + end_mapped_in_(0u), has_arguments_on_stack_(false), initialized_(false) {} void Initialize(ShortyIterator* shorty, InToRegStorageMapper* mapper); /** - * @return the index of last VR mapped to physical register. In other words - * any VR starting from (return value + 1) index is mapped to memory. + * @return the past-the-end index of VRs mapped to physical registers. + * In other words any VR starting from this index is mapped to memory. */ - int GetMaxMappedIn() { return max_mapped_in_; } + size_t GetEndMappedIn() { return end_mapped_in_; } bool HasArgumentsOnStack() { return has_arguments_on_stack_; } - RegStorage Get(int in_position); + RegStorage GetReg(size_t in_position); + ShortyArg GetShorty(size_t in_position); bool IsInitialized() { return initialized_; } private: - ArenaSafeMap<int, RegStorage> mapping_; - int count_; - int max_mapped_in_; + static constexpr char kInvalidShorty = '-'; + ArenaVector<std::pair<ShortyArg, RegStorage>> mapping_; + size_t end_mapped_in_; bool has_arguments_on_stack_; bool initialized_; }; diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc index 909077eca2..19c2a5a3a3 100644 --- a/compiler/dex/quick/quick_compiler.cc +++ b/compiler/dex/quick/quick_compiler.cc @@ -560,6 +560,7 @@ static uint32_t kCompilerOptimizerDisableFlags = 0 | // Disable specific optimi // (1 << kNullCheckElimination) | // (1 << kClassInitCheckElimination) | // (1 << kGlobalValueNumbering) | + (1 << kGvnDeadCodeElimination) | // (1 << kLocalValueNumbering) | // (1 << kPromoteRegs) | // (1 << kTrackLiveTemps) | diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc index 8efafb23fe..67fb8040f7 100644 --- a/compiler/dex/quick/ralloc_util.cc +++ b/compiler/dex/quick/ralloc_util.cc @@ -1191,8 +1191,7 @@ void Mir2Lir::DoPromotion() { int num_regs = mir_graph_->GetNumOfCodeAndTempVRs(); const int promotion_threshold = 1; // Allocate the promotion map - one entry for each Dalvik vReg or compiler temp - promotion_map_ = static_cast<PromotionMap*> - (arena_->Alloc(num_regs * sizeof(promotion_map_[0]), kArenaAllocRegAlloc)); + promotion_map_ = arena_->AllocArray<PromotionMap>(num_regs, kArenaAllocRegAlloc); // Allow target code to add any special registers AdjustSpillMask(); @@ -1210,12 +1209,8 @@ void Mir2Lir::DoPromotion() { */ size_t core_reg_count_size = WideGPRsAreAliases() ? num_regs : num_regs * 2; size_t fp_reg_count_size = WideFPRsAreAliases() ? 
num_regs : num_regs * 2; - RefCounts *core_regs = - static_cast<RefCounts*>(arena_->Alloc(sizeof(RefCounts) * core_reg_count_size, - kArenaAllocRegAlloc)); - RefCounts *fp_regs = - static_cast<RefCounts *>(arena_->Alloc(sizeof(RefCounts) * fp_reg_count_size, - kArenaAllocRegAlloc)); + RefCounts *core_regs = arena_->AllocArray<RefCounts>(core_reg_count_size, kArenaAllocRegAlloc); + RefCounts *fp_regs = arena_->AllocArray<RefCounts>(fp_reg_count_size, kArenaAllocRegAlloc); // Set ssa names for original Dalvik registers for (int i = 0; i < num_regs; i++) { core_regs[i].s_reg = fp_regs[i].s_reg = i; diff --git a/compiler/dex/quick/resource_mask.cc b/compiler/dex/quick/resource_mask.cc index 8a27ecb94f..57e8af32a2 100644 --- a/compiler/dex/quick/resource_mask.cc +++ b/compiler/dex/quick/resource_mask.cc @@ -18,8 +18,8 @@ #include "resource_mask.h" +#include "base/arena_allocator.h" #include "base/logging.h" -#include "utils/arena_allocator.h" #include "utils.h" namespace art { diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc index aa0972f861..c3db3a64e5 100644 --- a/compiler/dex/quick/x86/call_x86.cc +++ b/compiler/dex/quick/x86/call_x86.cc @@ -37,84 +37,6 @@ void X86Mir2Lir::GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocat } /* - * We override InsertCaseLabel, because the first parameter represents - * a basic block id, instead of a dex offset. - */ -LIR* X86Mir2Lir::InsertCaseLabel(DexOffset bbid, int keyVal) { - LIR* boundary_lir = &block_label_list_[bbid]; - LIR* res = boundary_lir; - if (cu_->verbose) { - // Only pay the expense if we're pretty-printing. - LIR* new_label = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocLIR)); - BasicBlock* bb = mir_graph_->GetBasicBlock(bbid); - DCHECK(bb != nullptr); - new_label->dalvik_offset = bb->start_offset;; - new_label->opcode = kPseudoCaseLabel; - new_label->operands[0] = keyVal; - new_label->flags.fixup = kFixupLabel; - DCHECK(!new_label->flags.use_def_invalid); - new_label->u.m.def_mask = &kEncodeAll; - InsertLIRAfter(boundary_lir, new_label); - res = new_label; - } - return res; -} - -void X86Mir2Lir::MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec) { - const uint16_t* table = tab_rec->table; - const int32_t *targets = reinterpret_cast<const int32_t*>(&table[4]); - int entries = table[1]; - int low_key = s4FromSwitchData(&table[2]); - for (int i = 0; i < entries; i++) { - // The value at targets[i] is a basic block id, instead of a dex offset. - tab_rec->targets[i] = InsertCaseLabel(targets[i], i + low_key); - } -} - -/* - * We convert and create a new packed switch table that stores - * basic block ids to targets[] by examining successor blocks. - * Note that the original packed switch table stores dex offsets to targets[]. - */ -const uint16_t* X86Mir2Lir::ConvertPackedSwitchTable(MIR* mir, const uint16_t* table) { - /* - * The original packed switch data format: - * ushort ident = 0x0100 magic value - * ushort size number of entries in the table - * int first_key first (and lowest) switch case value - * int targets[size] branch targets, relative to switch opcode - * - * Total size is (4+size*2) 16-bit code units. - * - * Note that the new packed switch data format is the same as the original - * format, except that targets[] are basic block ids. - * - */ - BasicBlock* bb = mir_graph_->GetBasicBlock(mir->bb); - DCHECK(bb != nullptr); - // Get the number of entries. 
- int entries = table[1]; - const int32_t* as_int32 = reinterpret_cast<const int32_t*>(&table[2]); - int32_t starting_key = as_int32[0]; - // Create a new table. - int size = sizeof(uint16_t) * (4 + entries * 2); - uint16_t* new_table = reinterpret_cast<uint16_t*>(arena_->Alloc(size, kArenaAllocMisc)); - // Copy ident, size, and first_key to the new table. - memcpy(new_table, table, sizeof(uint16_t) * 4); - // Get the new targets. - int32_t* new_targets = reinterpret_cast<int32_t*>(&new_table[4]); - // Find out targets for each entry. - int i = 0; - for (SuccessorBlockInfo* successor_block_info : bb->successor_blocks) { - DCHECK_EQ(starting_key + i, successor_block_info->key); - // Save target basic block id. - new_targets[i++] = successor_block_info->block; - } - DCHECK_EQ(i, entries); - return new_table; -} - -/* * Code pattern will look something like: * * mov r_val, .. @@ -131,16 +53,14 @@ const uint16_t* X86Mir2Lir::ConvertPackedSwitchTable(MIR* mir, const uint16_t* t * done: */ void X86Mir2Lir::GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) { - const uint16_t* old_table = mir_graph_->GetTable(mir, table_offset); - const uint16_t* table = ConvertPackedSwitchTable(mir, old_table); + const uint16_t* table = mir_graph_->GetTable(mir, table_offset); // Add the table to the list - we'll process it later SwitchTable* tab_rec = static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData)); + tab_rec->switch_mir = mir; tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; int size = table[1]; - tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), - kArenaAllocLIR)); switch_tables_.push_back(tab_rec); // Get the switch value @@ -352,6 +272,41 @@ void X86Mir2Lir::GenSpecialExitSequence() { NewLIR0(kX86Ret); } +void X86Mir2Lir::GenSpecialEntryForSuspend() { + // Keep 16-byte stack alignment, there's already the return address, so + // - for 32-bit push EAX, i.e. ArtMethod*, ESI, EDI, + // - for 64-bit push RAX, i.e. ArtMethod*. + if (!cu_->target64) { + DCHECK(!IsTemp(rs_rSI)); + DCHECK(!IsTemp(rs_rDI)); + core_spill_mask_ = + (1u << rs_rDI.GetRegNum()) | (1u << rs_rSI.GetRegNum()) | (1u << rs_rRET.GetRegNum()); + num_core_spills_ = 3u; + } else { + core_spill_mask_ = (1u << rs_rRET.GetRegNum()); + num_core_spills_ = 1u; + } + fp_spill_mask_ = 0u; + num_fp_spills_ = 0u; + frame_size_ = 16u; + core_vmap_table_.clear(); + fp_vmap_table_.clear(); + if (!cu_->target64) { + NewLIR1(kX86Push32R, rs_rDI.GetReg()); + NewLIR1(kX86Push32R, rs_rSI.GetReg()); + } + NewLIR1(kX86Push32R, TargetReg(kArg0, kRef).GetReg()); // ArtMethod* +} + +void X86Mir2Lir::GenSpecialExitForSuspend() { + // Pop the frame. (ArtMethod* no longer needed but restore it anyway.) 
+ NewLIR1(kX86Pop32R, TargetReg(kArg0, kRef).GetReg()); // ArtMethod* + if (!cu_->target64) { + NewLIR1(kX86Pop32R, rs_rSI.GetReg()); + NewLIR1(kX86Pop32R, rs_rDI.GetReg()); + } +} + void X86Mir2Lir::GenImplicitNullCheck(RegStorage reg, int opt_flags) { if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) { return; diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 811d4f5d7b..20163b4b76 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -259,6 +259,8 @@ class X86Mir2Lir : public Mir2Lir { void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) OVERRIDE; void GenExitSequence() OVERRIDE; void GenSpecialExitSequence() OVERRIDE; + void GenSpecialEntryForSuspend() OVERRIDE; + void GenSpecialExitForSuspend() OVERRIDE; void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double) OVERRIDE; void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) OVERRIDE; void GenSelect(BasicBlock* bb, MIR* mir) OVERRIDE; @@ -271,11 +273,8 @@ class X86Mir2Lir : public Mir2Lir { int first_bit, int second_bit) OVERRIDE; void GenNegDouble(RegLocation rl_dest, RegLocation rl_src) OVERRIDE; void GenNegFloat(RegLocation rl_dest, RegLocation rl_src) OVERRIDE; - const uint16_t* ConvertPackedSwitchTable(MIR* mir, const uint16_t* table); void GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE; void GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE; - LIR* InsertCaseLabel(DexOffset vaddr, int keyVal) OVERRIDE; - void MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec) OVERRIDE; /** * @brief Implement instanceof a final class with x86 specific code. diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index 4fe7a43a85..91168c78bd 100755 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -863,22 +863,29 @@ bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { RegLocation rl_src1 = info->args[0]; RegLocation rl_src2 = info->args[2]; RegLocation rl_dest = InlineTargetWide(info); - int res_vreg, src1_vreg, src2_vreg; if (rl_dest.s_reg_low == INVALID_SREG) { // Result is unused, the code is dead. Inlining successful, no code generated. return true; } + if (PartiallyIntersects(rl_src1, rl_dest) && + PartiallyIntersects(rl_src2, rl_dest)) { + // A special case which we don't want to handle. + // This is when src1 is mapped on v0 and v1, + // src2 is mapped on v2, v3, + // result is mapped on v1, v2 + return false; + } + + /* * If the result register is the same as the second element, then we * need to be careful. The reason is that the first copy will * inadvertently clobber the second element with the first one thus * yielding the wrong result. Thus we do a swap in that case. */ - res_vreg = mir_graph_->SRegToVReg(rl_dest.s_reg_low); - src2_vreg = mir_graph_->SRegToVReg(rl_src2.s_reg_low); - if (res_vreg == src2_vreg) { + if (Intersects(rl_src2, rl_dest)) { std::swap(rl_src1, rl_src2); } @@ -893,19 +900,30 @@ bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { * nothing else to do because they are equal and we have already * moved one into the result. 
*/ - src1_vreg = mir_graph_->SRegToVReg(rl_src1.s_reg_low); - src2_vreg = mir_graph_->SRegToVReg(rl_src2.s_reg_low); - if (src1_vreg == src2_vreg) { + if (mir_graph_->SRegToVReg(rl_src1.s_reg_low) == + mir_graph_->SRegToVReg(rl_src2.s_reg_low)) { StoreValueWide(rl_dest, rl_result); return true; } // Free registers to make some room for the second operand. - // But don't try to free ourselves or promoted registers. - if (res_vreg != src1_vreg && - IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) { - FreeTemp(rl_src1.reg); + // But don't try to free part of a source which intersects + // part of result or promoted registers. + + if (IsTemp(rl_src1.reg.GetLow()) && + (rl_src1.reg.GetLowReg() != rl_result.reg.GetHighReg()) && + (rl_src1.reg.GetLowReg() != rl_result.reg.GetLowReg())) { + // Is low part temporary and doesn't intersect any parts of result? + FreeTemp(rl_src1.reg.GetLow()); } + + if (IsTemp(rl_src1.reg.GetHigh()) && + (rl_src1.reg.GetHighReg() != rl_result.reg.GetLowReg()) && + (rl_src1.reg.GetHighReg() != rl_result.reg.GetHighReg())) { + // Is high part temporary and doesn't intersect any parts of result? + FreeTemp(rl_src1.reg.GetHigh()); + } + rl_src2 = LoadValueWide(rl_src2, kCoreReg); // Do we have a free register for intermediate calculations? @@ -939,12 +957,15 @@ bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { // Let's put pop 'edi' here to break a bit the dependency chain. if (tmp == rs_rDI) { NewLIR1(kX86Pop32R, tmp.GetReg()); + } else { + FreeTemp(tmp); } // Conditionally move the other integer into the destination register. ConditionCode cc = is_min ? kCondGe : kCondLt; OpCondRegReg(kOpCmov, cc, rl_result.reg.GetLow(), rl_src2.reg.GetLow()); OpCondRegReg(kOpCmov, cc, rl_result.reg.GetHigh(), rl_src2.reg.GetHigh()); + FreeTemp(rl_src2.reg); StoreValueWide(rl_dest, rl_result); return true; } diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index c4adb09248..8f97d1e7c8 100755 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -1051,10 +1051,10 @@ void X86Mir2Lir::InstallLiteralPools() { } for (LIR *p = const_vectors_; p != nullptr; p = p->next) { - PushWord(&code_buffer_, p->operands[0]); - PushWord(&code_buffer_, p->operands[1]); - PushWord(&code_buffer_, p->operands[2]); - PushWord(&code_buffer_, p->operands[3]); + Push32(&code_buffer_, p->operands[0]); + Push32(&code_buffer_, p->operands[1]); + Push32(&code_buffer_, p->operands[2]); + Push32(&code_buffer_, p->operands[3]); } } diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc index 6bd49de989..197f66d017 100644 --- a/compiler/dex/ssa_transformation.cc +++ b/compiler/dex/ssa_transformation.cc @@ -16,9 +16,9 @@ #include "base/bit_vector-inl.h" #include "base/logging.h" +#include "base/scoped_arena_containers.h" #include "compiler_ir.h" #include "dataflow_iterator-inl.h" -#include "utils/scoped_arena_containers.h" #define NOTVISITED (-1) @@ -137,8 +137,8 @@ void MIRGraph::ComputeDefBlockMatrix() { /* Allocate num_registers bit vector pointers */ DCHECK(temp_scoped_alloc_ != nullptr); DCHECK(temp_.ssa.def_block_matrix == nullptr); - temp_.ssa.def_block_matrix = static_cast<ArenaBitVector**>( - temp_scoped_alloc_->Alloc(sizeof(ArenaBitVector*) * num_registers, kArenaAllocDFInfo)); + temp_.ssa.def_block_matrix = + temp_scoped_alloc_->AllocArray<ArenaBitVector*>(num_registers, kArenaAllocDFInfo); int i; /* Initialize num_register vectors with num_blocks bits each */ @@ 
-363,8 +363,7 @@ void MIRGraph::ComputeDominators() { /* Initialize & Clear i_dom_list */ if (max_num_reachable_blocks_ < num_reachable_blocks_) { - i_dom_list_ = static_cast<int*>(arena_->Alloc(sizeof(int) * num_reachable_blocks, - kArenaAllocDFInfo)); + i_dom_list_ = arena_->AllocArray<int>(num_reachable_blocks, kArenaAllocDFInfo); } for (int i = 0; i < num_reachable_blocks; i++) { i_dom_list_[i] = NOTVISITED; @@ -463,24 +462,28 @@ bool MIRGraph::ComputeBlockLiveIns(BasicBlock* bb) { return false; } -/* Insert phi nodes to for each variable to the dominance frontiers */ -void MIRGraph::InsertPhiNodes() { - int dalvik_reg; - ArenaBitVector* phi_blocks = new (temp_scoped_alloc_.get()) ArenaBitVector( - temp_scoped_alloc_.get(), GetNumBlocks(), false, kBitMapPhi); - ArenaBitVector* input_blocks = new (temp_scoped_alloc_.get()) ArenaBitVector( - temp_scoped_alloc_.get(), GetNumBlocks(), false, kBitMapInputBlocks); - +/* For each dalvik reg, find blocks that need phi nodes according to the dominance frontiers. */ +void MIRGraph::FindPhiNodeBlocks() { RepeatingPostOrderDfsIterator iter(this); bool change = false; for (BasicBlock* bb = iter.Next(false); bb != NULL; bb = iter.Next(change)) { change = ComputeBlockLiveIns(bb); } + ArenaBitVector* phi_blocks = new (temp_scoped_alloc_.get()) ArenaBitVector( + temp_scoped_alloc_.get(), GetNumBlocks(), false, kBitMapBMatrix); + + // Reuse the def_block_matrix storage for phi_node_blocks. + ArenaBitVector** def_block_matrix = temp_.ssa.def_block_matrix; + ArenaBitVector** phi_node_blocks = def_block_matrix; + DCHECK(temp_.ssa.phi_node_blocks == nullptr); + temp_.ssa.phi_node_blocks = phi_node_blocks; + temp_.ssa.def_block_matrix = nullptr; + /* Iterate through each Dalvik register */ - for (dalvik_reg = GetNumOfCodeAndTempVRs() - 1; dalvik_reg >= 0; dalvik_reg--) { - input_blocks->Copy(temp_.ssa.def_block_matrix[dalvik_reg]); + for (int dalvik_reg = GetNumOfCodeAndTempVRs() - 1; dalvik_reg >= 0; dalvik_reg--) { phi_blocks->ClearAllBits(); + ArenaBitVector* input_blocks = def_block_matrix[dalvik_reg]; do { // TUNING: When we repeat this, we could skip indexes from the previous pass. for (uint32_t idx : input_blocks->Indexes()) { @@ -491,23 +494,8 @@ void MIRGraph::InsertPhiNodes() { } } while (input_blocks->Union(phi_blocks)); - /* - * Insert a phi node for dalvik_reg in the phi_blocks if the Dalvik - * register is in the live-in set. - */ - for (uint32_t idx : phi_blocks->Indexes()) { - BasicBlock* phi_bb = GetBasicBlock(idx); - /* Variable will be clobbered before being used - no need for phi */ - if (!phi_bb->data_flow_info->live_in_v->IsBitSet(dalvik_reg)) { - continue; - } - MIR *phi = NewMIR(); - phi->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpPhi); - phi->dalvikInsn.vA = dalvik_reg; - phi->offset = phi_bb->start_offset; - phi->m_unit_index = 0; // Arbitrarily assign all Phi nodes to outermost method. - phi_bb->PrependMIR(phi); - } + def_block_matrix[dalvik_reg] = phi_blocks; + phi_blocks = input_blocks; // Reuse the bit vector in next iteration. 
} } @@ -528,9 +516,7 @@ bool MIRGraph::InsertPhiNodeOperands(BasicBlock* bb) { size_t num_uses = bb->predecessors.size(); AllocateSSAUseData(mir, num_uses); int* uses = mir->ssa_rep->uses; - BasicBlockId* incoming = - static_cast<BasicBlockId*>(arena_->Alloc(sizeof(BasicBlockId) * num_uses, - kArenaAllocDFInfo)); + BasicBlockId* incoming = arena_->AllocArray<BasicBlockId>(num_uses, kArenaAllocDFInfo); mir->meta.phi_incoming = incoming; int idx = 0; for (BasicBlockId pred_id : bb->predecessors) { @@ -553,12 +539,12 @@ void MIRGraph::DoDFSPreOrderSSARename(BasicBlock* block) { /* Process this block */ DoSSAConversion(block); - int map_size = sizeof(int) * GetNumOfCodeAndTempVRs(); /* Save SSA map snapshot */ ScopedArenaAllocator allocator(&cu_->arena_stack); - int* saved_ssa_map = - static_cast<int*>(allocator.Alloc(map_size, kArenaAllocDalvikToSSAMap)); + uint32_t num_vregs = GetNumOfCodeAndTempVRs(); + int32_t* saved_ssa_map = allocator.AllocArray<int32_t>(num_vregs, kArenaAllocDalvikToSSAMap); + size_t map_size = sizeof(saved_ssa_map[0]) * num_vregs; memcpy(saved_ssa_map, vreg_to_ssa_map_, map_size); if (block->fall_through != NullBasicBlockId) { diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc index f70850a332..b620969ae2 100644 --- a/compiler/dex/vreg_analysis.cc +++ b/compiler/dex/vreg_analysis.cc @@ -440,8 +440,7 @@ void MIRGraph::InitRegLocations() { // the temp allocation initializes reg location as well (in order to deal with // case when it will be called after this pass). int max_regs = GetNumSSARegs() + GetMaxPossibleCompilerTemps(); - RegLocation* loc = static_cast<RegLocation*>(arena_->Alloc(max_regs * sizeof(*loc), - kArenaAllocRegAlloc)); + RegLocation* loc = arena_->AllocArray<RegLocation>(max_regs, kArenaAllocRegAlloc); for (int i = 0; i < GetNumSSARegs(); i++) { loc[i] = fresh_loc; loc[i].s_reg_low = i; diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 2d8c9d4a9e..b8a893649b 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -1285,7 +1285,15 @@ void CompilerDriver::GetCodeAndMethodForDirectCall(InvokeType* type, InvokeType *stats_flags |= kFlagDirectCallToBoot | kFlagDirectMethodToBoot; } if (!use_dex_cache && force_relocations) { - if (!IsImage() || !IsImageClass(method->GetDeclaringClassDescriptor())) { + bool is_in_image; + if (IsImage()) { + is_in_image = IsImageClass(method->GetDeclaringClassDescriptor()); + } else { + is_in_image = instruction_set_ != kX86 && instruction_set_ != kX86_64 && + Runtime::Current()->GetHeap()->FindSpaceFromObject(method->GetDeclaringClass(), + false)->IsImageSpace(); + } + if (!is_in_image) { // We can only branch directly to Methods that are resolved in the DexCache. // Otherwise we won't invoke the resolution trampoline. 
use_dex_cache = true; diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index 2fca2e52f4..b7562442d7 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -22,6 +22,7 @@ #include <vector> #include "arch/instruction_set.h" +#include "base/arena_allocator.h" #include "base/mutex.h" #include "base/timing_logger.h" #include "class_reference.h" @@ -38,7 +39,6 @@ #include "runtime.h" #include "safe_map.h" #include "thread_pool.h" -#include "utils/arena_allocator.h" #include "utils/dedupe_set.h" #include "utils/swap_space.h" #include "utils.h" diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc index 9ec4f281cb..401d5a951d 100644 --- a/compiler/elf_writer_quick.cc +++ b/compiler/elf_writer_quick.cc @@ -90,19 +90,19 @@ std::vector<uint8_t>* ConstructCIEFrameX86(bool is_x86_64) { // Length (will be filled in later in this routine). if (is_x86_64) { - PushWord(cfi_info, 0xffffffff); // Indicates 64bit - PushWord(cfi_info, 0); - PushWord(cfi_info, 0); + Push32(cfi_info, 0xffffffff); // Indicates 64bit + Push32(cfi_info, 0); + Push32(cfi_info, 0); } else { - PushWord(cfi_info, 0); + Push32(cfi_info, 0); } // CIE id: always 0. if (is_x86_64) { - PushWord(cfi_info, 0); - PushWord(cfi_info, 0); + Push32(cfi_info, 0); + Push32(cfi_info, 0); } else { - PushWord(cfi_info, 0); + Push32(cfi_info, 0); } // Version: always 1. @@ -318,7 +318,7 @@ class LineTableGenerator FINAL : public Leb128Encoder { PushByte(data_, 0); // extended opcode: PushByte(data_, 1 + 4); // length: opcode_size + address_size PushByte(data_, DW_LNE_set_address); - PushWord(data_, addr); + Push32(data_, addr); } void SetLine(unsigned line) { @@ -507,13 +507,13 @@ static void FillInCFIInformation(OatWriter* oat_writer, // Start the debug_info section with the header information // 'unit_length' will be filled in later. int cunit_length = dbg_info->size(); - PushWord(dbg_info, 0); + Push32(dbg_info, 0); // 'version' - 3. PushHalf(dbg_info, 3); // Offset into .debug_abbrev section (always 0). - PushWord(dbg_info, 0); + Push32(dbg_info, 0); // Address size: 4. PushByte(dbg_info, 4); @@ -523,7 +523,7 @@ static void FillInCFIInformation(OatWriter* oat_writer, PushByte(dbg_info, 1); // The producer is Android dex2oat. - PushWord(dbg_info, producer_str_offset); + Push32(dbg_info, producer_str_offset); // The language is Java. PushByte(dbg_info, DW_LANG_Java); @@ -532,8 +532,8 @@ static void FillInCFIInformation(OatWriter* oat_writer, uint32_t cunit_low_pc = 0 - 1; uint32_t cunit_high_pc = 0; int cunit_low_pc_pos = dbg_info->size(); - PushWord(dbg_info, 0); - PushWord(dbg_info, 0); + Push32(dbg_info, 0); + Push32(dbg_info, 0); if (dbg_line == nullptr) { for (size_t i = 0; i < method_info.size(); ++i) { @@ -546,9 +546,9 @@ static void FillInCFIInformation(OatWriter* oat_writer, PushByte(dbg_info, 2); // Enter name, low_pc, high_pc. - PushWord(dbg_info, PushStr(dbg_str, dbg.method_name_)); - PushWord(dbg_info, dbg.low_pc_ + text_section_offset); - PushWord(dbg_info, dbg.high_pc_ + text_section_offset); + Push32(dbg_info, PushStr(dbg_str, dbg.method_name_)); + Push32(dbg_info, dbg.low_pc_ + text_section_offset); + Push32(dbg_info, dbg.high_pc_ + text_section_offset); } } else { // TODO: in gdb info functions <regexp> - reports Java functions, but @@ -559,15 +559,15 @@ static void FillInCFIInformation(OatWriter* oat_writer, // method ranges. 
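A side note on the PushWord-to-Push32 rename used throughout this file and in target_x86.cc above: the behaviour is unchanged, both append a 32-bit value to the byte buffer least-significant byte first. Below is a self-contained sketch of such a helper, written as a template so it also accepts the ArenaVector-based CodeBuffer; the real declaration elsewhere in the tree may differ.

#include <cstdint>
#include <vector>

// Append a 32-bit value, least significant byte first.
template <typename Vector>
void Push32Sketch(Vector* buf, int32_t data) {
  uint32_t value = static_cast<uint32_t>(data);
  buf->push_back(static_cast<uint8_t>(value & 0xff));
  buf->push_back(static_cast<uint8_t>((value >> 8) & 0xff));
  buf->push_back(static_cast<uint8_t>((value >> 16) & 0xff));
  buf->push_back(static_cast<uint8_t>((value >> 24) & 0xff));
}

// Usage mirroring the DWARF writer above.
void Example(std::vector<uint8_t>* cfi_info) {
  Push32Sketch(cfi_info, 0);                                  // length placeholder
  Push32Sketch(cfi_info, static_cast<int32_t>(0xffffffffu));  // 64-bit DWARF initial length marker
}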
// Line number table offset - PushWord(dbg_info, dbg_line->size()); + Push32(dbg_info, dbg_line->size()); size_t lnt_length = dbg_line->size(); - PushWord(dbg_line, 0); + Push32(dbg_line, 0); PushHalf(dbg_line, 4); // LNT Version DWARF v4 => 4 size_t lnt_hdr_length = dbg_line->size(); - PushWord(dbg_line, 0); // TODO: 64-bit uses 8-byte here + Push32(dbg_line, 0); // TODO: 64-bit uses 8-byte here PushByte(dbg_line, 1); // minimum_instruction_length (ubyte) PushByte(dbg_line, 1); // maximum_operations_per_instruction (ubyte) = always 1 @@ -629,9 +629,9 @@ static void FillInCFIInformation(OatWriter* oat_writer, PushByte(dbg_info, 2); // Enter name, low_pc, high_pc. - PushWord(dbg_info, PushStr(dbg_str, dbg.method_name_)); - PushWord(dbg_info, dbg.low_pc_ + text_section_offset); - PushWord(dbg_info, dbg.high_pc_ + text_section_offset); + Push32(dbg_info, PushStr(dbg_str, dbg.method_name_)); + Push32(dbg_info, dbg.low_pc_ + text_section_offset); + Push32(dbg_info, dbg.high_pc_ + text_section_offset); GetLineInfoForJava(dbg.dbgstream_, dbg.compiled_method_->GetSrcMappingTable(), &pc2java_map, dbg.low_pc_); diff --git a/compiler/gc_map_builder.h b/compiler/gc_map_builder.h index bc8ad41608..4c36ef733c 100644 --- a/compiler/gc_map_builder.h +++ b/compiler/gc_map_builder.h @@ -26,15 +26,17 @@ namespace art { class GcMapBuilder { public: - GcMapBuilder(std::vector<uint8_t>* table, size_t entries, uint32_t max_native_offset, + template <typename Alloc> + GcMapBuilder(std::vector<uint8_t, Alloc>* table, size_t entries, uint32_t max_native_offset, size_t references_width) : entries_(entries), references_width_(entries != 0u ? references_width : 0u), native_offset_width_(entries != 0 && max_native_offset != 0 ? sizeof(max_native_offset) - CLZ(max_native_offset) / 8u : 0u), - in_use_(entries), table_(table) { + in_use_(entries) { // Resize table and set up header. table->resize((EntryWidth() * entries) + sizeof(uint32_t)); + table_ = table->data(); CHECK_LT(native_offset_width_, 1U << 3); (*table)[0] = native_offset_width_ & 7; CHECK_LT(references_width_, 1U << 13); @@ -65,7 +67,7 @@ class GcMapBuilder { uint32_t native_offset = 0; size_t table_offset = (table_index * EntryWidth()) + sizeof(uint32_t); for (size_t i = 0; i < native_offset_width_; i++) { - native_offset |= (*table_)[table_offset + i] << (i * 8); + native_offset |= table_[table_offset + i] << (i * 8); } return native_offset; } @@ -73,13 +75,13 @@ class GcMapBuilder { void SetCodeOffset(size_t table_index, uint32_t native_offset) { size_t table_offset = (table_index * EntryWidth()) + sizeof(uint32_t); for (size_t i = 0; i < native_offset_width_; i++) { - (*table_)[table_offset + i] = (native_offset >> (i * 8)) & 0xFF; + table_[table_offset + i] = (native_offset >> (i * 8)) & 0xFF; } } void SetReferences(size_t table_index, const uint8_t* references) { size_t table_offset = (table_index * EntryWidth()) + sizeof(uint32_t); - memcpy(&(*table_)[table_offset + native_offset_width_], references, references_width_); + memcpy(&table_[table_offset + native_offset_width_], references, references_width_); } size_t EntryWidth() const { @@ -95,7 +97,7 @@ class GcMapBuilder { // Entries that are in use. std::vector<bool> in_use_; // The table we're building. 
- std::vector<uint8_t>* const table_; + uint8_t* table_; }; } // namespace art diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index b2342491fa..c588e1a53d 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -273,13 +273,7 @@ void ImageWriter::SetImageBinSlot(mirror::Object* object, BinSlot bin_slot) { void ImageWriter::AssignImageBinSlot(mirror::Object* object) { DCHECK(object != nullptr); - size_t object_size; - if (object->IsArtMethod()) { - // Methods are sized based on the target pointer size. - object_size = mirror::ArtMethod::InstanceSize(target_ptr_size_); - } else { - object_size = object->SizeOf(); - } + size_t object_size = object->SizeOf(); // The magic happens here. We segregate objects into different bins based // on how likely they are to get dirty at runtime. @@ -569,6 +563,7 @@ void ImageWriter::ComputeEagerResolvedStringsCallback(Object* obj, void* arg ATT } mirror::String* string = obj->AsString(); const uint16_t* utf16_string = string->GetCharArray()->GetData() + string->GetOffset(); + size_t utf16_length = static_cast<size_t>(string->GetLength()); ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); ReaderMutexLock mu(Thread::Current(), *class_linker->DexLock()); size_t dex_cache_count = class_linker->GetDexCacheCount(); @@ -576,10 +571,10 @@ void ImageWriter::ComputeEagerResolvedStringsCallback(Object* obj, void* arg ATT DexCache* dex_cache = class_linker->GetDexCache(i); const DexFile& dex_file = *dex_cache->GetDexFile(); const DexFile::StringId* string_id; - if (UNLIKELY(string->GetLength() == 0)) { + if (UNLIKELY(utf16_length == 0)) { string_id = dex_file.FindStringId(""); } else { - string_id = dex_file.FindStringId(utf16_string); + string_id = dex_file.FindStringId(utf16_string, utf16_length); } if (string_id != nullptr) { // This string occurs in this dex file, assign the dex cache entry. @@ -931,7 +926,7 @@ void ImageWriter::CopyAndFixupObjectsCallback(Object* obj, void* arg) { if (obj->IsArtMethod()) { // Size without pointer fields since we don't want to overrun the buffer if target art method // is 32 bits but source is 64 bits. - n = mirror::ArtMethod::SizeWithoutPointerFields(sizeof(void*)); + n = mirror::ArtMethod::SizeWithoutPointerFields(image_writer->target_ptr_size_); } else { n = obj->SizeOf(); } @@ -1016,10 +1011,6 @@ void ImageWriter::FixupObject(Object* orig, Object* copy) { } if (orig->IsArtMethod<kVerifyNone>()) { FixupMethod(orig->AsArtMethod<kVerifyNone>(), down_cast<ArtMethod*>(copy)); - } else if (orig->IsClass() && orig->AsClass()->IsArtMethodClass()) { - // Set the right size for the target. - size_t size = mirror::ArtMethod::InstanceSize(target_ptr_size_); - down_cast<mirror::Class*>(copy)->SetObjectSizeWithoutChecks(size); } } @@ -1031,7 +1022,9 @@ const uint8_t* ImageWriter::GetQuickCode(mirror::ArtMethod* method, bool* quick_ // trampoline. // Quick entrypoint: - const uint8_t* quick_code = GetOatAddress(method->GetQuickOatCodeOffset()); + uint32_t quick_oat_code_offset = PointerToLowMemUInt32( + method->GetEntryPointFromQuickCompiledCodePtrSize(target_ptr_size_)); + const uint8_t* quick_code = GetOatAddress(quick_oat_code_offset); *quick_is_interpreted = false; if (quick_code != nullptr && (!method->IsStatic() || method->IsConstructor() || method->GetDeclaringClass()->IsInitialized())) { @@ -1082,11 +1075,12 @@ void ImageWriter::FixupMethod(ArtMethod* orig, ArtMethod* copy) { // locations. 
// Copy all of the fields from the runtime methods to the target methods first since we did a // bytewise copy earlier. - copy->SetEntryPointFromInterpreterPtrSize<kVerifyNone>(orig->GetEntryPointFromInterpreter(), - target_ptr_size_); - copy->SetEntryPointFromJniPtrSize<kVerifyNone>(orig->GetEntryPointFromJni(), target_ptr_size_); + copy->SetEntryPointFromInterpreterPtrSize<kVerifyNone>( + orig->GetEntryPointFromInterpreterPtrSize(target_ptr_size_), target_ptr_size_); + copy->SetEntryPointFromJniPtrSize<kVerifyNone>( + orig->GetEntryPointFromJniPtrSize(target_ptr_size_), target_ptr_size_); copy->SetEntryPointFromQuickCompiledCodePtrSize<kVerifyNone>( - orig->GetEntryPointFromQuickCompiledCode(), target_ptr_size_); + orig->GetEntryPointFromQuickCompiledCodePtrSize(target_ptr_size_), target_ptr_size_); // The resolution method has a special trampoline to call. Runtime* runtime = Runtime::Current(); diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index 3c36ffa4e9..9c0157e885 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -899,7 +899,8 @@ class OatWriter::InitMapMethodVisitor : public OatDexMethodVisitor { class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { public: InitImageMethodVisitor(OatWriter* writer, size_t offset) - : OatDexMethodVisitor(writer, offset) { + : OatDexMethodVisitor(writer, offset), + pointer_size_(GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet())) { } bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) @@ -932,10 +933,14 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { std::string dump = exc->Dump(); LOG(FATAL) << dump; } - method->SetQuickOatCodeOffset(offsets.code_offset_); + method->SetEntryPointFromQuickCompiledCodePtrSize(reinterpret_cast<void*>(offsets.code_offset_), + pointer_size_); return true; } + + protected: + const size_t pointer_size_; }; class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { @@ -1103,10 +1108,18 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { if (UNLIKELY(target_offset == 0)) { mirror::ArtMethod* target = GetTargetMethod(patch); DCHECK(target != nullptr); - DCHECK_EQ(target->GetQuickOatCodeOffset(), 0u); - target_offset = target->IsNative() - ? writer_->oat_header_->GetQuickGenericJniTrampolineOffset() - : writer_->oat_header_->GetQuickToInterpreterBridgeOffset(); + size_t size = GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet()); + const void* oat_code_offset = target->GetEntryPointFromQuickCompiledCodePtrSize(size); + if (oat_code_offset != 0) { + DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickResolutionStub(oat_code_offset)); + DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickToInterpreterBridge(oat_code_offset)); + DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickGenericJniStub(oat_code_offset)); + target_offset = PointerToLowMemUInt32(oat_code_offset); + } else { + target_offset = target->IsNative() + ? writer_->oat_header_->GetQuickGenericJniTrampolineOffset() + : writer_->oat_header_->GetQuickToInterpreterBridgeOffset(); + } } return target_offset; } @@ -1138,10 +1151,9 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { void PatchCodeAddress(std::vector<uint8_t>* code, uint32_t offset, uint32_t target_offset) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - // NOTE: Direct calls across oat files don't use linker patches. 
- DCHECK(writer_->image_writer_ != nullptr); - uint32_t address = PointerToLowMemUInt32(writer_->image_writer_->GetOatFileBegin() + - writer_->oat_data_offset_ + target_offset); + uint32_t address = writer_->image_writer_ == nullptr ? target_offset : + PointerToLowMemUInt32(writer_->image_writer_->GetOatFileBegin() + + writer_->oat_data_offset_ + target_offset); DCHECK_LE(offset + 4, code->size()); uint8_t* data = &(*code)[offset]; data[0] = address & 0xffu; diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index d6c3515726..811a3bdf0c 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -14,9 +14,9 @@ * limitations under the License. */ +#include "base/arena_containers.h" #include "bounds_check_elimination.h" #include "nodes.h" -#include "utils/arena_containers.h" namespace art { @@ -28,18 +28,11 @@ class MonotonicValueRange; */ class ValueBound : public ValueObject { public: - ValueBound(HInstruction* instruction, int constant) { + ValueBound(HInstruction* instruction, int32_t constant) { if (instruction != nullptr && instruction->IsIntConstant()) { - // Normalizing ValueBound with constant instruction. - int instr_const = instruction->AsIntConstant()->GetValue(); - if (constant >= 0 && (instr_const <= INT_MAX - constant)) { - // No overflow. - instruction_ = nullptr; - constant_ = instr_const + constant; - return; - } - if (constant < 0 && (instr_const >= INT_MIN - constant)) { - // No underflow. + // Normalize ValueBound with constant instruction. + int32_t instr_const = instruction->AsIntConstant()->GetValue(); + if (!WouldAddOverflowOrUnderflow(instr_const, constant)) { instruction_ = nullptr; constant_ = instr_const + constant; return; @@ -49,6 +42,41 @@ class ValueBound : public ValueObject { constant_ = constant; } + // Return whether (left + right) overflows or underflows. + static bool WouldAddOverflowOrUnderflow(int32_t left, int32_t right) { + if (right == 0) { + return false; + } + if ((right > 0) && (left <= INT_MAX - right)) { + // No overflow. + return false; + } + if ((right < 0) && (left >= INT_MIN - right)) { + // No underflow. + return false; + } + return true; + } + + static bool IsAddOrSubAConstant(HInstruction* instruction, + HInstruction** left_instruction, + int* right_constant) { + if (instruction->IsAdd() || instruction->IsSub()) { + HBinaryOperation* bin_op = instruction->AsBinaryOperation(); + HInstruction* left = bin_op->GetLeft(); + HInstruction* right = bin_op->GetRight(); + if (right->IsIntConstant()) { + *left_instruction = left; + int32_t c = right->AsIntConstant()->GetValue(); + *right_constant = instruction->IsAdd() ? c : -c; + return true; + } + } + *left_instruction = nullptr; + *right_constant = 0; + return false; + } + // Try to detect useful value bound format from an instruction, e.g. // a constant or array length related value. static ValueBound DetectValueBoundFromValue(HInstruction* instruction, bool* found) { @@ -63,13 +91,12 @@ class ValueBound : public ValueObject { return ValueBound(instruction, 0); } // Try to detect (array.length + c) format. 
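// A standalone sketch of the overflow/underflow test introduced just above
// (illustrative only): left + right is representable as int32_t exactly when neither
// check below fires, so the constant parts of two bounds can be folded safely.
#include <climits>
#include <cstdint>

static bool WouldAddOverflowOrUnderflow(int32_t left, int32_t right) {
  if (right > 0 && left > INT_MAX - right) return true;   // would overflow
  if (right < 0 && left < INT_MIN - right) return true;   // would underflow
  return false;                                           // right == 0 also lands here
}

// Usage: (array.length + c) is folded with an extra constant k only when
// WouldAddOverflowOrUnderflow(c, k) is false; otherwise the bound stays symbolic.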
- if (instruction->IsAdd()) { - HAdd* add = instruction->AsAdd(); - HInstruction* left = add->GetLeft(); - HInstruction* right = add->GetRight(); - if (left->IsArrayLength() && right->IsIntConstant()) { + HInstruction *left; + int32_t right; + if (IsAddOrSubAConstant(instruction, &left, &right)) { + if (left->IsArrayLength()) { *found = true; - return ValueBound(left, right->AsIntConstant()->GetValue()); + return ValueBound(left, right); } } @@ -79,10 +106,13 @@ class ValueBound : public ValueObject { } HInstruction* GetInstruction() const { return instruction_; } - int GetConstant() const { return constant_; } + int32_t GetConstant() const { return constant_; } - bool IsRelativeToArrayLength() const { - return instruction_ != nullptr && instruction_->IsArrayLength(); + bool IsRelatedToArrayLength() const { + // Some bounds are created with HNewArray* as the instruction instead + // of HArrayLength*. They are treated the same. + return (instruction_ != nullptr) && + (instruction_->IsArrayLength() || instruction_->IsNewArray()); } bool IsConstant() const { @@ -96,54 +126,45 @@ class ValueBound : public ValueObject { return instruction_ == bound.instruction_ && constant_ == bound.constant_; } - // Returns if it's certain bound1 >= bound2. - bool GreaterThanOrEqual(ValueBound bound) const { - if (instruction_ == bound.instruction_) { - if (instruction_ == nullptr) { - // Pure constant. - return constant_ >= bound.constant_; - } - // There might be overflow/underflow. Be conservative for now. - return false; + static HInstruction* FromArrayLengthToNewArrayIfPossible(HInstruction* instruction) { + // Null check on the NewArray should have been eliminated by instruction + // simplifier already. + if (instruction->IsArrayLength() && instruction->InputAt(0)->IsNewArray()) { + return instruction->InputAt(0)->AsNewArray(); } - // Not comparable. Just return false. - return false; + return instruction; } - // Returns if it's certain bound1 <= bound2. - bool LessThanOrEqual(ValueBound bound) const { - if (instruction_ == bound.instruction_) { - if (instruction_ == nullptr) { - // Pure constant. - return constant_ <= bound.constant_; - } - if (IsRelativeToArrayLength()) { - // Array length is guaranteed to be no less than 0. - // No overflow/underflow can happen if both constants are negative. - if (constant_ <= 0 && bound.constant_ <= 0) { - return constant_ <= bound.constant_; - } - // There might be overflow/underflow. Be conservative for now. - return false; - } + static bool Equal(HInstruction* instruction1, HInstruction* instruction2) { + if (instruction1 == instruction2) { + return true; } - // In case the array length is some constant, we can - // still compare. - if (IsConstant() && bound.IsRelativeToArrayLength()) { - HInstruction* array = bound.GetInstruction()->AsArrayLength()->InputAt(0); - if (array->IsNullCheck()) { - array = array->AsNullCheck()->InputAt(0); - } - if (array->IsNewArray()) { - HInstruction* len = array->InputAt(0); - if (len->IsIntConstant()) { - int len_const = len->AsIntConstant()->GetValue(); - return constant_ <= len_const + bound.GetConstant(); - } - } + if (instruction1 == nullptr || instruction2 == nullptr) { + return false; } + // Some bounds are created with HNewArray* as the instruction instead + // of HArrayLength*. They are treated the same. 
+ instruction1 = FromArrayLengthToNewArrayIfPossible(instruction1); + instruction2 = FromArrayLengthToNewArrayIfPossible(instruction2); + return instruction1 == instruction2; + } + + // Returns if it's certain this->bound >= `bound`. + bool GreaterThanOrEqualTo(ValueBound bound) const { + if (Equal(instruction_, bound.instruction_)) { + return constant_ >= bound.constant_; + } + // Not comparable. Just return false. + return false; + } + + // Returns if it's certain this->bound <= `bound`. + bool LessThanOrEqualTo(ValueBound bound) const { + if (Equal(instruction_, bound.instruction_)) { + return constant_ <= bound.constant_; + } // Not comparable. Just return false. return false; } @@ -151,10 +172,11 @@ class ValueBound : public ValueObject { // Try to narrow lower bound. Returns the greatest of the two if possible. // Pick one if they are not comparable. static ValueBound NarrowLowerBound(ValueBound bound1, ValueBound bound2) { - if (bound1.instruction_ == bound2.instruction_) { - // Same instruction, compare the constant part. - return ValueBound(bound1.instruction_, - std::max(bound1.constant_, bound2.constant_)); + if (bound1.GreaterThanOrEqualTo(bound2)) { + return bound1; + } + if (bound2.GreaterThanOrEqualTo(bound1)) { + return bound2; } // Not comparable. Just pick one. We may lose some info, but that's ok. @@ -165,58 +187,71 @@ class ValueBound : public ValueObject { // Try to narrow upper bound. Returns the lowest of the two if possible. // Pick one if they are not comparable. static ValueBound NarrowUpperBound(ValueBound bound1, ValueBound bound2) { - if (bound1.instruction_ == bound2.instruction_) { - // Same instruction, compare the constant part. - return ValueBound(bound1.instruction_, - std::min(bound1.constant_, bound2.constant_)); + if (bound1.LessThanOrEqualTo(bound2)) { + return bound1; + } + if (bound2.LessThanOrEqualTo(bound1)) { + return bound2; } // Not comparable. Just pick one. We may lose some info, but that's ok. // Favor array length as upper bound. - return bound1.IsRelativeToArrayLength() ? bound1 : bound2; + return bound1.IsRelatedToArrayLength() ? bound1 : bound2; } - // Add a constant to a ValueBound. If the constant part of the ValueBound - // overflows/underflows, then we can't accurately represent it. For correctness, - // just return Max/Min() depending on whether the returned ValueBound is used for - // lower/upper bound. - ValueBound Add(int c, bool* overflow_or_underflow) const { - *overflow_or_underflow = false; + // Add a constant to a ValueBound. + // `overflow` or `underflow` will return whether the resulting bound may + // overflow or underflow an int. + ValueBound Add(int32_t c, bool* overflow, bool* underflow) const { + *overflow = *underflow = false; if (c == 0) { return *this; } - int new_constant; + int32_t new_constant; if (c > 0) { if (constant_ > INT_MAX - c) { - // Constant part overflows. - *overflow_or_underflow = true; + *overflow = true; return Max(); - } else { - new_constant = constant_ + c; } + + new_constant = constant_ + c; + // (array.length + non-positive-constant) won't overflow an int. + if (IsConstant() || (IsRelatedToArrayLength() && new_constant <= 0)) { + return ValueBound(instruction_, new_constant); + } + // Be conservative. + *overflow = true; + return Max(); } else { if (constant_ < INT_MIN - c) { - // Constant part underflows. 
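// A small self-contained sketch of the comparison rule above (illustrative types, not
// the ART classes): two bounds of the form "instr + c" are only ordered when their
// symbolic parts are the same instruction (after the HNewArray/HArrayLength
// normalization); the comparison then reduces to comparing the constants, and
// anything else is "not comparable", which conservatively answers false.
#include <cstdint>

struct Bound {
  const void* instr;  // nullptr means a pure constant bound
  int32_t c;
};

static bool GreaterThanOrEqualTo(const Bound& a, const Bound& b) {
  return a.instr == b.instr && a.c >= b.c;
}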
- *overflow_or_underflow = true; - return Max(); - } else { - new_constant = constant_ + c; + *underflow = true; + return Min(); } + + new_constant = constant_ + c; + // Regardless of the value new_constant, (array.length+new_constant) will + // never underflow since array.length is no less than 0. + if (IsConstant() || IsRelatedToArrayLength()) { + return ValueBound(instruction_, new_constant); + } + // Be conservative. + *underflow = true; + return Min(); } return ValueBound(instruction_, new_constant); } private: HInstruction* instruction_; - int constant_; + int32_t constant_; }; /** * Represent a range of lower bound and upper bound, both being inclusive. * Currently a ValueRange may be generated as a result of the following: * comparisons related to array bounds, array bounds check, add/sub on top - * of an existing value range, or a loop phi corresponding to an + * of an existing value range, NewArray or a loop phi corresponding to an * incrementing/decrementing array index (MonotonicValueRange). */ class ValueRange : public ArenaObject<kArenaAllocMisc> { @@ -241,8 +276,8 @@ class ValueRange : public ArenaObject<kArenaAllocMisc> { return true; } DCHECK(!other_range->IsMonotonicValueRange()); - return lower_.GreaterThanOrEqual(other_range->lower_) && - upper_.LessThanOrEqual(other_range->upper_); + return lower_.GreaterThanOrEqualTo(other_range->lower_) && + upper_.LessThanOrEqualTo(other_range->upper_); } // Returns the intersection of this and range. @@ -263,29 +298,24 @@ class ValueRange : public ArenaObject<kArenaAllocMisc> { ValueBound::NarrowUpperBound(upper_, range->upper_)); } - // Shift a range by a constant. If either bound can't be represented - // as (instruction+c) format due to possible overflow/underflow, - // return the full integer range. - ValueRange* Add(int constant) const { - bool overflow_or_underflow; - ValueBound lower = lower_.Add(constant, &overflow_or_underflow); - if (overflow_or_underflow) { - // We can't accurately represent the bounds anymore. - return FullIntRange(); + // Shift a range by a constant. + ValueRange* Add(int32_t constant) const { + bool overflow, underflow; + ValueBound lower = lower_.Add(constant, &overflow, &underflow); + if (underflow) { + // Lower bound underflow will wrap around to positive values + // and invalidate the upper bound. + return nullptr; } - ValueBound upper = upper_.Add(constant, &overflow_or_underflow); - if (overflow_or_underflow) { - // We can't accurately represent the bounds anymore. - return FullIntRange(); + ValueBound upper = upper_.Add(constant, &overflow, &underflow); + if (overflow) { + // Upper bound overflow will wrap around to negative values + // and invalidate the lower bound. + return nullptr; } return new (allocator_) ValueRange(allocator_, lower, upper); } - // Return [INT_MIN, INT_MAX]. - ValueRange* FullIntRange() const { - return new (allocator_) ValueRange(allocator_, ValueBound::Min(), ValueBound::Max()); - } - private: ArenaAllocator* const allocator_; const ValueBound lower_; // inclusive @@ -304,7 +334,7 @@ class MonotonicValueRange : public ValueRange { public: MonotonicValueRange(ArenaAllocator* allocator, HInstruction* initial, - int increment, + int32_t increment, ValueBound bound) // To be conservative, give it full range [INT_MIN, INT_MAX] in case it's // used as a regular value range, due to possible overflow/underflow. @@ -343,23 +373,17 @@ class MonotonicValueRange : public ValueRange { // make assumptions about the max array length, e.g. 
due to the max heap size, // divided by the element size (such as 4 bytes for each integer array), we can // lower this number and rule out some possible overflows. - int max_array_len = INT_MAX; - - int upper = INT_MAX; - if (range->GetUpper().IsConstant()) { - upper = range->GetUpper().GetConstant(); - } else if (range->GetUpper().IsRelativeToArrayLength()) { - int constant = range->GetUpper().GetConstant(); - if (constant <= 0) { - // Normal case. e.g. <= array.length - 1, <= array.length - 2, etc. - upper = max_array_len + constant; - } else { - // There might be overflow. Give up narrowing. - return this; - } - } else { - // There might be overflow. Give up narrowing. - return this; + int32_t max_array_len = INT_MAX; + + // max possible integer value of range's upper value. + int32_t upper = INT_MAX; + // Try to lower upper. + ValueBound upper_bound = range->GetUpper(); + if (upper_bound.IsConstant()) { + upper = upper_bound.GetConstant(); + } else if (upper_bound.IsRelatedToArrayLength() && upper_bound.GetConstant() <= 0) { + // Normal case. e.g. <= array.length - 1. + upper = max_array_len + upper_bound.GetConstant(); } // If we can prove for the last number in sequence of initial_, @@ -368,13 +392,13 @@ class MonotonicValueRange : public ValueRange { // then this MonoticValueRange is narrowed to a normal value range. // Be conservative first, assume last number in the sequence hits upper. - int last_num_in_sequence = upper; + int32_t last_num_in_sequence = upper; if (initial_->IsIntConstant()) { - int initial_constant = initial_->AsIntConstant()->GetValue(); + int32_t initial_constant = initial_->AsIntConstant()->GetValue(); if (upper <= initial_constant) { last_num_in_sequence = upper; } else { - // Cast to int64_t for the substraction part to avoid int overflow. + // Cast to int64_t for the substraction part to avoid int32_t overflow. last_num_in_sequence = initial_constant + ((int64_t)upper - (int64_t)initial_constant) / increment_ * increment_; } @@ -392,23 +416,22 @@ class MonotonicValueRange : public ValueRange { ValueBound upper = ValueBound::NarrowUpperBound(bound_, range->GetUpper()); // Need to take care of underflow. Try to prove underflow won't happen - // for common cases. Basically need to be able to prove for any value - // that's >= range->GetLower(), it won't be positive with value+increment. + // for common cases. if (range->GetLower().IsConstant()) { - int constant = range->GetLower().GetConstant(); + int32_t constant = range->GetLower().GetConstant(); if (constant >= INT_MIN - increment_) { return new (GetAllocator()) ValueRange(GetAllocator(), range->GetLower(), upper); } } - // There might be underflow. Give up narrowing. + // For non-constant lower bound, just assume might be underflow. Give up narrowing. return this; } } private: HInstruction* const initial_; - const int increment_; + const int32_t increment_; ValueBound bound_; // Additional value bound info for initial_; DISALLOW_COPY_AND_ASSIGN(MonotonicValueRange); @@ -446,13 +469,26 @@ class BCEVisitor : public HGraphVisitor { return nullptr; } - // Narrow the value range of 'instruction' at the end of 'basic_block' with 'range', - // and push the narrowed value range to 'successor'. + // Narrow the value range of `instruction` at the end of `basic_block` with `range`, + // and push the narrowed value range to `successor`. 
void ApplyRangeFromComparison(HInstruction* instruction, HBasicBlock* basic_block, - HBasicBlock* successor, ValueRange* range) { + HBasicBlock* successor, ValueRange* range) { ValueRange* existing_range = LookupValueRange(instruction, basic_block); - ValueRange* narrowed_range = (existing_range == nullptr) ? - range : existing_range->Narrow(range); + if (existing_range == nullptr) { + if (range != nullptr) { + GetValueRangeMap(successor)->Overwrite(instruction->GetId(), range); + } + return; + } + if (existing_range->IsMonotonicValueRange()) { + DCHECK(instruction->IsLoopHeaderPhi()); + // Make sure the comparison is in the loop header so each increment is + // checked with a comparison. + if (instruction->GetBlock() != basic_block) { + return; + } + } + ValueRange* narrowed_range = existing_range->Narrow(range); if (narrowed_range != nullptr) { GetValueRangeMap(successor)->Overwrite(instruction->GetId(), narrowed_range); } @@ -472,10 +508,12 @@ class BCEVisitor : public HGraphVisitor { bool found; ValueBound bound = ValueBound::DetectValueBoundFromValue(right, &found); + // Each comparison can establish a lower bound and an upper bound + // for the left hand side. ValueBound lower = bound; ValueBound upper = bound; if (!found) { - // No constant or array.length+c bound found. + // No constant or array.length+c format bound found. // For i<j, we can still use j's upper bound as i's upper bound. Same for lower. ValueRange* range = LookupValueRange(right, block); if (range != nullptr) { @@ -487,13 +525,13 @@ class BCEVisitor : public HGraphVisitor { } } - bool overflow_or_underflow; + bool overflow, underflow; if (cond == kCondLT || cond == kCondLE) { if (!upper.Equals(ValueBound::Max())) { - int compensation = (cond == kCondLT) ? -1 : 0; // upper bound is inclusive - ValueBound new_upper = upper.Add(compensation, &overflow_or_underflow); - if (overflow_or_underflow) { - new_upper = ValueBound::Max(); + int32_t compensation = (cond == kCondLT) ? -1 : 0; // upper bound is inclusive + ValueBound new_upper = upper.Add(compensation, &overflow, &underflow); + if (overflow || underflow) { + return; } ValueRange* new_range = new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), ValueBound::Min(), new_upper); @@ -501,11 +539,11 @@ class BCEVisitor : public HGraphVisitor { } // array.length as a lower bound isn't considered useful. - if (!lower.Equals(ValueBound::Min()) && !lower.IsRelativeToArrayLength()) { - int compensation = (cond == kCondLE) ? 1 : 0; // lower bound is inclusive - ValueBound new_lower = lower.Add(compensation, &overflow_or_underflow); - if (overflow_or_underflow) { - new_lower = ValueBound::Min(); + if (!lower.Equals(ValueBound::Min()) && !lower.IsRelatedToArrayLength()) { + int32_t compensation = (cond == kCondLE) ? 1 : 0; // lower bound is inclusive + ValueBound new_lower = lower.Add(compensation, &overflow, &underflow); + if (overflow || underflow) { + return; } ValueRange* new_range = new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), new_lower, ValueBound::Max()); @@ -513,11 +551,11 @@ class BCEVisitor : public HGraphVisitor { } } else if (cond == kCondGT || cond == kCondGE) { // array.length as a lower bound isn't considered useful. - if (!lower.Equals(ValueBound::Min()) && !lower.IsRelativeToArrayLength()) { - int compensation = (cond == kCondGT) ? 
1 : 0; // lower bound is inclusive - ValueBound new_lower = lower.Add(compensation, &overflow_or_underflow); - if (overflow_or_underflow) { - new_lower = ValueBound::Min(); + if (!lower.Equals(ValueBound::Min()) && !lower.IsRelatedToArrayLength()) { + int32_t compensation = (cond == kCondGT) ? 1 : 0; // lower bound is inclusive + ValueBound new_lower = lower.Add(compensation, &overflow, &underflow); + if (overflow || underflow) { + return; } ValueRange* new_range = new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), new_lower, ValueBound::Max()); @@ -525,10 +563,10 @@ class BCEVisitor : public HGraphVisitor { } if (!upper.Equals(ValueBound::Max())) { - int compensation = (cond == kCondGE) ? -1 : 0; // upper bound is inclusive - ValueBound new_upper = upper.Add(compensation, &overflow_or_underflow); - if (overflow_or_underflow) { - new_upper = ValueBound::Max(); + int32_t compensation = (cond == kCondGE) ? -1 : 0; // upper bound is inclusive + ValueBound new_upper = upper.Add(compensation, &overflow, &underflow); + if (overflow || underflow) { + return; } ValueRange* new_range = new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), ValueBound::Min(), new_upper); @@ -541,41 +579,56 @@ class BCEVisitor : public HGraphVisitor { HBasicBlock* block = bounds_check->GetBlock(); HInstruction* index = bounds_check->InputAt(0); HInstruction* array_length = bounds_check->InputAt(1); - ValueRange* index_range = LookupValueRange(index, block); - - if (index_range != nullptr) { - ValueBound lower = ValueBound(nullptr, 0); // constant 0 - ValueBound upper = ValueBound(array_length, -1); // array_length - 1 - ValueRange* array_range = new (GetGraph()->GetArena()) - ValueRange(GetGraph()->GetArena(), lower, upper); - if (index_range->FitsIn(array_range)) { - ReplaceBoundsCheck(bounds_check, index); + DCHECK(array_length->IsIntConstant() || array_length->IsArrayLength()); + + if (!index->IsIntConstant()) { + ValueRange* index_range = LookupValueRange(index, block); + if (index_range != nullptr) { + ValueBound lower = ValueBound(nullptr, 0); // constant 0 + ValueBound upper = ValueBound(array_length, -1); // array_length - 1 + ValueRange* array_range = new (GetGraph()->GetArena()) + ValueRange(GetGraph()->GetArena(), lower, upper); + if (index_range->FitsIn(array_range)) { + ReplaceBoundsCheck(bounds_check, index); + return; + } + } + } else { + int32_t constant = index->AsIntConstant()->GetValue(); + if (constant < 0) { + // Will always throw exception. + return; + } + if (array_length->IsIntConstant()) { + if (constant < array_length->AsIntConstant()->GetValue()) { + ReplaceBoundsCheck(bounds_check, index); + } return; } - } - if (index->IsIntConstant()) { - ValueRange* array_length_range = LookupValueRange(array_length, block); - int constant = index->AsIntConstant()->GetValue(); - if (array_length_range != nullptr && - array_length_range->GetLower().IsConstant()) { - if (constant < array_length_range->GetLower().GetConstant()) { + DCHECK(array_length->IsArrayLength()); + ValueRange* existing_range = LookupValueRange(array_length, block); + if (existing_range != nullptr) { + ValueBound lower = existing_range->GetLower(); + DCHECK(lower.IsConstant()); + if (constant < lower.GetConstant()) { ReplaceBoundsCheck(bounds_check, index); return; + } else { + // Existing range isn't strong enough to eliminate the bounds check. + // Fall through to update the array_length range with info from this + // bounds check. 
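// A sketch of the compensation step used in VisitIf above (standalone, illustrative
// names): for "i < b" the largest value i can take is b - 1, so the inclusive upper
// bound carries a compensation of -1; for "i <= b" it is b itself. If applying the
// compensation would wrap around, no range is recorded at all, matching the early
// return on overflow/underflow in the patch.
#include <climits>
#include <cstdint>

static bool InclusiveUpperFromCompare(bool is_strict_less_than, int32_t b, int32_t* upper) {
  int32_t compensation = is_strict_less_than ? -1 : 0;
  if (is_strict_less_than && b == INT_MIN) {
    return false;  // b - 1 would underflow: give up instead of clamping.
  }
  *upper = b + compensation;
  return true;
}
// Example: i < 10 yields an inclusive upper bound of 9; i <= 10 yields 10.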
} } // Once we have an array access like 'array[5] = 1', we record array.length >= 6. + // We currently don't do it for non-constant index since a valid array[i] can't prove + // a valid array[i-1] yet due to the lower bound side. ValueBound lower = ValueBound(nullptr, constant + 1); ValueBound upper = ValueBound::Max(); ValueRange* range = new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), lower, upper); - ValueRange* existing_range = LookupValueRange(array_length, block); - ValueRange* new_range = range; - if (existing_range != nullptr) { - new_range = range->Narrow(existing_range); - } - GetValueRangeMap(block)->Overwrite(array_length->GetId(), new_range); + GetValueRangeMap(block)->Overwrite(array_length->GetId(), range); } } @@ -588,14 +641,12 @@ class BCEVisitor : public HGraphVisitor { if (phi->IsLoopHeaderPhi() && phi->GetType() == Primitive::kPrimInt) { DCHECK_EQ(phi->InputCount(), 2U); HInstruction* instruction = phi->InputAt(1); - if (instruction->IsAdd()) { - HAdd* add = instruction->AsAdd(); - HInstruction* left = add->GetLeft(); - HInstruction* right = add->GetRight(); - if (left == phi && right->IsIntConstant()) { + HInstruction *left; + int32_t increment; + if (ValueBound::IsAddOrSubAConstant(instruction, &left, &increment)) { + if (left == phi) { HInstruction* initial_value = phi->InputAt(0); ValueRange* range = nullptr; - int increment = right->AsIntConstant()->GetValue(); if (increment == 0) { // Add constant 0. It's really a fixed value. range = new (GetGraph()->GetArena()) ValueRange( @@ -676,29 +727,122 @@ class BCEVisitor : public HGraphVisitor { // Here we are interested in the typical triangular case of nested loops, // such as the inner loop 'for (int j=0; j<array.length-i; j++)' where i // is the index for outer loop. In this case, we know j is bounded by array.length-1. + + // Try to handle (array.length - i) or (array.length + c - i) format. + HInstruction* left_of_left; // left input of left. + int32_t right_const = 0; + if (ValueBound::IsAddOrSubAConstant(left, &left_of_left, &right_const)) { + left = left_of_left; + } + // The value of left input of the sub equals (left + right_const). + if (left->IsArrayLength()) { HInstruction* array_length = left->AsArrayLength(); ValueRange* right_range = LookupValueRange(right, sub->GetBlock()); if (right_range != nullptr) { ValueBound lower = right_range->GetLower(); ValueBound upper = right_range->GetUpper(); - if (lower.IsConstant() && upper.IsRelativeToArrayLength()) { + if (lower.IsConstant() && upper.IsRelatedToArrayLength()) { HInstruction* upper_inst = upper.GetInstruction(); - if (upper_inst->IsArrayLength() && - upper_inst->AsArrayLength() == array_length) { - // (array.length - v) where v is in [c1, array.length + c2] - // gets [-c2, array.length - c1] as its value range. - ValueRange* range = new (GetGraph()->GetArena()) ValueRange( - GetGraph()->GetArena(), - ValueBound(nullptr, - upper.GetConstant()), - ValueBound(array_length, - lower.GetConstant())); - GetValueRangeMap(sub->GetBlock())->Overwrite(sub->GetId(), range); + // Make sure it's the same array. + if (ValueBound::Equal(array_length, upper_inst)) { + int32_t c0 = right_const; + int32_t c1 = lower.GetConstant(); + int32_t c2 = upper.GetConstant(); + // (array.length + c0 - v) where v is in [c1, array.length + c2] + // gets [c0 - c2, array.length + c0 - c1] as its value range. 
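// A worked sketch of the range arithmetic described in the comment above
// (illustrative, not ART code): if v is known to lie in [c1, array.length + c2], then
//   array.length + c0 - v   lies in   [c0 - c2, array.length + c0 - c1],
// since substituting the largest v gives the smallest result and vice versa.
// Example with c0 = 0, c1 = 0, c2 = -1 (v in [0, array.length - 1]):
//   array.length - v is in [1, array.length], so (array.length - v) - 1 is a valid index.
#include <cstdint>

struct LengthRelativeRange {
  int32_t lo_const;  // absolute lower bound constant
  int32_t hi_const;  // upper bound constant, relative to array.length
};

static LengthRelativeRange SubFromLengthPlusC0(int32_t c0, int32_t c1, int32_t c2) {
  // Assumes the overflow pre-checks from the patch already passed.
  return LengthRelativeRange{c0 - c2, c0 - c1};
}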
+ if (!ValueBound::WouldAddOverflowOrUnderflow(c0, -c2) && + !ValueBound::WouldAddOverflowOrUnderflow(c0, -c1)) { + if ((c0 - c1) <= 0) { + // array.length + (c0 - c1) won't overflow/underflow. + ValueRange* range = new (GetGraph()->GetArena()) ValueRange( + GetGraph()->GetArena(), + ValueBound(nullptr, right_const - upper.GetConstant()), + ValueBound(array_length, right_const - lower.GetConstant())); + GetValueRangeMap(sub->GetBlock())->Overwrite(sub->GetId(), range); + } + } } } } } } + void FindAndHandlePartialArrayLength(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsShr() || instruction->IsUShr()); + HInstruction* right = instruction->GetRight(); + int32_t right_const; + if (right->IsIntConstant()) { + right_const = right->AsIntConstant()->GetValue(); + // Detect division by two or more. + if ((instruction->IsDiv() && right_const <= 1) || + (instruction->IsShr() && right_const < 1) || + (instruction->IsUShr() && right_const < 1)) { + return; + } + } else { + return; + } + + // Try to handle array.length/2 or (array.length-1)/2 format. + HInstruction* left = instruction->GetLeft(); + HInstruction* left_of_left; // left input of left. + int32_t c = 0; + if (ValueBound::IsAddOrSubAConstant(left, &left_of_left, &c)) { + left = left_of_left; + } + // The value of left input of instruction equals (left + c). + + // (array_length + 1) or smaller divided by two or more + // always generate a value in [INT_MIN, array_length]. + // This is true even if array_length is INT_MAX. + if (left->IsArrayLength() && c <= 1) { + if (instruction->IsUShr() && c < 0) { + // Make sure for unsigned shift, left side is not negative. + // e.g. if array_length is 2, ((array_length - 3) >>> 2) is way bigger + // than array_length. + return; + } + ValueRange* range = new (GetGraph()->GetArena()) ValueRange( + GetGraph()->GetArena(), + ValueBound(nullptr, INT_MIN), + ValueBound(left, 0)); + GetValueRangeMap(instruction->GetBlock())->Overwrite(instruction->GetId(), range); + } + } + + void VisitDiv(HDiv* div) { + FindAndHandlePartialArrayLength(div); + } + + void VisitShr(HShr* shr) { + FindAndHandlePartialArrayLength(shr); + } + + void VisitUShr(HUShr* ushr) { + FindAndHandlePartialArrayLength(ushr); + } + + void VisitNewArray(HNewArray* new_array) { + HInstruction* len = new_array->InputAt(0); + if (!len->IsIntConstant()) { + HInstruction *left; + int32_t right_const; + if (ValueBound::IsAddOrSubAConstant(len, &left, &right_const)) { + // (left + right_const) is used as size to new the array. + // We record "-right_const <= left <= new_array - right_const"; + ValueBound lower = ValueBound(nullptr, -right_const); + // We use new_array for the bound instead of new_array.length, + // which isn't available as an instruction yet. new_array will + // be treated the same as new_array.length when it's used in a ValueBound. 
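// A hedged check of the claim behind FindAndHandlePartialArrayLength above: for any
// array length len >= 0, a constant c <= 1 and a divisor d >= 2, (len + c) / d never
// exceeds len, so the quotient can safely use [INT_MIN, len] as its range. The
// brute-force loop below (small ranges only, illustrative) exercises that claim.
#include <cassert>
#include <cstdint>

int main() {
  for (int64_t len = 0; len <= 1000; ++len) {
    for (int64_t c = -5; c <= 1; ++c) {
      for (int64_t d = 2; d <= 8; ++d) {
        assert((len + c) / d <= len);
      }
    }
  }
  return 0;
}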
+ ValueBound upper = ValueBound(new_array, -right_const); + ValueRange* range = new (GetGraph()->GetArena()) + ValueRange(GetGraph()->GetArena(), lower, upper); + GetValueRangeMap(new_array->GetBlock())->Overwrite(left->GetId(), range); + } + } + } + std::vector<std::unique_ptr<ArenaSafeMap<int, ValueRange*>>> maps_; DISALLOW_COPY_AND_ASSIGN(BCEVisitor); diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index 3dcb08d195..a298413d14 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -14,19 +14,22 @@ * limitations under the License. */ +#include "base/arena_allocator.h" #include "bounds_check_elimination.h" #include "builder.h" #include "gvn.h" +#include "instruction_simplifier.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "side_effects_analysis.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" namespace art { -static void RunGvn(HGraph* graph) { +static void RunSimplifierAndGvn(HGraph* graph) { + InstructionSimplifier simplify(graph); + simplify.Run(); SideEffectsAnalysis side_effects(graph); side_effects.Run(); GVNOptimization(graph, side_effects).Run(); @@ -127,7 +130,7 @@ TEST(BoundsCheckEliminationTest, NarrowingRangeArrayBoundsElimination) { block3->AddSuccessor(block4); // False successor graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); ASSERT_FALSE(IsRemoved(bounds_check2)); @@ -202,7 +205,7 @@ TEST(BoundsCheckEliminationTest, OverflowArrayBoundsElimination) { block3->AddSuccessor(exit); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -277,7 +280,7 @@ TEST(BoundsCheckEliminationTest, UnderflowArrayBoundsElimination) { block3->AddSuccessor(exit); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -351,7 +354,7 @@ TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) { exit->AddInstruction(new (&allocator) HExit()); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); ASSERT_FALSE(IsRemoved(bounds_check5)); @@ -397,7 +400,6 @@ static HGraph* BuildSSAGraph1(ArenaAllocator* allocator, loop_body->AddSuccessor(loop_header); HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt); - phi->AddInput(constant_initial); HInstruction* null_check = new (allocator) HNullCheck(parameter, 0); HInstruction* array_length = new (allocator) HArrayLength(null_check); HInstruction* cmp = nullptr; @@ -413,6 +415,7 @@ static HGraph* BuildSSAGraph1(ArenaAllocator* allocator, loop_header->AddInstruction(array_length); loop_header->AddInstruction(cmp); loop_header->AddInstruction(if_inst); + phi->AddInput(constant_initial); null_check = new (allocator) HNullCheck(parameter, 0); array_length = new (allocator) HArrayLength(null_check); @@ -450,7 +453,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // HArrayLength which uses the null check as its input. 
graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_after_gvn(graph); bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -458,7 +461,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=1; i<array.length; i++) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 1); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -466,7 +469,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=-1; i<array.length; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, -1, 1); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph); bounds_check_elimination_with_initial_minus_1.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -474,7 +477,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=0; i<=array.length; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1, kCondGT); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -483,7 +486,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // array[i] = 10; // Can't eliminate due to overflow concern. } graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 2); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_increment_2(graph); bounds_check_elimination_with_increment_2.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -491,7 +494,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=1; i<array.length; i += 2) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 2); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_increment_2_from_1(graph); bounds_check_elimination_with_increment_2_from_1.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -541,7 +544,6 @@ static HGraph* BuildSSAGraph2(ArenaAllocator* allocator, loop_body->AddSuccessor(loop_header); HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt); - phi->AddInput(array_length); HInstruction* cmp = nullptr; if (cond == kCondLE) { cmp = new (allocator) HLessThanOrEqual(phi, constant_initial); @@ -553,6 +555,7 @@ static HGraph* BuildSSAGraph2(ArenaAllocator* allocator, loop_header->AddPhi(phi); loop_header->AddInstruction(cmp); loop_header->AddInstruction(if_inst); + phi->AddInput(array_length); HInstruction* add = new (allocator) HAdd(Primitive::kPrimInt, phi, constant_minus_1); null_check = new (allocator) HNullCheck(parameter, 0); @@ -591,7 +594,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // HArrayLength which uses the null check as its input. 
graph = BuildSSAGraph2(&allocator, &bounds_check, 0); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_after_gvn(graph); bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -599,7 +602,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>1; i--) { array[i-1] = 10; // Can eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, 1); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -607,7 +610,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>-1; i--) { array[i-1] = 10; // Can't eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, -1); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph); bounds_check_elimination_with_initial_minus_1.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -615,7 +618,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>=0; i--) { array[i-1] = 10; // Can't eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -1, kCondLT); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_less_than(graph); bounds_check_elimination_with_less_than.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -623,13 +626,13 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>0; i-=2) { array[i-1] = 10; // Can eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -2); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_increment_minus_2(graph); bounds_check_elimination_increment_minus_2.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); } -// int[] array = new array[10]; +// int[] array = new int[10]; // for (int i=0; i<10; i+=increment) { array[i] = 10; } static HGraph* BuildSSAGraph3(ArenaAllocator* allocator, HInstruction** bounds_check, @@ -669,7 +672,6 @@ static HGraph* BuildSSAGraph3(ArenaAllocator* allocator, loop_body->AddSuccessor(loop_header); HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt); - phi->AddInput(constant_initial); HInstruction* cmp = nullptr; if (cond == kCondGE) { cmp = new (allocator) HGreaterThanOrEqual(phi, constant_10); @@ -681,6 +683,7 @@ static HGraph* BuildSSAGraph3(ArenaAllocator* allocator, loop_header->AddPhi(phi); loop_header->AddInstruction(cmp); loop_header->AddInstruction(if_inst); + phi->AddInput(constant_initial); HNullCheck* null_check = new (allocator) HNullCheck(new_array, 0); HArrayLength* array_length = new (allocator) HArrayLength(null_check); @@ -705,39 +708,39 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) { ArenaPool pool; ArenaAllocator allocator(&pool); - // int[] array = new array[10]; + // int[] array = new int[10]; // for (int i=0; i<10; i++) { array[i] = 10; // Can eliminate. 
} HInstruction* bounds_check = nullptr; HGraph* graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGE); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_after_gvn(graph); bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); - // int[] array = new array[10]; + // int[] array = new int[10]; // for (int i=1; i<10; i++) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 1, kCondGE); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); - // int[] array = new array[10]; + // int[] array = new int[10]; // for (int i=0; i<=10; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGT); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); - // int[] array = new array[10]; + // int[] array = new int[10]; // for (int i=1; i<10; i+=8) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 8, kCondGE); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_increment_8(graph); bounds_check_elimination_increment_8.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -782,7 +785,6 @@ static HGraph* BuildSSAGraph4(ArenaAllocator* allocator, loop_body->AddSuccessor(loop_header); HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt); - phi->AddInput(constant_initial); HInstruction* null_check = new (allocator) HNullCheck(parameter, 0); HInstruction* array_length = new (allocator) HArrayLength(null_check); HInstruction* cmp = nullptr; @@ -797,6 +799,7 @@ static HGraph* BuildSSAGraph4(ArenaAllocator* allocator, loop_header->AddInstruction(array_length); loop_header->AddInstruction(cmp); loop_header->AddInstruction(if_inst); + phi->AddInput(constant_initial); null_check = new (allocator) HNullCheck(parameter, 0); array_length = new (allocator) HArrayLength(null_check); @@ -838,7 +841,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) { // HArrayLength which uses the null check as its input. graph = BuildSSAGraph4(&allocator, &bounds_check, 0); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_after_gvn(graph); bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -846,7 +849,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) { // for (int i=1; i<array.length; i++) { array[array.length-i-1] = 10; // Can eliminate. } graph = BuildSSAGraph4(&allocator, &bounds_check, 1); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -854,7 +857,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) { // for (int i=0; i<=array.length; i++) { array[array.length-i] = 10; // Can't eliminate. 
} graph = BuildSSAGraph4(&allocator, &bounds_check, 0, kCondGT); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -901,7 +904,6 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { HBasicBlock* outer_header = new (&allocator) HBasicBlock(graph); graph->AddBlock(outer_header); HPhi* phi_i = new (&allocator) HPhi(&allocator, 0, 0, Primitive::kPrimInt); - phi_i->AddInput(constant_0); HNullCheck* null_check = new (&allocator) HNullCheck(parameter, 0); HArrayLength* array_length = new (&allocator) HArrayLength(null_check); HAdd* add = new (&allocator) HAdd(Primitive::kPrimInt, array_length, constant_minus_1); @@ -913,11 +915,11 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { outer_header->AddInstruction(add); outer_header->AddInstruction(cmp); outer_header->AddInstruction(if_inst); + phi_i->AddInput(constant_0); HBasicBlock* inner_header = new (&allocator) HBasicBlock(graph); graph->AddBlock(inner_header); HPhi* phi_j = new (&allocator) HPhi(&allocator, 0, 0, Primitive::kPrimInt); - phi_j->AddInput(constant_0); null_check = new (&allocator) HNullCheck(parameter, 0); array_length = new (&allocator) HArrayLength(null_check); HSub* sub = new (&allocator) HSub(Primitive::kPrimInt, array_length, phi_i); @@ -931,6 +933,7 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { inner_header->AddInstruction(add); inner_header->AddInstruction(cmp); inner_header->AddInstruction(if_inst); + phi_j->AddInput(constant_0); HBasicBlock* inner_body_compare = new (&allocator) HBasicBlock(graph); graph->AddBlock(inner_body_compare); @@ -1030,7 +1033,7 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { outer_body_add->AddSuccessor(outer_header); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); // gvn should remove the same bounds check. ASSERT_FALSE(IsRemoved(bounds_check1)); ASSERT_FALSE(IsRemoved(bounds_check2)); diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index c5101363ee..3e4a6169d9 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -17,13 +17,13 @@ #ifndef ART_COMPILER_OPTIMIZING_BUILDER_H_ #define ART_COMPILER_OPTIMIZING_BUILDER_H_ +#include "base/arena_object.h" #include "dex_file.h" #include "dex_file-inl.h" #include "driver/compiler_driver.h" #include "driver/dex_compilation_unit.h" #include "optimizing_compiler_stats.h" #include "primitive.h" -#include "utils/arena_object.h" #include "utils/growable_array.h" #include "nodes.h" diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index fd4e391470..2a57fdc929 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -40,9 +40,17 @@ size_t CodeGenerator::GetCacheOffset(uint32_t index) { return mirror::ObjectArray<mirror::Object>::OffsetOfElement(index).SizeValue(); } -void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) { - DCHECK_EQ(frame_size_, kUninitializedFrameSize); +static bool IsSingleGoto(HBasicBlock* block) { + HLoopInformation* loop_info = block->GetLoopInformation(); + // TODO: Remove the null check b/19084197. 
+ return (block->GetFirstInstruction() != nullptr) + && (block->GetFirstInstruction() == block->GetLastInstruction()) + && block->GetLastInstruction()->IsGoto() + // Back edges generate the suspend check. + && (loop_info == nullptr || !loop_info->IsBackEdge(block)); +} +void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) { Initialize(); if (!is_leaf) { MarkNotLeaf(); @@ -58,19 +66,43 @@ void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) { CompileInternal(allocator, /* is_baseline */ true); } +bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const { + DCHECK_EQ(block_order_->Get(current_block_index_), current); + return GetNextBlockToEmit() == FirstNonEmptyBlock(next); +} + +HBasicBlock* CodeGenerator::GetNextBlockToEmit() const { + for (size_t i = current_block_index_ + 1; i < block_order_->Size(); ++i) { + HBasicBlock* block = block_order_->Get(i); + if (!IsSingleGoto(block)) { + return block; + } + } + return nullptr; +} + +HBasicBlock* CodeGenerator::FirstNonEmptyBlock(HBasicBlock* block) const { + while (IsSingleGoto(block)) { + block = block->GetSuccessors().Get(0); + } + return block; +} + void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) { - HGraphVisitor* location_builder = GetLocationBuilder(); HGraphVisitor* instruction_visitor = GetInstructionVisitor(); DCHECK_EQ(current_block_index_, 0u); GenerateFrameEntry(); for (size_t e = block_order_->Size(); current_block_index_ < e; ++current_block_index_) { HBasicBlock* block = block_order_->Get(current_block_index_); + // Don't generate code for an empty block. Its predecessors will branch to its successor + // directly. Also, the label of that block will not be emitted, so this helps catch + // errors where we reference that label. + if (IsSingleGoto(block)) continue; Bind(block); for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* current = it.Current(); if (is_baseline) { - current->Accept(location_builder); - InitLocations(current); + InitLocationsBaseline(current); } current->Accept(instruction_visitor); } @@ -88,7 +120,6 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) void CodeGenerator::CompileOptimized(CodeAllocator* allocator) { // The register allocator already called `InitializeCodeGeneration`, // where the frame size has been computed. - DCHECK_NE(frame_size_, kUninitializedFrameSize); DCHECK(block_order_ != nullptr); Initialize(); CompileInternal(allocator, /* is_baseline */ false); @@ -138,13 +169,22 @@ void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, ComputeSpillMask(); first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize; - SetFrameSize(RoundUp( - number_of_spill_slots * kVRegSize - + number_of_out_slots * kVRegSize - + maximum_number_of_live_core_registers * GetWordSize() - + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize() - + FrameEntrySpillSize(), - kStackAlignment)); + if (number_of_spill_slots == 0 + && !HasAllocatedCalleeSaveRegisters() + && IsLeafMethod() + && !RequiresCurrentMethod()) { + DCHECK_EQ(maximum_number_of_live_core_registers, 0u); + DCHECK_EQ(maximum_number_of_live_fp_registers, 0u); + SetFrameSize(CallPushesPC() ? 
GetWordSize() : 0); + } else { + SetFrameSize(RoundUp( + number_of_spill_slots * kVRegSize + + number_of_out_slots * kVRegSize + + maximum_number_of_live_core_registers * GetWordSize() + + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize() + + FrameEntrySpillSize(), + kStackAlignment)); + } } Location CodeGenerator::GetTemporaryLocation(HTemporary* temp) const { @@ -294,7 +334,8 @@ void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const { } } -void CodeGenerator::InitLocations(HInstruction* instruction) { +void CodeGenerator::InitLocationsBaseline(HInstruction* instruction) { + AllocateLocations(instruction); if (instruction->GetLocations() == nullptr) { if (instruction->IsTemporary()) { HInstruction* previous = instruction->GetPrevious(); @@ -320,10 +361,17 @@ void CodeGenerator::InitLocations(HInstruction* instruction) { } } -bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const { - DCHECK_EQ(block_order_->Get(current_block_index_), current); - return (current_block_index_ < block_order_->Size() - 1) - && (block_order_->Get(current_block_index_ + 1) == next); +void CodeGenerator::AllocateLocations(HInstruction* instruction) { + instruction->Accept(GetLocationBuilder()); + LocationSummary* locations = instruction->GetLocations(); + if (!instruction->IsSuspendCheckEntry()) { + if (locations != nullptr && locations->CanCall()) { + MarkNotLeaf(); + } + if (instruction->NeedsCurrentMethod()) { + SetRequiresCurrentMethod(); + } + } } CodeGenerator* CodeGenerator::Create(HGraph* graph, @@ -572,7 +620,7 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) { Location location = locations->GetEnvironmentAt(i); switch (location.GetKind()) { case Location::kConstant: { - DCHECK(current == location.GetConstant()); + DCHECK_EQ(current, location.GetConstant()); if (current->IsLongConstant()) { int64_t value = current->AsLongConstant()->GetValue(); stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, Low32Bits(value)); @@ -588,6 +636,8 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) { } else if (current->IsIntConstant()) { int32_t value = current->AsIntConstant()->GetValue(); stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, value); + } else if (current->IsNullConstant()) { + stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, 0); } else { DCHECK(current->IsFloatConstant()); int32_t value = bit_cast<float, int32_t>(current->AsFloatConstant()->GetValue()); diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index ab63b911b2..f46a36d02f 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -30,7 +30,6 @@ namespace art { static size_t constexpr kVRegSize = 4; -static size_t constexpr kUninitializedFrameSize = 0; // Binary encoding of 2^32 for type double. 
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000); @@ -92,6 +91,8 @@ class CodeGenerator { HGraph* GetGraph() const { return graph_; } + HBasicBlock* GetNextBlockToEmit() const; + HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const; bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const; size_t GetStackSlotOfParameter(HParameterValue* parameter) const { @@ -107,8 +108,6 @@ class CodeGenerator { virtual void GenerateFrameExit() = 0; virtual void Bind(HBasicBlock* block) = 0; virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0; - virtual HGraphVisitor* GetLocationBuilder() = 0; - virtual HGraphVisitor* GetInstructionVisitor() = 0; virtual Assembler* GetAssembler() = 0; virtual size_t GetWordSize() const = 0; virtual size_t GetFloatingPointSpillSlotSize() const = 0; @@ -196,6 +195,15 @@ class CodeGenerator { void MarkNotLeaf() { is_leaf_ = false; + requires_current_method_ = true; + } + + void SetRequiresCurrentMethod() { + requires_current_method_ = true; + } + + bool RequiresCurrentMethod() const { + return requires_current_method_; } // Clears the spill slots taken by loop phis in the `LocationSummary` of the @@ -228,6 +236,41 @@ class CodeGenerator { allocated_registers_.Add(location); } + void AllocateLocations(HInstruction* instruction); + + // Tells whether the stack frame of the compiled method is + // considered "empty", that is either actually having a size of zero, + // or just containing the saved return address register. + bool HasEmptyFrame() const { + return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0); + } + + static int32_t GetInt32ValueOf(HConstant* constant) { + if (constant->IsIntConstant()) { + return constant->AsIntConstant()->GetValue(); + } else if (constant->IsNullConstant()) { + return 0; + } else { + DCHECK(constant->IsFloatConstant()); + return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue()); + } + } + + static int64_t GetInt64ValueOf(HConstant* constant) { + if (constant->IsIntConstant()) { + return constant->AsIntConstant()->GetValue(); + } else if (constant->IsNullConstant()) { + return 0; + } else if (constant->IsFloatConstant()) { + return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue()); + } else if (constant->IsLongConstant()) { + return constant->AsLongConstant()->GetValue(); + } else { + DCHECK(constant->IsDoubleConstant()); + return bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue()); + } + } + protected: CodeGenerator(HGraph* graph, size_t number_of_core_registers, @@ -236,7 +279,7 @@ class CodeGenerator { uint32_t core_callee_save_mask, uint32_t fpu_callee_save_mask, const CompilerOptions& compiler_options) - : frame_size_(kUninitializedFrameSize), + : frame_size_(0), core_spill_mask_(0), fpu_spill_mask_(0), first_register_slot_in_slow_path_(0), @@ -255,6 +298,7 @@ class CodeGenerator { block_order_(nullptr), current_block_index_(0), is_leaf_(true), + requires_current_method_(false), stack_map_stream_(graph->GetArena()) {} // Register allocation logic. @@ -269,11 +313,12 @@ class CodeGenerator { virtual Location GetStackLocation(HLoadLocal* load) const = 0; virtual ParallelMoveResolver* GetMoveResolver() = 0; + virtual HGraphVisitor* GetLocationBuilder() = 0; + virtual HGraphVisitor* GetInstructionVisitor() = 0; // Returns the location of the first spilled entry for floating point registers, // relative to the stack pointer. 
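// A standalone sketch of what GetInt32ValueOf/GetInt64ValueOf above compute
// (illustrative tagged-union stand-in for HConstant; the real code dispatches on the
// HInstruction kind): null constants become 0, and float/double constants are
// reinterpreted as their raw IEEE-754 bit patterns.
#include <cstdint>
#include <cstring>

enum class ConstKind { kInt, kNull, kFloat, kLong, kDouble };
struct Constant { ConstKind kind; int64_t i; float f; double d; };

static int64_t GetInt64ValueOf(const Constant& c) {
  switch (c.kind) {
    case ConstKind::kInt:
    case ConstKind::kLong:   return c.i;
    case ConstKind::kNull:   return 0;
    case ConstKind::kFloat:  { int32_t bits; std::memcpy(&bits, &c.f, sizeof(bits)); return bits; }
    case ConstKind::kDouble: { int64_t bits; std::memcpy(&bits, &c.d, sizeof(bits)); return bits; }
  }
  return 0;
}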
uint32_t GetFpuSpillStart() const { - DCHECK_NE(frame_size_, kUninitializedFrameSize); return GetFrameSize() - FrameEntrySpillSize(); } @@ -289,6 +334,25 @@ class CodeGenerator { return GetFpuSpillSize() + GetCoreSpillSize(); } + bool HasAllocatedCalleeSaveRegisters() const { + // We check the core registers against 1 because it always comprises the return PC. + return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1) + || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0); + } + + bool CallPushesPC() const { + InstructionSet instruction_set = GetInstructionSet(); + return instruction_set == kX86 || instruction_set == kX86_64; + } + + // Arm64 has its own type for a label, so we need to templatize this method + // to share the logic. + template <typename T> + T* CommonGetLabelOf(T* raw_pointer_to_labels_array, HBasicBlock* block) const { + block = FirstNonEmptyBlock(block); + return raw_pointer_to_labels_array + block->GetBlockId(); + } + // Frame size required for this method. uint32_t frame_size_; uint32_t core_spill_mask_; @@ -311,7 +375,7 @@ class CodeGenerator { const uint32_t fpu_callee_save_mask_; private: - void InitLocations(HInstruction* instruction); + void InitLocationsBaseline(HInstruction* instruction); size_t GetStackOffsetOfSavedRegister(size_t index); void CompileInternal(CodeAllocator* allocator, bool is_baseline); @@ -328,8 +392,12 @@ class CodeGenerator { // we are generating code for. size_t current_block_index_; + // Whether the method is a leaf method. bool is_leaf_; + // Whether an instruction in the graph accesses the current method. + bool requires_current_method_; + StackMapStream stack_map_stream_; DISALLOW_COPY_AND_ASSIGN(CodeGenerator); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 78fd181dcf..e864ae1cec 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -19,6 +19,8 @@ #include "arch/arm/instruction_set_features_arm.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" +#include "intrinsics.h" +#include "intrinsics_arm.h" #include "mirror/array-inl.h" #include "mirror/art_method.h" #include "mirror/class.h" @@ -32,11 +34,6 @@ namespace art { namespace arm { -static DRegister FromLowSToD(SRegister reg) { - DCHECK_EQ(reg % 2, 0); - return static_cast<DRegister>(reg / 2); -} - static bool ExpectedPairLayout(Location location) { // We expected this for both core and fpu register pairs. return ((location.low() & 1) == 0) && (location.low() + 1 == location.high()); @@ -58,6 +55,10 @@ static constexpr Register kCoreCalleeSaves[] = static constexpr SRegister kFpuCalleeSaves[] = { S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31 }; +// D31 cannot be split into two S registers, and the register allocator only works on +// S registers. Therefore there is no need to block it. 
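// A minimal sketch of the label redirection done by CommonGetLabelOf above
// (illustrative Block type, not the ART HBasicBlock): blocks containing only a Goto
// emit no code and no label, so any branch to such a block must instead target the
// first successor that does emit code.
struct Block {
  bool single_goto;       // block contains exactly one instruction: a Goto
  Block* sole_successor;  // valid when single_goto is true
  int id;
};

static const Block* FirstNonEmptyBlock(const Block* block) {
  while (block->single_goto) {
    block = block->sole_successor;
  }
  return block;
}

template <typename Label>
static Label* GetLabelOf(Label* labels, const Block* block) {
  // Same indexing trick as CommonGetLabelOf: labels are stored per block id,
  // but lookups are redirected past empty blocks.
  return labels + FirstNonEmptyBlock(block)->id;
}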
+static constexpr DRegister DTMP = D31; + class InvokeRuntimeCallingConvention : public CallingConvention<Register, SRegister> { public: InvokeRuntimeCallingConvention() @@ -73,20 +74,6 @@ class InvokeRuntimeCallingConvention : public CallingConvention<Register, SRegis #define __ reinterpret_cast<ArmAssembler*>(codegen->GetAssembler())-> #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmWordSize, x).Int32Value() -class SlowPathCodeARM : public SlowPathCode { - public: - SlowPathCodeARM() : entry_label_(), exit_label_() {} - - Label* GetEntryLabel() { return &entry_label_; } - Label* GetExitLabel() { return &exit_label_; } - - private: - Label entry_label_; - Label exit_label_; - - DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM); -}; - class NullCheckSlowPathARM : public SlowPathCodeARM { public: explicit NullCheckSlowPathARM(HNullCheck* instruction) : instruction_(instruction) {} @@ -396,10 +383,6 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, move_resolver_(graph->GetArena(), this), assembler_(true), isa_features_(isa_features) { - // Save one extra register for baseline. Note that on thumb2, there is no easy - // instruction to restore just the PC, so this actually helps both baseline - // and non-baseline to save and restore at least two registers at entry and exit. - AddAllocatedRegister(Location::RegisterLocation(kCoreSavedRegisterForBaseline)); // Save the PC register to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(PC)); } @@ -508,6 +491,10 @@ static uint32_t LeastSignificantBit(uint32_t mask) { void CodeGeneratorARM::ComputeSpillMask() { core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_; + // Save one extra register for baseline. Note that on thumb2, there is no easy + // instruction to restore just the PC, so this actually helps both baseline + // and non-baseline to save and restore at least two registers at entry and exit. 
+ core_spill_mask_ |= (1 << kCoreSavedRegisterForBaseline); DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved"; fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_; // We use vpush and vpop for saving and restoring floating point registers, which take @@ -529,6 +516,10 @@ void CodeGeneratorARM::GenerateFrameEntry() { DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); __ Bind(&frame_entry_label_); + if (HasEmptyFrame()) { + return; + } + if (!skip_overflow_check) { __ AddConstant(IP, SP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm))); __ LoadFromOffset(kLoadWord, IP, IP, 0); @@ -547,6 +538,10 @@ void CodeGeneratorARM::GenerateFrameEntry() { } void CodeGeneratorARM::GenerateFrameExit() { + if (HasEmptyFrame()) { + __ bx(LR); + return; + } __ AddConstant(SP, GetFrameSize() - FrameEntrySpillSize()); if (fpu_spill_mask_ != 0) { SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_)); @@ -784,8 +779,8 @@ void CodeGeneratorARM::Move(HInstruction* instruction, Location location, HInstr if (locations != nullptr && locations->Out().IsConstant()) { HConstant* const_to_move = locations->Out().GetConstant(); - if (const_to_move->IsIntConstant()) { - int32_t value = const_to_move->AsIntConstant()->GetValue(); + if (const_to_move->IsIntConstant() || const_to_move->IsNullConstant()) { + int32_t value = GetInt32ValueOf(const_to_move); if (location.IsRegister()) { __ LoadImmediate(location.AsRegister<Register>(), value); } else { @@ -952,8 +947,8 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { __ cmp(left, ShifterOperand(locations->InAt(1).AsRegister<Register>())); } else { DCHECK(locations->InAt(1).IsConstant()); - int32_t value = - locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + HConstant* constant = locations->InAt(1).GetConstant(); + int32_t value = CodeGenerator::GetInt32ValueOf(constant); ShifterOperand operand; if (GetAssembler()->ShifterOperandCanHold(R0, left, CMP, value, &operand)) { __ cmp(left, operand); @@ -1114,6 +1109,17 @@ void InstructionCodeGeneratorARM::VisitIntConstant(HIntConstant* constant) { UNUSED(constant); } +void LocationsBuilderARM::VisitNullConstant(HNullConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorARM::VisitNullConstant(HNullConstant* constant) { + // Will be generated at use site. 
+ UNUSED(constant); +} + void LocationsBuilderARM::VisitLongConstant(HLongConstant* constant) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); @@ -1168,44 +1174,37 @@ void InstructionCodeGeneratorARM::VisitReturn(HReturn* ret) { } void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(), + codegen_->GetInstructionSetFeatures()); + if (intrinsic.TryDispatch(invoke)) { + return; + } + HandleInvoke(invoke); } void CodeGeneratorARM::LoadCurrentMethod(Register reg) { + DCHECK(RequiresCurrentMethod()); __ LoadFromOffset(kLoadWord, reg, SP, kCurrentMethodStackOffset); } -void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); - - // TODO: Implement all kinds of calls: - // 1) boot -> boot - // 2) app -> boot - // 3) app -> app - // - // Currently we implement the app -> app logic, which looks up in the resolve cache. +static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen) { + if (invoke->GetLocations()->Intrinsified()) { + IntrinsicCodeGeneratorARM intrinsic(codegen); + intrinsic.Dispatch(invoke); + return true; + } + return false; +} - // temp = method; - codegen_->LoadCurrentMethod(temp); - if (!invoke->IsRecursive()) { - // temp = temp->dex_cache_resolved_methods_; - __ LoadFromOffset( - kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()); - // temp = temp[index_in_cache] - __ LoadFromOffset( - kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())); - // LR = temp[offset_of_quick_compiled_code] - __ LoadFromOffset(kLoadWord, LR, temp, - mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kArmWordSize).Int32Value()); - // LR() - __ blx(LR); - } else { - __ bl(codegen_->GetFrameEntryLabel()); +void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + if (TryGenerateIntrinsicCode(invoke, codegen_)) { + return; } - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); - DCHECK(!codegen_->IsLeafMethod()); + Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); + + codegen_->GenerateStaticOrDirectCall(invoke, temp); } void LocationsBuilderARM::HandleInvoke(HInvoke* invoke) { @@ -1223,10 +1222,20 @@ void LocationsBuilderARM::HandleInvoke(HInvoke* invoke) { } void LocationsBuilderARM::VisitInvokeVirtual(HInvokeVirtual* invoke) { + IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(), + codegen_->GetInstructionSetFeatures()); + if (intrinsic.TryDispatch(invoke)) { + return; + } + HandleInvoke(invoke); } void InstructionCodeGeneratorARM::VisitInvokeVirtual(HInvokeVirtual* invoke) { + if (TryGenerateIntrinsicCode(invoke, codegen_)) { + return; + } + Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); uint32_t method_offset = mirror::Class::EmbeddedVTableOffset().Uint32Value() + invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry); @@ -3366,16 +3375,44 @@ void ParallelMoveResolverARM::EmitMove(size_t index) { __ StoreSToOffset(source.AsFpuRegister<SRegister>(), SP, destination.GetStackIndex()); } } else if (source.IsDoubleStackSlot()) { - DCHECK(destination.IsDoubleStackSlot()) << destination; - __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex()); - __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); - __ 
LoadFromOffset(kLoadWord, IP, SP, source.GetHighStackIndex(kArmWordSize)); - __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize)); + if (destination.IsDoubleStackSlot()) { + __ LoadDFromOffset(DTMP, SP, source.GetStackIndex()); + __ StoreDToOffset(DTMP, SP, destination.GetStackIndex()); + } else if (destination.IsRegisterPair()) { + DCHECK(ExpectedPairLayout(destination)); + __ LoadFromOffset( + kLoadWordPair, destination.AsRegisterPairLow<Register>(), SP, source.GetStackIndex()); + } else { + DCHECK(destination.IsFpuRegisterPair()) << destination; + __ LoadDFromOffset(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()), + SP, + source.GetStackIndex()); + } + } else if (source.IsRegisterPair()) { + if (destination.IsRegisterPair()) { + __ Mov(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>()); + __ Mov(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>()); + } else { + DCHECK(destination.IsDoubleStackSlot()) << destination; + DCHECK(ExpectedPairLayout(source)); + __ StoreToOffset( + kStoreWordPair, source.AsRegisterPairLow<Register>(), SP, destination.GetStackIndex()); + } + } else if (source.IsFpuRegisterPair()) { + if (destination.IsFpuRegisterPair()) { + __ vmovd(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()), + FromLowSToD(source.AsFpuRegisterPairLow<SRegister>())); + } else { + DCHECK(destination.IsDoubleStackSlot()) << destination; + __ StoreDToOffset(FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()), + SP, + destination.GetStackIndex()); + } } else { DCHECK(source.IsConstant()) << source; - HInstruction* constant = source.GetConstant(); - if (constant->IsIntConstant()) { - int32_t value = constant->AsIntConstant()->GetValue(); + HConstant* constant = source.GetConstant(); + if (constant->IsIntConstant() || constant->IsNullConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(constant); if (destination.IsRegister()) { __ LoadImmediate(destination.AsRegister<Register>(), value); } else { @@ -3385,17 +3422,11 @@ void ParallelMoveResolverARM::EmitMove(size_t index) { } } else if (constant->IsLongConstant()) { int64_t value = constant->AsLongConstant()->GetValue(); - if (destination.IsRegister()) { - // In the presence of long or double constants, the parallel move resolver will - // split the move into two, but keeps the same constant for both moves. Here, - // we use the low or high part depending on which register this move goes to. 
- if (destination.reg() % 2 == 0) { - __ LoadImmediate(destination.AsRegister<Register>(), Low32Bits(value)); - } else { - __ LoadImmediate(destination.AsRegister<Register>(), High32Bits(value)); - } + if (destination.IsRegisterPair()) { + __ LoadImmediate(destination.AsRegisterPairLow<Register>(), Low32Bits(value)); + __ LoadImmediate(destination.AsRegisterPairHigh<Register>(), High32Bits(value)); } else { - DCHECK(destination.IsDoubleStackSlot()); + DCHECK(destination.IsDoubleStackSlot()) << destination; __ LoadImmediate(IP, Low32Bits(value)); __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); __ LoadImmediate(IP, High32Bits(value)); @@ -3403,20 +3434,11 @@ void ParallelMoveResolverARM::EmitMove(size_t index) { } } else if (constant->IsDoubleConstant()) { double value = constant->AsDoubleConstant()->GetValue(); - uint64_t int_value = bit_cast<uint64_t, double>(value); - if (destination.IsFpuRegister()) { - // In the presence of long or double constants, the parallel move resolver will - // split the move into two, but keeps the same constant for both moves. Here, - // we use the low or high part depending on which register this move goes to. - if (destination.reg() % 2 == 0) { - __ LoadSImmediate(destination.AsFpuRegister<SRegister>(), - bit_cast<float, uint32_t>(Low32Bits(int_value))); - } else { - __ LoadSImmediate(destination.AsFpuRegister<SRegister>(), - bit_cast<float, uint32_t>(High32Bits(int_value))); - } + if (destination.IsFpuRegisterPair()) { + __ LoadDImmediate(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()), value); } else { - DCHECK(destination.IsDoubleStackSlot()); + DCHECK(destination.IsDoubleStackSlot()) << destination; + uint64_t int_value = bit_cast<uint64_t, double>(value); __ LoadImmediate(IP, Low32Bits(int_value)); __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); __ LoadImmediate(IP, High32Bits(int_value)); @@ -3474,6 +3496,40 @@ void ParallelMoveResolverARM::EmitSwap(size_t index) { __ vmovrs(IP, source.AsFpuRegister<SRegister>()); __ vmovs(source.AsFpuRegister<SRegister>(), destination.AsFpuRegister<SRegister>()); __ vmovsr(destination.AsFpuRegister<SRegister>(), IP); + } else if (source.IsRegisterPair() && destination.IsRegisterPair()) { + __ vmovdrr(DTMP, source.AsRegisterPairLow<Register>(), source.AsRegisterPairHigh<Register>()); + __ Mov(source.AsRegisterPairLow<Register>(), destination.AsRegisterPairLow<Register>()); + __ Mov(source.AsRegisterPairHigh<Register>(), destination.AsRegisterPairHigh<Register>()); + __ vmovrrd(destination.AsRegisterPairLow<Register>(), + destination.AsRegisterPairHigh<Register>(), + DTMP); + } else if (source.IsRegisterPair() || destination.IsRegisterPair()) { + Register low_reg = source.IsRegisterPair() + ? source.AsRegisterPairLow<Register>() + : destination.AsRegisterPairLow<Register>(); + int mem = source.IsRegisterPair() + ? destination.GetStackIndex() + : source.GetStackIndex(); + DCHECK(ExpectedPairLayout(source.IsRegisterPair() ? 
source : destination)); + __ vmovdrr(DTMP, low_reg, static_cast<Register>(low_reg + 1)); + __ LoadFromOffset(kLoadWordPair, low_reg, SP, mem); + __ StoreDToOffset(DTMP, SP, mem); + } else if (source.IsFpuRegisterPair() && destination.IsFpuRegisterPair()) { + DRegister first = FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()); + DRegister second = FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()); + __ vmovd(DTMP, first); + __ vmovd(first, second); + __ vmovd(second, DTMP); + } else if (source.IsFpuRegisterPair() || destination.IsFpuRegisterPair()) { + DRegister reg = source.IsFpuRegisterPair() + ? FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()) + : FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()); + int mem = source.IsFpuRegisterPair() + ? destination.GetStackIndex() + : source.GetStackIndex(); + __ vmovd(DTMP, reg); + __ LoadDFromOffset(reg, SP, mem); + __ StoreDToOffset(DTMP, SP, mem); } else if (source.IsFpuRegister() || destination.IsFpuRegister()) { SRegister reg = source.IsFpuRegister() ? source.AsFpuRegister<SRegister>() : destination.AsFpuRegister<SRegister>(); @@ -3482,7 +3538,7 @@ void ParallelMoveResolverARM::EmitSwap(size_t index) { : source.GetStackIndex(); __ vmovrs(IP, reg); - __ LoadFromOffset(kLoadWord, IP, SP, mem); + __ LoadSFromOffset(reg, SP, mem); __ StoreToOffset(kStoreWord, IP, SP, mem); } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) { Exchange(source.GetStackIndex(), destination.GetStackIndex()); @@ -3776,5 +3832,50 @@ void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instr } } +void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Register temp) { + DCHECK_EQ(temp, kArtMethodRegister); + + // TODO: Implement all kinds of calls: + // 1) boot -> boot + // 2) app -> boot + // 3) app -> app + // + // Currently we implement the app -> app logic, which looks up in the resolve cache. + + // temp = method; + LoadCurrentMethod(temp); + if (!invoke->IsRecursive()) { + // temp = temp->dex_cache_resolved_methods_; + __ LoadFromOffset( + kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()); + // temp = temp[index_in_cache] + __ LoadFromOffset( + kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())); + // LR = temp[offset_of_quick_compiled_code] + __ LoadFromOffset(kLoadWord, LR, temp, + mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kArmWordSize).Int32Value()); + // LR() + __ blx(LR); + } else { + __ bl(GetFrameEntryLabel()); + } + + RecordPcInfo(invoke, invoke->GetDexPc()); + DCHECK(!IsLeafMethod()); +} + +void LocationsBuilderARM::VisitBoundType(HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. + UNUSED(instruction); + LOG(FATAL) << "Unreachable"; +} + +void InstructionCodeGeneratorARM::VisitBoundType(HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. 
+ UNUSED(instruction); + LOG(FATAL) << "Unreachable"; +} + } // namespace arm } // namespace art diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 4b03dffd38..f1a3729c13 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -39,6 +39,14 @@ static constexpr SRegister kParameterFpuRegisters[] = { S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15 }; static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRegisters); +static constexpr Register kArtMethodRegister = R0; + +static constexpr DRegister FromLowSToD(SRegister reg) { + return DCHECK_CONSTEXPR(reg % 2 == 0, , D0) + static_cast<DRegister>(reg / 2); +} + + class InvokeDexCallingConvention : public CallingConvention<Register, SRegister> { public: InvokeDexCallingConvention() @@ -90,6 +98,20 @@ class ParallelMoveResolverARM : public ParallelMoveResolver { DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARM); }; +class SlowPathCodeARM : public SlowPathCode { + public: + SlowPathCodeARM() : entry_label_(), exit_label_() {} + + Label* GetEntryLabel() { return &entry_label_; } + Label* GetExitLabel() { return &exit_label_; } + + private: + Label entry_label_; + Label exit_label_; + + DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM); +}; + class LocationsBuilderARM : public HGraphVisitor { public: LocationsBuilderARM(HGraph* graph, CodeGeneratorARM* codegen) @@ -230,7 +252,7 @@ class CodeGeneratorARM : public CodeGenerator { void MarkGCCard(Register temp, Register card, Register object, Register value); Label* GetLabelOf(HBasicBlock* block) const { - return block_labels_.GetRawStorage() + block->GetBlockId(); + return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); } void Initialize() OVERRIDE { @@ -249,6 +271,8 @@ class CodeGeneratorARM : public CodeGenerator { Label* GetFrameEntryLabel() { return &frame_entry_label_; } + void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Register temp); + private: // Labels for each block that will be compiled. GrowableArray<Label> block_labels_; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 3bc23fe4f3..0d7864fa35 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -402,15 +402,15 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, const CompilerOptions& com kNumberOfAllocatableRegisters, kNumberOfAllocatableFPRegisters, kNumberOfAllocatableRegisterPairs, - (1 << LR), - 0, + callee_saved_core_registers.list(), + callee_saved_fp_registers.list(), compiler_options), block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this) { // Save the link register (containing the return address) to mimic Quick. 
- AddAllocatedRegister(Location::RegisterLocation(LR)); + AddAllocatedRegister(LocationFrom(lr)); } #undef __ @@ -448,27 +448,32 @@ void CodeGeneratorARM64::GenerateFrameEntry() { UseScratchRegisterScope temps(GetVIXLAssembler()); Register temp = temps.AcquireX(); DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); - __ Add(temp, sp, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64))); + __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64))); __ Ldr(wzr, MemOperand(temp, 0)); RecordPcInfo(nullptr, 0); } - int frame_size = GetFrameSize(); - __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex)); - __ PokeCPURegList(GetFramePreservedRegisters(), frame_size - FrameEntrySpillSize()); - - // Stack layout: - // sp[frame_size - 8] : lr. - // ... : other preserved registers. - // sp[frame_size - regs_size]: first preserved register. - // ... : reserved frame space. - // sp[0] : current method. + if (!HasEmptyFrame()) { + int frame_size = GetFrameSize(); + // Stack layout: + // sp[frame_size - 8] : lr. + // ... : other preserved core registers. + // ... : other preserved fp registers. + // ... : reserved frame space. + // sp[0] : current method. + __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex)); + __ PokeCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); + __ PokeCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); + } } void CodeGeneratorARM64::GenerateFrameExit() { - int frame_size = GetFrameSize(); - __ PeekCPURegList(GetFramePreservedRegisters(), frame_size - FrameEntrySpillSize()); - __ Drop(frame_size); + if (!HasEmptyFrame()) { + int frame_size = GetFrameSize(); + __ PeekCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); + __ PeekCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); + __ Drop(frame_size); + } } void CodeGeneratorARM64::Bind(HBasicBlock* block) { @@ -486,18 +491,21 @@ void CodeGeneratorARM64::Move(HInstruction* instruction, Primitive::Type type = instruction->GetType(); DCHECK_NE(type, Primitive::kPrimVoid); - if (instruction->IsIntConstant() || instruction->IsLongConstant()) { - int64_t value = instruction->IsIntConstant() ? instruction->AsIntConstant()->GetValue() - : instruction->AsLongConstant()->GetValue(); + if (instruction->IsIntConstant() + || instruction->IsLongConstant() + || instruction->IsNullConstant()) { + int64_t value = GetInt64ValueOf(instruction->AsConstant()); if (location.IsRegister()) { Register dst = RegisterFrom(location, type); - DCHECK((instruction->IsIntConstant() && dst.Is32Bits()) || + DCHECK(((instruction->IsIntConstant() || instruction->IsNullConstant()) && dst.Is32Bits()) || (instruction->IsLongConstant() && dst.Is64Bits())); __ Mov(dst, value); } else { DCHECK(location.IsStackSlot() || location.IsDoubleStackSlot()); UseScratchRegisterScope temps(GetVIXLAssembler()); - Register temp = instruction->IsIntConstant() ? temps.AcquireW() : temps.AcquireX(); + Register temp = (instruction->IsIntConstant() || instruction->IsNullConstant()) + ? 
temps.AcquireW() + : temps.AcquireX(); __ Mov(temp, value); __ Str(temp, StackOperandFrom(location)); } @@ -555,26 +563,38 @@ void CodeGeneratorARM64::MarkGCCard(Register object, Register value) { __ Bind(&done); } -void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSED) const { - // Block reserved registers: - // ip0 (VIXL temporary) - // ip1 (VIXL temporary) - // tr - // lr - // sp is not part of the allocatable registers, so we don't need to block it. - // TODO: Avoid blocking callee-saved registers, and instead preserve them - // where necessary. +void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline) const { + // Blocked core registers: + // lr : Runtime reserved. + // tr : Runtime reserved. + // xSuspend : Runtime reserved. TODO: Unblock this when the runtime stops using it. + // ip1 : VIXL core temp. + // ip0 : VIXL core temp. + // + // Blocked fp registers: + // d31 : VIXL fp temp. CPURegList reserved_core_registers = vixl_reserved_core_registers; reserved_core_registers.Combine(runtime_reserved_core_registers); - reserved_core_registers.Combine(quick_callee_saved_registers); while (!reserved_core_registers.IsEmpty()) { blocked_core_registers_[reserved_core_registers.PopLowestIndex().code()] = true; } + CPURegList reserved_fp_registers = vixl_reserved_fp_registers; - reserved_fp_registers.Combine(CPURegList::GetCalleeSavedFP()); - while (!reserved_core_registers.IsEmpty()) { + while (!reserved_fp_registers.IsEmpty()) { blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().code()] = true; } + + if (is_baseline) { + CPURegList reserved_core_baseline_registers = callee_saved_core_registers; + while (!reserved_core_baseline_registers.IsEmpty()) { + blocked_core_registers_[reserved_core_baseline_registers.PopLowestIndex().code()] = true; + } + + CPURegList reserved_fp_baseline_registers = callee_saved_fp_registers; + while (!reserved_fp_baseline_registers.IsEmpty()) { + blocked_fpu_registers_[reserved_fp_baseline_registers.PopLowestIndex().code()] = true; + } + } } Location CodeGeneratorARM64::AllocateFreeRegister(Primitive::Type type) const { @@ -626,10 +646,12 @@ void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg } void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) { - if (constant->IsIntConstant() || constant->IsLongConstant()) { - __ Mov(Register(destination), - constant->IsIntConstant() ? constant->AsIntConstant()->GetValue() - : constant->AsLongConstant()->GetValue()); + if (constant->IsIntConstant()) { + __ Mov(Register(destination), constant->AsIntConstant()->GetValue()); + } else if (constant->IsLongConstant()) { + __ Mov(Register(destination), constant->AsLongConstant()->GetValue()); + } else if (constant->IsNullConstant()) { + __ Mov(Register(destination), 0); } else if (constant->IsFloatConstant()) { __ Fmov(FPRegister(destination), constant->AsFloatConstant()->GetValue()); } else { @@ -643,6 +665,8 @@ static bool CoherentConstantAndType(Location constant, Primitive::Type type) { DCHECK(constant.IsConstant()); HConstant* cst = constant.GetConstant(); return (cst->IsIntConstant() && type == Primitive::kPrimInt) || + // Null is mapped to a core W register, which we associate with kPrimInt. 
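+         // (It is materialized as the 32-bit value 0; see GetInt64ValueOf() in code_generator.h.)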
+ (cst->IsNullConstant() && type == Primitive::kPrimInt) || (cst->IsLongConstant() && type == Primitive::kPrimLong) || (cst->IsFloatConstant() && type == Primitive::kPrimFloat) || (cst->IsDoubleConstant() && type == Primitive::kPrimDouble); @@ -663,7 +687,9 @@ void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Pri if (unspecified_type) { HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr; if (source.IsStackSlot() || - (src_cst != nullptr && (src_cst->IsIntConstant() || src_cst->IsFloatConstant()))) { + (src_cst != nullptr && (src_cst->IsIntConstant() + || src_cst->IsFloatConstant() + || src_cst->IsNullConstant()))) { // For stack slots and 32bit constants, a 64bit type is appropriate. type = destination.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat; } else { @@ -709,7 +735,7 @@ void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Pri UseScratchRegisterScope temps(GetVIXLAssembler()); HConstant* src_cst = source.GetConstant(); CPURegister temp; - if (src_cst->IsIntConstant()) { + if (src_cst->IsIntConstant() || src_cst->IsNullConstant()) { temp = temps.AcquireW(); } else if (src_cst->IsLongConstant()) { temp = temps.AcquireX(); @@ -947,6 +973,7 @@ void CodeGeneratorARM64::StoreRelease(Primitive::Type type, } void CodeGeneratorARM64::LoadCurrentMethod(vixl::Register current_method) { + DCHECK(RequiresCurrentMethod()); DCHECK(current_method.IsW()); __ Ldr(current_method, MemOperand(sp, kCurrentMethodStackOffset)); } @@ -1370,7 +1397,13 @@ void LocationsBuilderARM64::VisitCompare(HCompare* compare) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + HInstruction* right = compare->InputAt(1); + if ((right->IsFloatConstant() && (right->AsFloatConstant()->GetValue() == 0.0f)) || + (right->IsDoubleConstant() && (right->AsDoubleConstant()->GetValue() == 0.0))) { + locations->SetInAt(1, Location::ConstantLocation(right->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresFpuRegister()); + } locations->SetOut(Location::RequiresRegister()); break; } @@ -1400,9 +1433,17 @@ void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) { case Primitive::kPrimDouble: { Register result = OutputRegister(compare); FPRegister left = InputFPRegisterAt(compare, 0); - FPRegister right = InputFPRegisterAt(compare, 1); - - __ Fcmp(left, right); + if (compare->GetLocations()->InAt(1).IsConstant()) { + if (kIsDebugBuild) { + HInstruction* right = compare->GetLocations()->InAt(1).GetConstant(); + DCHECK((right->IsFloatConstant() && (right->AsFloatConstant()->GetValue() == 0.0f)) || + (right->IsDoubleConstant() && (right->AsDoubleConstant()->GetValue() == 0.0))); + } + // 0.0 is the only immediate that can be encoded directly in a FCMP instruction. + __ Fcmp(left, 0.0); + } else { + __ Fcmp(left, InputFPRegisterAt(compare, 1)); + } if (compare->IsGtBias()) { __ Cset(result, ne); } else { @@ -1752,6 +1793,16 @@ void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant) { UNUSED(constant); } +void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant) { + // Will be generated at use site. 
+ UNUSED(constant); +} + void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall); @@ -2545,6 +2596,18 @@ void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) { HandleBinaryOp(instruction); } +void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. + UNUSED(instruction); + LOG(FATAL) << "Unreachable"; +} + +void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. + UNUSED(instruction); + LOG(FATAL) << "Unreachable"; +} + #undef __ #undef QUICK_ENTRY_POINT diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 9a99dcccea..afb7fc3718 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -50,14 +50,24 @@ static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegi const vixl::Register tr = vixl::x18; // Thread Register static const vixl::Register kArtMethodRegister = vixl::w0; // Method register on invoke. +const vixl::Register kQuickSuspendRegister = vixl::x19; const vixl::CPURegList vixl_reserved_core_registers(vixl::ip0, vixl::ip1); const vixl::CPURegList vixl_reserved_fp_registers(vixl::d31); -const vixl::CPURegList runtime_reserved_core_registers(tr, vixl::lr); -const vixl::CPURegList quick_callee_saved_registers(vixl::CPURegister::kRegister, - vixl::kXRegSize, - kArm64CalleeSaveRefSpills); +// TODO: When the runtime does not use kQuickSuspendRegister as a suspend +// counter remove it from the reserved registers list. +const vixl::CPURegList runtime_reserved_core_registers(tr, kQuickSuspendRegister, vixl::lr); + +// Callee-saved registers defined by AAPCS64. 
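+// AAPCS64 only guarantees the low 64 bits of v8-v15 across calls, hence the FP list
+// below is expressed in D registers; the core list spans x19-x28 plus x29 (FP) and
+// x30 (LR).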
+const vixl::CPURegList callee_saved_core_registers(vixl::CPURegister::kRegister, + vixl::kXRegSize, + vixl::x19.code(), + vixl::x30.code()); +const vixl::CPURegList callee_saved_fp_registers(vixl::CPURegister::kFPRegister, + vixl::kDRegSize, + vixl::d8.code(), + vixl::d15.code()); Location ARM64ReturnLocation(Primitive::Type return_type); class SlowPathCodeARM64 : public SlowPathCode { @@ -191,16 +201,20 @@ class CodeGeneratorARM64 : public CodeGenerator { void GenerateFrameEntry() OVERRIDE; void GenerateFrameExit() OVERRIDE; - static const vixl::CPURegList& GetFramePreservedRegisters() { - static const vixl::CPURegList frame_preserved_regs = - vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize, vixl::lr.Bit()); - return frame_preserved_regs; + vixl::CPURegList GetFramePreservedCoreRegisters() const { + return vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize, + core_spill_mask_); + } + + vixl::CPURegList GetFramePreservedFPRegisters() const { + return vixl::CPURegList(vixl::CPURegister::kFPRegister, vixl::kDRegSize, + fpu_spill_mask_); } void Bind(HBasicBlock* block) OVERRIDE; vixl::Label* GetLabelOf(HBasicBlock* block) const { - return block_labels_ + block->GetBlockId(); + return CommonGetLabelOf<vixl::Label>(block_labels_, block); } void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 98f93a418a..1101569174 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -470,12 +470,16 @@ void CodeGeneratorX86::GenerateFrameEntry() { RecordPcInfo(nullptr, 0); } - __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); - __ movl(Address(ESP, kCurrentMethodStackOffset), EAX); + if (!HasEmptyFrame()) { + __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); + __ movl(Address(ESP, kCurrentMethodStackOffset), EAX); + } } void CodeGeneratorX86::GenerateFrameExit() { - __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); + if (!HasEmptyFrame()) { + __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); + } } void CodeGeneratorX86::Bind(HBasicBlock* block) { @@ -483,6 +487,7 @@ void CodeGeneratorX86::Bind(HBasicBlock* block) { } void CodeGeneratorX86::LoadCurrentMethod(Register reg) { + DCHECK(RequiresCurrentMethod()); __ movl(reg, Address(ESP, kCurrentMethodStackOffset)); } @@ -597,13 +602,7 @@ void CodeGeneratorX86::Move32(Location destination, Location source) { __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>()); } else if (source.IsConstant()) { HConstant* constant = source.GetConstant(); - int32_t value; - if (constant->IsIntConstant()) { - value = constant->AsIntConstant()->GetValue(); - } else { - DCHECK(constant->IsFloatConstant()); - value = bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue()); - } + int32_t value = GetInt32ValueOf(constant); __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value)); } else { DCHECK(source.IsStackSlot()); @@ -669,8 +668,8 @@ void CodeGeneratorX86::Move(HInstruction* instruction, Location location, HInstr if (locations != nullptr && locations->Out().IsConstant()) { HConstant* const_to_move = locations->Out().GetConstant(); - if (const_to_move->IsIntConstant()) { - Immediate imm(const_to_move->AsIntConstant()->GetValue()); + if (const_to_move->IsIntConstant() || const_to_move->IsNullConstant()) { + Immediate imm(GetInt32ValueOf(const_to_move)); 
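      // GetInt32ValueOf() (see code_generator.h) folds a null constant to 0, so null
      // constants take the same integer-immediate paths below, e.g.
      //   __ movl(location.AsRegister<Register>(), Immediate(0));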
if (location.IsRegister()) { __ movl(location.AsRegister<Register>(), imm); } else if (location.IsStackSlot()) { @@ -920,7 +919,7 @@ void InstructionCodeGeneratorX86::VisitCondition(HCondition* comp) { locations->InAt(1).AsRegister<Register>()); } else if (locations->InAt(1).IsConstant()) { HConstant* instruction = locations->InAt(1).GetConstant(); - Immediate imm(instruction->AsIntConstant()->GetValue()); + Immediate imm(CodeGenerator::GetInt32ValueOf(instruction)); __ cmpl(locations->InAt(0).AsRegister<Register>(), imm); } else { __ cmpl(locations->InAt(0).AsRegister<Register>(), @@ -989,6 +988,17 @@ void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant) { UNUSED(constant); } +void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorX86::VisitNullConstant(HNullConstant* constant) { + // Will be generated at use site. + UNUSED(constant); +} + void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); @@ -1799,7 +1809,7 @@ void LocationsBuilderX86::VisitAdd(HAdd* add) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::Any()); + locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -1843,8 +1853,6 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) { case Primitive::kPrimFloat: { if (second.IsFpuRegister()) { __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); - } else { - __ addss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex())); } break; } @@ -1852,8 +1860,6 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) { case Primitive::kPrimDouble: { if (second.IsFpuRegister()) { __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); - } else { - __ addsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex())); } break; } @@ -3495,8 +3501,8 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { } } else if (source.IsConstant()) { HConstant* constant = source.GetConstant(); - if (constant->IsIntConstant()) { - Immediate imm(constant->AsIntConstant()->GetValue()); + if (constant->IsIntConstant() || constant->IsNullConstant()) { + Immediate imm(CodeGenerator::GetInt32ValueOf(constant)); if (destination.IsRegister()) { __ movl(destination.AsRegister<Register>(), imm); } else { @@ -3904,5 +3910,17 @@ void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instr } } +void LocationsBuilderX86::VisitBoundType(HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. + UNUSED(instruction); + LOG(FATAL) << "Unreachable"; +} + +void InstructionCodeGeneratorX86::VisitBoundType(HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. 
+ UNUSED(instruction); + LOG(FATAL) << "Unreachable"; +} + } // namespace x86 } // namespace art diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 107ddafea4..f5a9b7d1f7 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -234,7 +234,7 @@ class CodeGeneratorX86 : public CodeGenerator { void LoadCurrentMethod(Register reg); Label* GetLabelOf(HBasicBlock* block) const { - return block_labels_.GetRawStorage() + block->GetBlockId(); + return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); } void Initialize() OVERRIDE { diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 2ff53a0603..41a19e11f0 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -487,6 +487,10 @@ void CodeGeneratorX86_64::GenerateFrameEntry() { RecordPcInfo(nullptr, 0); } + if (HasEmptyFrame()) { + return; + } + for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) { Register reg = kCoreCalleeSaves[i]; if (allocated_registers_.ContainsCoreRegister(reg)) { @@ -509,6 +513,9 @@ void CodeGeneratorX86_64::GenerateFrameEntry() { } void CodeGeneratorX86_64::GenerateFrameExit() { + if (HasEmptyFrame()) { + return; + } uint32_t xmm_spill_location = GetFpuSpillStart(); size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize(); for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { @@ -533,6 +540,7 @@ void CodeGeneratorX86_64::Bind(HBasicBlock* block) { } void CodeGeneratorX86_64::LoadCurrentMethod(CpuRegister reg) { + DCHECK(RequiresCurrentMethod()); __ movl(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset)); } @@ -599,13 +607,7 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) { source.AsFpuRegister<XmmRegister>()); } else if (source.IsConstant()) { HConstant* constant = source.GetConstant(); - int32_t value; - if (constant->IsFloatConstant()) { - value = bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue()); - } else { - DCHECK(constant->IsIntConstant()); - value = constant->AsIntConstant()->GetValue(); - } + int32_t value = GetInt32ValueOf(constant); __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value)); } else { DCHECK(source.IsStackSlot()) << source; @@ -649,8 +651,8 @@ void CodeGeneratorX86_64::Move(HInstruction* instruction, if (locations != nullptr && locations->Out().IsConstant()) { HConstant* const_to_move = locations->Out().GetConstant(); - if (const_to_move->IsIntConstant()) { - Immediate imm(const_to_move->AsIntConstant()->GetValue()); + if (const_to_move->IsIntConstant() || const_to_move->IsNullConstant()) { + Immediate imm(GetInt32ValueOf(const_to_move)); if (location.IsRegister()) { __ movl(location.AsRegister<CpuRegister>(), imm); } else if (location.IsStackSlot()) { @@ -790,7 +792,7 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { // Materialized condition, compare against 0. 
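// (For a register operand, `test reg, reg` sets ZF/SF exactly like `cmp reg, 0` while
// needing no immediate byte, so the change below prefers testl; the stack-slot operand
// keeps cmpl against Immediate(0).)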
Location lhs = if_instr->GetLocations()->InAt(0); if (lhs.IsRegister()) { - __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(0)); + __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); } else { __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0)); @@ -806,8 +808,12 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { if (rhs.IsRegister()) { __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>()); } else if (rhs.IsConstant()) { - __ cmpl(lhs.AsRegister<CpuRegister>(), - Immediate(rhs.GetConstant()->AsIntConstant()->GetValue())); + int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); + if (constant == 0) { + __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); + } else { + __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant)); + } } else { __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex())); @@ -883,15 +889,19 @@ void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* comp) { CpuRegister reg = locations->Out().AsRegister<CpuRegister>(); // Clear register: setcc only sets the low byte. __ xorq(reg, reg); - if (locations->InAt(1).IsRegister()) { - __ cmpl(locations->InAt(0).AsRegister<CpuRegister>(), - locations->InAt(1).AsRegister<CpuRegister>()); - } else if (locations->InAt(1).IsConstant()) { - __ cmpl(locations->InAt(0).AsRegister<CpuRegister>(), - Immediate(locations->InAt(1).GetConstant()->AsIntConstant()->GetValue())); + Location lhs = locations->InAt(0); + Location rhs = locations->InAt(1); + if (rhs.IsRegister()) { + __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>()); + } else if (rhs.IsConstant()) { + int32_t constant = rhs.GetConstant()->AsIntConstant()->GetValue(); + if (constant == 0) { + __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); + } else { + __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant)); + } } else { - __ cmpl(locations->InAt(0).AsRegister<CpuRegister>(), - Address(CpuRegister(RSP), locations->InAt(1).GetStackIndex())); + __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex())); } __ setcc(X86_64Condition(comp->GetCondition()), reg); } @@ -1018,6 +1028,17 @@ void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant) { UNUSED(constant); } +void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant) { + // Will be generated at use site. 
+ UNUSED(constant); +} + void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); @@ -1840,8 +1861,8 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) { switch (add->GetResultType()) { case Primitive::kPrimInt: { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); - locations->SetOut(Location::SameAsFirstInput()); + locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } @@ -1869,16 +1890,27 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) { LocationSummary* locations = add->GetLocations(); Location first = locations->InAt(0); Location second = locations->InAt(1); - DCHECK(first.Equals(locations->Out())); + Location out = locations->Out(); switch (add->GetResultType()) { case Primitive::kPrimInt: { if (second.IsRegister()) { - __ addl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); + if (out.AsRegister<Register>() == first.AsRegister<Register>()) { + __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); + } else { + __ leal(out.AsRegister<CpuRegister>(), Address( + first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0)); + } } else if (second.IsConstant()) { - Immediate imm(second.GetConstant()->AsIntConstant()->GetValue()); - __ addl(first.AsRegister<CpuRegister>(), imm); + if (out.AsRegister<Register>() == first.AsRegister<Register>()) { + __ addl(out.AsRegister<CpuRegister>(), + Immediate(second.GetConstant()->AsIntConstant()->GetValue())); + } else { + __ leal(out.AsRegister<CpuRegister>(), Address( + first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue())); + } } else { + DCHECK(first.Equals(locations->Out())); __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex())); } break; @@ -2754,7 +2786,7 @@ void InstructionCodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instr Location obj = locations->InAt(0); if (obj.IsRegister()) { - __ cmpl(obj.AsRegister<CpuRegister>(), Immediate(0)); + __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>()); } else if (obj.IsStackSlot()) { __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0)); } else { @@ -3236,13 +3268,17 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { } } else if (source.IsConstant()) { HConstant* constant = source.GetConstant(); - if (constant->IsIntConstant()) { - Immediate imm(constant->AsIntConstant()->GetValue()); + if (constant->IsIntConstant() || constant->IsNullConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(constant); if (destination.IsRegister()) { - __ movl(destination.AsRegister<CpuRegister>(), imm); + if (value == 0) { + __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>()); + } else { + __ movl(destination.AsRegister<CpuRegister>(), Immediate(value)); + } } else { DCHECK(destination.IsStackSlot()) << destination; - __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm); + __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value)); } } else if (constant->IsLongConstant()) { int64_t value = constant->AsLongConstant()->GetValue(); @@ -3675,5 +3711,17 @@ void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* in } } +void 
LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. + UNUSED(instruction); + LOG(FATAL) << "Unreachable"; +} + +void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. + UNUSED(instruction); + LOG(FATAL) << "Unreachable"; +} + } // namespace x86_64 } // namespace art diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index dbdbf869db..707c9992c0 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -232,7 +232,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { void LoadCurrentMethod(CpuRegister reg); Label* GetLabelOf(HBasicBlock* block) const { - return block_labels_.GetRawStorage() + block->GetBlockId(); + return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); } void Initialize() OVERRIDE { diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc index b246c6f98d..7623e421fd 100644 --- a/compiler/optimizing/dominator_test.cc +++ b/compiler/optimizing/dominator_test.cc @@ -14,11 +14,11 @@ * limitations under the License. */ +#include "base/arena_allocator.h" #include "builder.h" #include "dex_instruction.h" #include "nodes.h" #include "optimizing_unit_test.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc index e05d9b3b0f..2bfecc696a 100644 --- a/compiler/optimizing/find_loops_test.cc +++ b/compiler/optimizing/find_loops_test.cc @@ -14,13 +14,13 @@ * limitations under the License. */ +#include "base/arena_allocator.h" #include "builder.h" #include "dex_file.h" #include "dex_instruction.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "ssa_liveness_analysis.h" -#include "utils/arena_allocator.h" #include "pretty_printer.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 4ebb1363cc..a7f1f74e27 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -160,6 +160,22 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { instruction->GetId())); } } + + // Ensure 'instruction' has pointers to its inputs' use entries. 
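+  // That is, the HUseListNode recorded in InputRecordAt(i) must still be an element of
+  // InputAt(i)->GetUses(); a pass that rewires inputs without updating the recorded use
+  // node is reported here.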
+ for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) { + HUserRecord<HInstruction*> input_record = instruction->InputRecordAt(i); + HInstruction* input = input_record.GetInstruction(); + HUseListNode<HInstruction*>* use_node = input_record.GetUseNode(); + if (use_node == nullptr || !input->GetUses().Contains(use_node)) { + AddError(StringPrintf("Instruction %s:%d has an invalid pointer to use entry " + "at input %u (%s:%d).", + instruction->DebugName(), + instruction->GetId(), + static_cast<unsigned>(i), + input->DebugName(), + input->GetId())); + } + } } void SSAChecker::VisitBasicBlock(HBasicBlock* block) { @@ -285,6 +301,19 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) { } } +static Primitive::Type PrimitiveKind(Primitive::Type type) { + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimChar: + case Primitive::kPrimInt: + return Primitive::kPrimInt; + default: + return type; + } +} + void SSAChecker::VisitPhi(HPhi* phi) { VisitInstruction(phi); @@ -321,18 +350,17 @@ void SSAChecker::VisitPhi(HPhi* phi) { } } } -} - -static Primitive::Type PrimitiveKind(Primitive::Type type) { - switch (type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimChar: - case Primitive::kPrimInt: - return Primitive::kPrimInt; - default: - return type; + // Ensure that the inputs have the same primitive kind as the phi. + for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { + HInstruction* input = phi->InputAt(i); + if (PrimitiveKind(input->GetType()) != PrimitiveKind(phi->GetType())) { + AddError(StringPrintf( + "Input %d at index %zu of phi %d from block %d does not have the " + "same type as the phi: %s versus %s", + input->GetId(), i, phi->GetId(), phi->GetBlock()->GetBlockId(), + Primitive::PrettyDescriptor(input->GetType()), + Primitive::PrettyDescriptor(phi->GetType()))); + } } } diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc index c59f8366fa..4742e4d073 100644 --- a/compiler/optimizing/graph_test.cc +++ b/compiler/optimizing/graph_test.cc @@ -14,12 +14,12 @@ * limitations under the License. 
*/ +#include "base/arena_allocator.h" #include "base/stringprintf.h" #include "builder.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "pretty_printer.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 835bca688f..c59273753e 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -184,6 +184,10 @@ class HGraphVisualizerPrinter : public HGraphVisitor { output_ << " " << instruction->GetValue(); } + void VisitPhi(HPhi* phi) OVERRIDE { + output_ << " " << phi->GetRegNumber(); + } + void PrintInstruction(HInstruction* instruction) { output_ << instruction->DebugName(); instruction->Accept(this); diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index 89bba2d9f6..cb448c883f 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -270,7 +270,7 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { set = new (allocator_) ValueSet(allocator_); } else { HBasicBlock* dominator = block->GetDominator(); - set = sets_.Get(dominator->GetBlockId())->Copy(); + set = sets_.Get(dominator->GetBlockId()); if (dominator->GetSuccessors().Size() != 1 || dominator->GetSuccessors().Get(0) != block) { // We have to copy if the dominator has other successors, or `block` is not a successor // of the dominator. diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc index 4a48fee2fb..a81d49aa0c 100644 --- a/compiler/optimizing/gvn_test.cc +++ b/compiler/optimizing/gvn_test.cc @@ -14,12 +14,12 @@ * limitations under the License. */ +#include "base/arena_allocator.h" #include "builder.h" #include "gvn.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "side_effects_analysis.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 32f6972c84..d55a3ca00b 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -159,7 +159,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, SsaDeadPhiElimination dead_phi(callee_graph); HDeadCodeElimination dce(callee_graph); HConstantFolding fold(callee_graph); - InstructionSimplifier simplify(callee_graph); + InstructionSimplifier simplify(callee_graph, stats_); HOptimization* optimizations[] = { &redundant_phi, @@ -176,7 +176,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, if (depth_ + 1 < kDepthLimit) { HInliner inliner( - callee_graph, outer_compilation_unit_, compiler_driver_, outer_stats_, depth_ + 1); + callee_graph, outer_compilation_unit_, compiler_driver_, stats_, depth_ + 1); inliner.Run(); } @@ -221,7 +221,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, // after optimizations get a unique id. 
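  // In other words, the caller graph's instruction id counter is bumped past every id the
  // inlined graph used, keeping ids unique after the two graphs are merged.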
graph_->SetCurrentInstructionId(callee_graph->GetNextInstructionId()); VLOG(compiler) << "Successfully inlined " << PrettyMethod(method_index, outer_dex_file); - outer_stats_->RecordStat(kInlinedInvoke); + MaybeRecordStat(kInlinedInvoke); return true; } diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index 07d893e7c9..8e9cf837df 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -35,10 +35,9 @@ class HInliner : public HOptimization { CompilerDriver* compiler_driver, OptimizingCompilerStats* stats, size_t depth = 0) - : HOptimization(outer_graph, true, "inliner"), + : HOptimization(outer_graph, true, "inliner", stats), outer_compilation_unit_(outer_compilation_unit), compiler_driver_(compiler_driver), - outer_stats_(stats), depth_(depth) {} void Run() OVERRIDE; @@ -48,7 +47,6 @@ class HInliner : public HOptimization { const DexCompilationUnit& outer_compilation_unit_; CompilerDriver* const compiler_driver_; - OptimizingCompilerStats* const outer_stats_; const size_t depth_; DISALLOW_COPY_AND_ASSIGN(HInliner); diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 17c8f337ca..fd99070780 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -16,11 +16,15 @@ #include "instruction_simplifier.h" +#include "mirror/class-inl.h" +#include "scoped_thread_state_change.h" + namespace art { class InstructionSimplifierVisitor : public HGraphVisitor { public: - explicit InstructionSimplifierVisitor(HGraph* graph) : HGraphVisitor(graph) {} + InstructionSimplifierVisitor(HGraph* graph, OptimizingCompilerStats* stats) + : HGraphVisitor(graph), stats_(stats) {} private: void VisitSuspendCheck(HSuspendCheck* check) OVERRIDE; @@ -28,10 +32,14 @@ class InstructionSimplifierVisitor : public HGraphVisitor { void VisitArraySet(HArraySet* equal) OVERRIDE; void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; void VisitNullCheck(HNullCheck* instruction) OVERRIDE; + void VisitArrayLength(HArrayLength* instruction) OVERRIDE; + void VisitCheckCast(HCheckCast* instruction) OVERRIDE; + + OptimizingCompilerStats* stats_; }; void InstructionSimplifier::Run() { - InstructionSimplifierVisitor visitor(graph_); + InstructionSimplifierVisitor visitor(graph_, stats_); visitor.VisitInsertionOrder(); } @@ -40,6 +48,28 @@ void InstructionSimplifierVisitor::VisitNullCheck(HNullCheck* null_check) { if (!obj->CanBeNull()) { null_check->ReplaceWith(obj); null_check->GetBlock()->RemoveInstruction(null_check); + if (stats_ != nullptr) { + stats_->RecordStat(MethodCompilationStat::kRemovedNullCheck); + } + } +} + +void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { + HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); + if (!load_class->IsResolved()) { + // If the class couldn't be resolve it's not safe to compare against it. It's + // default type would be Top which might be wider that the actual class type + // and thus producing wrong results. 
+ return; + } + ReferenceTypeInfo obj_rti = check_cast->InputAt(0)->GetReferenceTypeInfo(); + ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); + ScopedObjectAccess soa(Thread::Current()); + if (class_rti.IsSupertypeOf(obj_rti)) { + check_cast->GetBlock()->RemoveInstruction(check_cast); + if (stats_ != nullptr) { + stats_->RecordStat(MethodCompilationStat::kRemovedCheckedCast); + } } } @@ -75,6 +105,18 @@ void InstructionSimplifierVisitor::VisitEqual(HEqual* equal) { } } +void InstructionSimplifierVisitor::VisitArrayLength(HArrayLength* instruction) { + HInstruction* input = instruction->InputAt(0); + // If the array is a NewArray with constant size, replace the array length + // with the constant instruction. This helps the bounds check elimination phase. + if (input->IsNewArray()) { + input = input->InputAt(0); + if (input->IsIntConstant()) { + instruction->ReplaceWith(input); + } + } +} + void InstructionSimplifierVisitor::VisitArraySet(HArraySet* instruction) { HInstruction* value = instruction->GetValue(); if (value->GetType() != Primitive::kPrimNot) return; diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h index bca6697d05..a7ff755aed 100644 --- a/compiler/optimizing/instruction_simplifier.h +++ b/compiler/optimizing/instruction_simplifier.h @@ -19,6 +19,7 @@ #include "nodes.h" #include "optimization.h" +#include "optimizing_compiler_stats.h" namespace art { @@ -27,8 +28,10 @@ namespace art { */ class InstructionSimplifier : public HOptimization { public: - explicit InstructionSimplifier(HGraph* graph, const char* name = "instruction_simplifier") - : HOptimization(graph, true, name) {} + InstructionSimplifier(HGraph* graph, + OptimizingCompilerStats* stats = nullptr, + const char* name = "instruction_simplifier") + : HOptimization(graph, true, name, stats) {} void Run() OVERRIDE; }; diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc new file mode 100644 index 0000000000..a82d80af13 --- /dev/null +++ b/compiler/optimizing/intrinsics_arm.cc @@ -0,0 +1,883 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "intrinsics_arm.h" + +#include "arch/arm/instruction_set_features_arm.h" +#include "code_generator_arm.h" +#include "entrypoints/quick/quick_entrypoints.h" +#include "intrinsics.h" +#include "mirror/array-inl.h" +#include "mirror/art_method.h" +#include "mirror/string.h" +#include "thread.h" +#include "utils/arm/assembler_arm.h" + +namespace art { + +namespace arm { + +ArmAssembler* IntrinsicCodeGeneratorARM::GetAssembler() { + return codegen_->GetAssembler(); +} + +ArenaAllocator* IntrinsicCodeGeneratorARM::GetAllocator() { + return codegen_->GetGraph()->GetArena(); +} + +#define __ codegen->GetAssembler()-> + +static void MoveFromReturnRegister(Location trg, Primitive::Type type, CodeGeneratorARM* codegen) { + if (!trg.IsValid()) { + DCHECK(type == Primitive::kPrimVoid); + return; + } + + DCHECK_NE(type, Primitive::kPrimVoid); + + if (Primitive::IsIntegralType(type)) { + if (type == Primitive::kPrimLong) { + Register trg_reg_lo = trg.AsRegisterPairLow<Register>(); + Register trg_reg_hi = trg.AsRegisterPairHigh<Register>(); + Register res_reg_lo = R0; + Register res_reg_hi = R1; + if (trg_reg_lo != res_reg_hi) { + if (trg_reg_lo != res_reg_lo) { + __ mov(trg_reg_lo, ShifterOperand(res_reg_lo)); + __ mov(trg_reg_hi, ShifterOperand(res_reg_hi)); + } else { + DCHECK_EQ(trg_reg_lo + 1, trg_reg_hi); + } + } else { + __ mov(trg_reg_hi, ShifterOperand(res_reg_hi)); + __ mov(trg_reg_lo, ShifterOperand(res_reg_lo)); + } + } else { + Register trg_reg = trg.AsRegister<Register>(); + Register res_reg = R0; + if (trg_reg != res_reg) { + __ mov(trg_reg, ShifterOperand(res_reg)); + } + } + } else { + UNIMPLEMENTED(FATAL) << "Floating-point return."; + } +} + +static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorARM* codegen) { + if (invoke->InputCount() == 0) { + return; + } + + LocationSummary* locations = invoke->GetLocations(); + InvokeDexCallingConventionVisitor calling_convention_visitor; + + // We're moving potentially two or more locations to locations that could overlap, so we need + // a parallel move resolver. + HParallelMove parallel_move(arena); + + for (size_t i = 0; i < invoke->InputCount(); i++) { + HInstruction* input = invoke->InputAt(i); + Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType()); + Location actual_loc = locations->InAt(i); + + parallel_move.AddMove(actual_loc, cc_loc, nullptr); + } + + codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); +} + +// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified +// call. This will copy the arguments into the positions for a regular call. +// +// Note: The actual parameters are required to be in the locations given by the invoke's location +// summary. If an intrinsic modifies those locations before a slowpath call, they must be +// restored! 
+class IntrinsicSlowPathARM : public SlowPathCodeARM { + public: + explicit IntrinsicSlowPathARM(HInvoke* invoke) : invoke_(invoke) { } + + void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { + CodeGeneratorARM* codegen = down_cast<CodeGeneratorARM*>(codegen_in); + __ Bind(GetEntryLabel()); + + codegen->SaveLiveRegisters(invoke_->GetLocations()); + + MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen); + + if (invoke_->IsInvokeStaticOrDirect()) { + codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister); + } else { + UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented"; + UNREACHABLE(); + } + + // Copy the result back to the expected output. + Location out = invoke_->GetLocations()->Out(); + if (out.IsValid()) { + DCHECK(out.IsRegister()); // TODO: Replace this when we support output in memory. + DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg())); + MoveFromReturnRegister(out, invoke_->GetType(), codegen); + } + + codegen->RestoreLiveRegisters(invoke_->GetLocations()); + __ b(GetExitLabel()); + } + + private: + // The instruction where this slow path is happening. + HInvoke* const invoke_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARM); +}; + +#undef __ + +bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) { + Dispatch(invoke); + LocationSummary* res = invoke->GetLocations(); + return res != nullptr && res->Intrinsified(); +} + +#define __ assembler-> + +static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); +} + +static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) { + Location input = locations->InAt(0); + Location output = locations->Out(); + if (is64bit) { + __ vmovrrd(output.AsRegisterPairLow<Register>(), + output.AsRegisterPairHigh<Register>(), + FromLowSToD(input.AsFpuRegisterPairLow<SRegister>())); + } else { + __ vmovrs(output.AsRegister<Register>(), input.AsFpuRegister<SRegister>()); + } +} + +static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) { + Location input = locations->InAt(0); + Location output = locations->Out(); + if (is64bit) { + __ vmovdrr(FromLowSToD(output.AsFpuRegisterPairLow<SRegister>()), + input.AsRegisterPairLow<Register>(), + input.AsRegisterPairHigh<Register>()); + } else { + __ vmovsr(output.AsFpuRegister<SRegister>(), input.AsRegister<Register>()); + } +} + +void IntrinsicLocationsBuilderARM::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), true, GetAssembler()); +} +void IntrinsicCodeGeneratorARM::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), true, GetAssembler()); +} + +void 
IntrinsicLocationsBuilderARM::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitFloatIntBitsToFloat(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), false, GetAssembler()); +} +void IntrinsicCodeGeneratorARM::VisitFloatIntBitsToFloat(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); +} + +static void MathAbsFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) { + Location in = locations->InAt(0); + Location out = locations->Out(); + + if (is64bit) { + __ vabsd(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), + FromLowSToD(in.AsFpuRegisterPairLow<SRegister>())); + } else { + __ vabss(out.AsFpuRegister<SRegister>(), in.AsFpuRegister<SRegister>()); + } +} + +void IntrinsicLocationsBuilderARM::VisitMathAbsDouble(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMathAbsDouble(HInvoke* invoke) { + MathAbsFP(invoke->GetLocations(), true, GetAssembler()); +} + +void IntrinsicLocationsBuilderARM::VisitMathAbsFloat(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMathAbsFloat(HInvoke* invoke) { + MathAbsFP(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + + locations->AddTemp(Location::RequiresRegister()); +} + +static void GenAbsInteger(LocationSummary* locations, + bool is64bit, + ArmAssembler* assembler) { + Location in = locations->InAt(0); + Location output = locations->Out(); + + Register mask = locations->GetTemp(0).AsRegister<Register>(); + + if (is64bit) { + Register in_reg_lo = in.AsRegisterPairLow<Register>(); + Register in_reg_hi = in.AsRegisterPairHigh<Register>(); + Register out_reg_lo = output.AsRegisterPairLow<Register>(); + Register out_reg_hi = output.AsRegisterPairHigh<Register>(); + + DCHECK_NE(out_reg_lo, in_reg_hi) << "Diagonal overlap unexpected."; + + __ Asr(mask, in_reg_hi, 31); + __ adds(out_reg_lo, in_reg_lo, ShifterOperand(mask)); + __ adc(out_reg_hi, in_reg_hi, ShifterOperand(mask)); + __ eor(out_reg_lo, mask, ShifterOperand(out_reg_lo)); + __ eor(out_reg_hi, mask, ShifterOperand(out_reg_hi)); + } else { + Register in_reg = in.AsRegister<Register>(); + Register out_reg = output.AsRegister<Register>(); + + __ Asr(mask, in_reg, 31); + __ add(out_reg, in_reg, ShifterOperand(mask)); + __ eor(out_reg, mask, 
ShifterOperand(out_reg)); + } +} + +void IntrinsicLocationsBuilderARM::VisitMathAbsInt(HInvoke* invoke) { + CreateIntToIntPlusTemp(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMathAbsInt(HInvoke* invoke) { + GenAbsInteger(invoke->GetLocations(), false, GetAssembler()); +} + + +void IntrinsicLocationsBuilderARM::VisitMathAbsLong(HInvoke* invoke) { + CreateIntToIntPlusTemp(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMathAbsLong(HInvoke* invoke) { + GenAbsInteger(invoke->GetLocations(), true, GetAssembler()); +} + +static void GenMinMax(LocationSummary* locations, + bool is_min, + ArmAssembler* assembler) { + Register op1 = locations->InAt(0).AsRegister<Register>(); + Register op2 = locations->InAt(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + __ cmp(op1, ShifterOperand(op2)); + + __ it((is_min) ? Condition::LT : Condition::GT, kItElse); + __ mov(out, ShifterOperand(op1), is_min ? Condition::LT : Condition::GT); + __ mov(out, ShifterOperand(op2), is_min ? Condition::GE : Condition::LE); +} + +static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void IntrinsicLocationsBuilderARM::VisitMathMinIntInt(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMathMinIntInt(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), true, GetAssembler()); +} + +void IntrinsicLocationsBuilderARM::VisitMathMaxIntInt(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMathMaxIntInt(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), false, GetAssembler()); +} + +void IntrinsicLocationsBuilderARM::VisitMathSqrt(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMathSqrt(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + ArmAssembler* assembler = GetAssembler(); + __ vsqrtd(FromLowSToD(locations->Out().AsFpuRegisterPairLow<SRegister>()), + FromLowSToD(locations->InAt(0).AsFpuRegisterPairLow<SRegister>())); +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPeekByte(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPeekByte(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. + __ ldrsb(invoke->GetLocations()->Out().AsRegister<Register>(), + Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>())); +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPeekIntNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPeekIntNative(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. 
+ __ ldr(invoke->GetLocations()->Out().AsRegister<Register>(), + Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>())); +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPeekLongNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPeekLongNative(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. + Register addr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>(); + // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor + // exception. So we can't use ldrd as addr may be unaligned. + Register lo = invoke->GetLocations()->Out().AsRegisterPairLow<Register>(); + Register hi = invoke->GetLocations()->Out().AsRegisterPairHigh<Register>(); + if (addr == lo) { + __ ldr(hi, Address(addr, 4)); + __ ldr(lo, Address(addr, 0)); + } else { + __ ldr(lo, Address(addr, 0)); + __ ldr(hi, Address(addr, 4)); + } +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPeekShortNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPeekShortNative(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. + __ ldrsh(invoke->GetLocations()->Out().AsRegister<Register>(), + Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>())); +} + +static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPokeByte(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPokeByte(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + __ strb(invoke->GetLocations()->InAt(1).AsRegister<Register>(), + Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>())); +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPokeIntNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPokeIntNative(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + __ str(invoke->GetLocations()->InAt(1).AsRegister<Register>(), + Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>())); +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPokeLongNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPokeLongNative(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. + Register addr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>(); + // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor + // exception. So we can't use ldrd as addr may be unaligned. 
+ __ str(invoke->GetLocations()->InAt(1).AsRegisterPairLow<Register>(), Address(addr, 0)); + __ str(invoke->GetLocations()->InAt(1).AsRegisterPairHigh<Register>(), Address(addr, 4)); +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPokeShortNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPokeShortNative(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + __ strh(invoke->GetLocations()->InAt(1).AsRegister<Register>(), + Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>())); +} + +void IntrinsicLocationsBuilderARM::VisitThreadCurrentThread(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetOut(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorARM::VisitThreadCurrentThread(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + __ LoadFromOffset(kLoadWord, + invoke->GetLocations()->Out().AsRegister<Register>(), + TR, + Thread::PeerOffset<kArmPointerSize>().Int32Value()); +} + +static void GenUnsafeGet(HInvoke* invoke, + Primitive::Type type, + bool is_volatile, + CodeGeneratorARM* codegen) { + LocationSummary* locations = invoke->GetLocations(); + DCHECK((type == Primitive::kPrimInt) || + (type == Primitive::kPrimLong) || + (type == Primitive::kPrimNot)); + ArmAssembler* assembler = codegen->GetAssembler(); + Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer. + Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Long offset, lo part only. + + if (type == Primitive::kPrimLong) { + Register trg_lo = locations->Out().AsRegisterPairLow<Register>(); + __ add(IP, base, ShifterOperand(offset)); + if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) { + Register trg_hi = locations->Out().AsRegisterPairHigh<Register>(); + __ ldrexd(trg_lo, trg_hi, IP); + } else { + __ ldrd(trg_lo, Address(IP)); + } + } else { + Register trg = locations->Out().AsRegister<Register>(); + __ ldr(trg, Address(base, offset)); + } + + if (is_volatile) { + __ dmb(ISH); + } +} + +static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 
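For reference, the volatile path of GenUnsafeGet above is an ordinary load followed by a full dmb ISH barrier (ldrexd stands in for the 64-bit load when the core lacks an atomic ldrd). A minimal C++ sketch of the 32-bit case, with a hypothetical helper name that is not part of the patch:

#include <atomic>
#include <cstdint>

// Mirrors the sequence emitted for Unsafe.getIntVolatile: a plain load, then a full fence
// that keeps later memory accesses from being reordered before the load.
int32_t UnsafeGetIntVolatileSketch(const std::atomic<int32_t>* addr) {
  int32_t value = addr->load(std::memory_order_relaxed);  // ldr trg, [base, offset]
  std::atomic_thread_fence(std::memory_order_seq_cst);    // dmb ish
  return value;
}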
+ locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void IntrinsicLocationsBuilderARM::VisitUnsafeGet(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafeGetVolatile(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafeGetLong(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafeGetLongVolatile(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafeGetObject(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitUnsafeGet(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafeGetVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafeGetLong(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafeGetLongVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafeGetObject(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_); +} + +static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, + const ArmInstructionSetFeatures& features, + Primitive::Type type, + bool is_volatile, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RequiresRegister()); + + if (type == Primitive::kPrimLong) { + // Potentially need temps for ldrexd-strexd loop. + if (is_volatile && !features.HasAtomicLdrdAndStrd()) { + locations->AddTemp(Location::RequiresRegister()); // Temp_lo. + locations->AddTemp(Location::RequiresRegister()); // Temp_hi. + } + } else if (type == Primitive::kPrimNot) { + // Temps for card-marking. + locations->AddTemp(Location::RequiresRegister()); // Temp. + locations->AddTemp(Location::RequiresRegister()); // Card. 
+ } +} + +void IntrinsicLocationsBuilderARM::VisitUnsafePut(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, false, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, false, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, true, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutObject(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, false, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutObjectOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, false, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutObjectVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, true, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutLong(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, false, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutLongOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, false, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutLongVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, true, invoke); +} + +static void GenUnsafePut(LocationSummary* locations, + Primitive::Type type, + bool is_volatile, + bool is_ordered, + CodeGeneratorARM* codegen) { + ArmAssembler* assembler = codegen->GetAssembler(); + + Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer. + Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Long offset, lo part only. 
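The two temps reserved above for reference stores feed the card-marking write barrier: when Unsafe.putObject* writes a reference into an object, the card covering that object is dirtied so the garbage collector knows to re-scan it. A minimal conceptual sketch follows; the shift and marker value are placeholders, not ART's actual constants:

#include <cstddef>
#include <cstdint>

// One card-table byte covers a fixed-size window of the heap. Dirtying the byte for the
// object that received the store records "a reference field in this window changed".
void MarkCardSketch(uint8_t* card_table,
                    uintptr_t stored_into_object,
                    size_t card_shift,      // log2 of the assumed card size.
                    uint8_t dirty_value) {  // Assumed dirty-card marker.
  card_table[stored_into_object >> card_shift] = dirty_value;
}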
+ Register value; + + if (is_volatile || is_ordered) { + __ dmb(ISH); + } + + if (type == Primitive::kPrimLong) { + Register value_lo = locations->InAt(3).AsRegisterPairLow<Register>(); + value = value_lo; + if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) { + Register temp_lo = locations->GetTemp(0).AsRegister<Register>(); + Register temp_hi = locations->GetTemp(1).AsRegister<Register>(); + Register value_hi = locations->InAt(3).AsRegisterPairHigh<Register>(); + + __ add(IP, base, ShifterOperand(offset)); + Label loop_head; + __ Bind(&loop_head); + __ ldrexd(temp_lo, temp_hi, IP); + __ strexd(temp_lo, value_lo, value_hi, IP); + __ cmp(temp_lo, ShifterOperand(0)); + __ b(&loop_head, NE); + } else { + __ add(IP, base, ShifterOperand(offset)); + __ strd(value_lo, Address(IP)); + } + } else { + value = locations->InAt(3).AsRegister<Register>(); + __ str(value, Address(base, offset)); + } + + if (is_volatile) { + __ dmb(ISH); + } + + if (type == Primitive::kPrimNot) { + Register temp = locations->GetTemp(0).AsRegister<Register>(); + Register card = locations->GetTemp(1).AsRegister<Register>(); + codegen->MarkGCCard(temp, card, base, value); + } +} + +void IntrinsicCodeGeneratorARM::VisitUnsafePut(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, true, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutObject(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutObjectOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, true, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutObjectVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutLong(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutLongOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, true, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutLongVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, false, codegen_); +} + +static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RequiresRegister()); + locations->SetInAt(4, Location::RequiresRegister()); + + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + + locations->AddTemp(Location::RequiresRegister()); // Pointer. + locations->AddTemp(Location::RequiresRegister()); // Temp 1. + locations->AddTemp(Location::RequiresRegister()); // Temp 2. 
+} + +static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM* codegen) { + DCHECK_NE(type, Primitive::kPrimLong); + + ArmAssembler* assembler = codegen->GetAssembler(); + + Register out = locations->Out().AsRegister<Register>(); // Boolean result. + + Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer. + Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Offset (discard high 4B). + Register expected_lo = locations->InAt(3).AsRegister<Register>(); // Expected. + Register value_lo = locations->InAt(4).AsRegister<Register>(); // Value. + + Register tmp_ptr = locations->GetTemp(0).AsRegister<Register>(); // Pointer to actual memory. + Register tmp_lo = locations->GetTemp(1).AsRegister<Register>(); // Value in memory. + + if (type == Primitive::kPrimNot) { + // Mark card for object assuming new value is stored. Worst case we will mark an unchanged + // object and scan the receiver at the next GC for nothing. + codegen->MarkGCCard(tmp_ptr, tmp_lo, base, value_lo); + } + + // Prevent reordering with prior memory operations. + __ dmb(ISH); + + __ add(tmp_ptr, base, ShifterOperand(offset)); + + // do { + // tmp = [r_ptr] - expected; + // } while (tmp == 0 && failure([r_ptr] <- r_new_value)); + // result = tmp != 0; + + Label loop_head; + __ Bind(&loop_head); + + __ ldrex(tmp_lo, tmp_ptr); + + __ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo)); + + __ it(EQ, ItState::kItT); + __ strex(tmp_lo, value_lo, tmp_ptr, EQ); + __ cmp(tmp_lo, ShifterOperand(1), EQ); + + __ b(&loop_head, EQ); + + __ dmb(ISH); + + __ rsbs(out, tmp_lo, ShifterOperand(1)); + __ it(CC); + __ mov(out, ShifterOperand(0), CC); +} + +void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke ATTRIBUTE_UNUSED) { + CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke ATTRIBUTE_UNUSED) { + CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); +} +void IntrinsicCodeGeneratorARM::VisitUnsafeCASInt(HInvoke* invoke) { + GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafeCASObject(HInvoke* invoke) { + GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_); +} + +void IntrinsicLocationsBuilderARM::VisitStringCharAt(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCallOnSlowPath, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorARM::VisitStringCharAt(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + // Location of reference to data array + const MemberOffset value_offset = mirror::String::ValueOffset(); + // Location of count + const MemberOffset count_offset = mirror::String::CountOffset(); + // Starting offset within data array + const MemberOffset offset_offset = mirror::String::OffsetOffset(); + // Start of char data with array_ + const MemberOffset data_offset = mirror::Array::DataOffset(sizeof(uint16_t)); + + Register obj = locations->InAt(0).AsRegister<Register>(); // String object pointer. + Register idx = locations->InAt(1).AsRegister<Register>(); // Index of character. 
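The ldrex/strex loop in GenCas above implements a strong compare-and-swap: it retries only when the expected value was observed but the store-exclusive failed, and the result register ends up as 1 on success and 0 otherwise. In portable C++ terms (hypothetical helper, shown only to pin down the semantics):

#include <atomic>
#include <cstdint>

// Unsafe.compareAndSwapInt: true iff *addr held `expected` and was replaced by `new_value`.
// On ARM, compare_exchange_strong compiles to the same kind of LL/SC retry loop.
bool CompareAndSwapIntSketch(std::atomic<int32_t>* addr, int32_t expected, int32_t new_value) {
  return addr->compare_exchange_strong(expected, new_value, std::memory_order_seq_cst);
}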
+ Register out = locations->Out().AsRegister<Register>(); // Result character. + + Register temp = locations->GetTemp(0).AsRegister<Register>(); + Register array_temp = locations->GetTemp(1).AsRegister<Register>(); + + // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth + // the cost. + // TODO: For simplicity, the index parameter is requested in a register, so different from Quick + // we will not optimize the code for constants (which would save a register). + + SlowPathCodeARM* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke); + codegen_->AddSlowPath(slow_path); + + __ ldr(temp, Address(obj, count_offset.Int32Value())); // temp = str.length. + codegen_->MaybeRecordImplicitNullCheck(invoke); + __ cmp(idx, ShifterOperand(temp)); + __ b(slow_path->GetEntryLabel(), CS); + + // Index computation. + __ ldr(temp, Address(obj, offset_offset.Int32Value())); // temp := str.offset. + __ ldr(array_temp, Address(obj, value_offset.Int32Value())); // array_temp := str.offset. + __ add(temp, temp, ShifterOperand(idx)); + DCHECK_EQ(data_offset.Int32Value() % 2, 0); // We'll compensate by shifting. + __ add(temp, temp, ShifterOperand(data_offset.Int32Value() / 2)); + + // Load the value. + __ ldrh(out, Address(array_temp, temp, LSL, 1)); // out := array_temp[temp]. + + __ Bind(slow_path->GetExitLabel()); +} + +// Unimplemented intrinsics. + +#define UNIMPLEMENTED_INTRINSIC(Name) \ +void IntrinsicLocationsBuilderARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ +} \ +void IntrinsicCodeGeneratorARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ +} + +UNIMPLEMENTED_INTRINSIC(IntegerReverse) +UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes) +UNIMPLEMENTED_INTRINSIC(LongReverse) +UNIMPLEMENTED_INTRINSIC(LongReverseBytes) +UNIMPLEMENTED_INTRINSIC(ShortReverseBytes) +UNIMPLEMENTED_INTRINSIC(MathMinDoubleDouble) +UNIMPLEMENTED_INTRINSIC(MathMinFloatFloat) +UNIMPLEMENTED_INTRINSIC(MathMaxDoubleDouble) +UNIMPLEMENTED_INTRINSIC(MathMaxFloatFloat) +UNIMPLEMENTED_INTRINSIC(MathMinLongLong) +UNIMPLEMENTED_INTRINSIC(MathMaxLongLong) +UNIMPLEMENTED_INTRINSIC(MathCeil) // Could be done by changing rounding mode, maybe? +UNIMPLEMENTED_INTRINSIC(MathFloor) // Could be done by changing rounding mode, maybe? +UNIMPLEMENTED_INTRINSIC(MathRint) +UNIMPLEMENTED_INTRINSIC(MathRoundDouble) // Could be done by changing rounding mode, maybe? +UNIMPLEMENTED_INTRINSIC(MathRoundFloat) // Could be done by changing rounding mode, maybe? +UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) // High register pressure. +UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) +UNIMPLEMENTED_INTRINSIC(StringCompareTo) +UNIMPLEMENTED_INTRINSIC(StringIsEmpty) // Might not want to do these two anyways, inlining should +UNIMPLEMENTED_INTRINSIC(StringLength) // be good enough here. +UNIMPLEMENTED_INTRINSIC(StringIndexOf) +UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) +UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) + +} // namespace arm +} // namespace art diff --git a/compiler/optimizing/intrinsics_arm.h b/compiler/optimizing/intrinsics_arm.h new file mode 100644 index 0000000000..8bfb7d4686 --- /dev/null +++ b/compiler/optimizing/intrinsics_arm.h @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_H_ +#define ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_H_ + +#include "intrinsics.h" + +namespace art { + +class ArenaAllocator; +class ArmInstructionSetFeatures; +class HInvokeStaticOrDirect; +class HInvokeVirtual; + +namespace arm { + +class ArmAssembler; +class CodeGeneratorARM; + +class IntrinsicLocationsBuilderARM FINAL : public IntrinsicVisitor { + public: + explicit IntrinsicLocationsBuilderARM(ArenaAllocator* arena, + const ArmInstructionSetFeatures& features) + : arena_(arena), features_(features) {} + + // Define visitor methods. + +#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ + void Visit ## Name(HInvoke* invoke) OVERRIDE; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether + // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to + // the invoke. + bool TryDispatch(HInvoke* invoke); + + private: + ArenaAllocator* arena_; + + const ArmInstructionSetFeatures& features_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderARM); +}; + +class IntrinsicCodeGeneratorARM FINAL : public IntrinsicVisitor { + public: + explicit IntrinsicCodeGeneratorARM(CodeGeneratorARM* codegen) : codegen_(codegen) {} + + // Define visitor methods. + +#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ + void Visit ## Name(HInvoke* invoke) OVERRIDE; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + private: + ArmAssembler* GetAssembler(); + + ArenaAllocator* GetAllocator(); + + CodeGeneratorARM* codegen_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorARM); +}; + +} // namespace arm +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_H_ diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 7a3d7d8389..8874edc341 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -300,7 +300,6 @@ void IntrinsicCodeGeneratorARM64::VisitLongReverse(HInvoke* invoke) { } static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { - // We only support FP registers here. LocationSummary* locations = new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); @@ -924,7 +923,6 @@ void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitStringCharAt(HInvoke* invoke) { - // The inputs plus one temp. 
LocationSummary* locations = new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified); diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc index 10f24d8148..bf9b8e59c5 100644 --- a/compiler/optimizing/licm.cc +++ b/compiler/optimizing/licm.cc @@ -66,8 +66,7 @@ static void UpdateLoopPhisIn(HEnvironment* environment, HLoopInformation* info) for (size_t i = 0, e = environment->Size(); i < e; ++i) { HInstruction* input = environment->GetInstructionAt(i); if (input != nullptr && IsPhiOf(input, info->GetHeader())) { - HUseListNode<HEnvironment*>* env_use = environment->GetInstructionEnvUseAt(i); - input->RemoveEnvironmentUser(env_use); + environment->RemoveAsUserOfInput(i); HInstruction* incoming = input->InputAt(0); environment->SetRawEnvAt(i, incoming); incoming->AddEnvUseAt(environment, i); diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index eb27965c79..f22b7a7e82 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -16,6 +16,7 @@ #include <fstream> +#include "base/arena_allocator.h" #include "base/stringprintf.h" #include "builder.h" #include "code_generator.h" @@ -29,7 +30,6 @@ #include "pretty_printer.h" #include "ssa_builder.h" #include "ssa_liveness_analysis.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/live_interval_test.cc b/compiler/optimizing/live_interval_test.cc index ac8759c805..28000c18f8 100644 --- a/compiler/optimizing/live_interval_test.cc +++ b/compiler/optimizing/live_interval_test.cc @@ -14,9 +14,9 @@ * limitations under the License. */ +#include "base/arena_allocator.h" #include "optimizing_unit_test.h" #include "ssa_liveness_analysis.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index 0558b85b47..17914e8206 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" #include "code_generator_x86.h" @@ -24,7 +25,6 @@ #include "optimizing_unit_test.h" #include "prepare_for_register_allocation.h" #include "ssa_liveness_analysis.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index c9be570c73..907eff162f 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" #include "code_generator_x86.h" @@ -24,7 +25,6 @@ #include "optimizing_unit_test.h" #include "prepare_for_register_allocation.h" #include "ssa_liveness_analysis.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index 990d662d86..4ac1fe8573 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -64,6 +64,13 @@ Location Location::ByteRegisterOrConstant(int reg, HInstruction* instruction) { std::ostream& operator<<(std::ostream& os, const Location& location) { os << location.DebugString(); + if (location.IsRegister() || location.IsFpuRegister()) { + os << location.reg(); + } else if (location.IsPair()) { + os << location.low() << ":" << location.high(); + } else if (location.IsStackSlot() || location.IsDoubleStackSlot()) { + os << location.GetStackIndex(); + } return os; } diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index bf27c5cf7a..198cc15cce 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -17,10 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_LOCATIONS_H_ #define ART_COMPILER_OPTIMIZING_LOCATIONS_H_ +#include "base/arena_object.h" #include "base/bit_field.h" #include "base/bit_vector.h" #include "base/value_object.h" -#include "utils/arena_object.h" #include "utils/growable_array.h" namespace art { @@ -151,6 +151,10 @@ class Location : public ValueObject { return GetKind() == kFpuRegisterPair; } + bool IsRegisterKind() const { + return IsRegister() || IsFpuRegister() || IsRegisterPair() || IsFpuRegisterPair(); + } + int reg() const { DCHECK(IsRegister() || IsFpuRegister()); return GetPayload(); @@ -268,6 +272,20 @@ class Location : public ValueObject { return value_ == other.value_; } + bool Contains(Location other) const { + if (Equals(other)) { + return true; + } else if (IsFpuRegisterPair() && other.IsFpuRegister()) { + return other.reg() == low() || other.reg() == high(); + } else if (IsRegisterPair() && other.IsRegister()) { + return other.reg() == low() || other.reg() == high(); + } else if (IsDoubleStackSlot() && other.IsStackSlot()) { + return (GetStackIndex() == other.GetStackIndex()) + || (GetStackIndex() + 4 == other.GetStackIndex()); + } + return false; + } + const char* DebugString() const { switch (GetKind()) { case kInvalid: return "I"; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index cd36598171..93787b8bfd 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -18,6 +18,7 @@ #include "ssa_builder.h" #include "utils/growable_array.h" +#include "scoped_thread_state_change.h" namespace art { @@ -33,17 +34,14 @@ void HGraph::FindBackEdges(ArenaBitVector* visited) { static void RemoveAsUser(HInstruction* instruction) { for (size_t i = 0; i < instruction->InputCount(); i++) { - instruction->InputAt(i)->RemoveUser(instruction, i); + instruction->RemoveAsUserOfInput(i); } HEnvironment* environment = instruction->GetEnvironment(); if (environment != nullptr) { for (size_t i = 0, e = environment->Size(); i < e; ++i) { - HUseListNode<HEnvironment*>* vreg_env_use = environment->GetInstructionEnvUseAt(i); - if (vreg_env_use != nullptr) { - HInstruction* vreg = environment->GetInstructionAt(i); - DCHECK(vreg != nullptr); - vreg->RemoveEnvironmentUser(vreg_env_use); + if (environment->GetInstructionAt(i) != nullptr) { + environment->RemoveAsUserOfInput(i); } 
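A small usage sketch for the Location::Contains() helper added to locations.h above: a register pair overlaps either of its halves, and a double stack slot overlaps both 4-byte slots it spans. The driver function is hypothetical; the factory calls are the ones locations.h already provides:

// Assumes art::Location and ART's CHECK() macro.
void LocationContainsSketch() {
  Location pair = Location::RegisterPairLocation(0, 1);
  CHECK(pair.Contains(Location::RegisterLocation(0)));  // Low half of the pair.
  CHECK(pair.Contains(Location::RegisterLocation(1)));  // High half of the pair.

  Location wide_slot = Location::DoubleStackSlot(16);
  CHECK(wide_slot.Contains(Location::StackSlot(16)));   // First word.
  CHECK(wide_slot.Contains(Location::StackSlot(20)));   // Second word, 4 bytes higher.
}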
} } @@ -63,22 +61,19 @@ void HGraph::RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visit } } -void HGraph::RemoveBlock(HBasicBlock* block) const { - for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) { - block->GetSuccessors().Get(j)->RemovePredecessor(block); - } - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - block->RemovePhi(it.Current()->AsPhi()); - } - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { - block->RemoveInstruction(it.Current()); - } -} - void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) const { for (size_t i = 0; i < blocks_.Size(); ++i) { if (!visited.IsBitSet(i)) { - RemoveBlock(blocks_.Get(i)); + HBasicBlock* block = blocks_.Get(i); + for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) { + block->GetSuccessors().Get(j)->RemovePredecessor(block); + } + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + block->RemovePhi(it.Current()->AsPhi(), /*ensure_safety=*/ false); + } + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + block->RemoveInstruction(it.Current(), /*ensure_safety=*/ false); + } } } } @@ -292,6 +287,15 @@ bool HGraph::AnalyzeNaturalLoops() const { return true; } +HNullConstant* HGraph::GetNullConstant() { + if (cached_null_constant_ == nullptr) { + cached_null_constant_ = new (arena_) HNullConstant(); + entry_block_->InsertInstructionBefore(cached_null_constant_, + entry_block_->GetLastInstruction()); + } + return cached_null_constant_; +} + void HLoopInformation::Add(HBasicBlock* block) { blocks_.SetBit(block->GetBlockId()); } @@ -429,22 +433,24 @@ void HBasicBlock::InsertPhiAfter(HPhi* phi, HPhi* cursor) { static void Remove(HInstructionList* instruction_list, HBasicBlock* block, - HInstruction* instruction) { + HInstruction* instruction, + bool ensure_safety) { DCHECK_EQ(block, instruction->GetBlock()); - DCHECK(instruction->GetUses().IsEmpty()); - DCHECK(instruction->GetEnvUses().IsEmpty()); instruction->SetBlock(nullptr); instruction_list->RemoveInstruction(instruction); - - RemoveAsUser(instruction); + if (ensure_safety) { + DCHECK(instruction->GetUses().IsEmpty()); + DCHECK(instruction->GetEnvUses().IsEmpty()); + RemoveAsUser(instruction); + } } -void HBasicBlock::RemoveInstruction(HInstruction* instruction) { - Remove(&instructions_, this, instruction); +void HBasicBlock::RemoveInstruction(HInstruction* instruction, bool ensure_safety) { + Remove(&instructions_, this, instruction, ensure_safety); } -void HBasicBlock::RemovePhi(HPhi* phi) { - Remove(&phis_, this, phi); +void HBasicBlock::RemovePhi(HPhi* phi, bool ensure_safety) { + Remove(&phis_, this, phi, ensure_safety); } void HEnvironment::CopyFrom(HEnvironment* env) { @@ -457,15 +463,9 @@ void HEnvironment::CopyFrom(HEnvironment* env) { } } -template <typename T> -static void RemoveFromUseList(T user, size_t input_index, HUseList<T>* list) { - HUseListNode<T>* current; - for (HUseIterator<HInstruction*> use_it(*list); !use_it.Done(); use_it.Advance()) { - current = use_it.Current(); - if (current->GetUser() == user && current->GetIndex() == input_index) { - list->Remove(current); - } - } +void HEnvironment::RemoveAsUserOfInput(size_t index) const { + const HUserRecord<HEnvironment*> user_record = vregs_.Get(index); + user_record.GetInstruction()->RemoveEnvironmentUser(user_record.GetUseNode()); } HInstruction* HInstruction::GetNextDisregardingMoves() const { @@ -484,14 +484,6 @@ HInstruction* 
HInstruction::GetPreviousDisregardingMoves() const { return previous; } -void HInstruction::RemoveUser(HInstruction* user, size_t input_index) { - RemoveFromUseList(user, input_index, &uses_); -} - -void HInstruction::RemoveEnvironmentUser(HUseListNode<HEnvironment*>* use) { - env_uses_.Remove(use); -} - void HInstructionList::AddInstruction(HInstruction* instruction) { if (first_instruction_ == nullptr) { DCHECK(last_instruction_ == nullptr); @@ -602,7 +594,7 @@ void HInstruction::ReplaceWith(HInstruction* other) { } void HInstruction::ReplaceInput(HInstruction* replacement, size_t index) { - InputAt(index)->RemoveUser(this, index); + RemoveAsUserOfInput(index); SetRawInputAt(index, replacement); replacement->AddUseAt(this, index); } @@ -613,7 +605,7 @@ size_t HInstruction::EnvironmentSize() const { void HPhi::AddInput(HInstruction* input) { DCHECK(input->GetBlock() != nullptr); - inputs_.Add(input); + inputs_.Add(HUserRecord<HInstruction*>(input)); input->AddUseAt(this, inputs_.Size() - 1); } @@ -990,4 +982,14 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { invoke->GetBlock()->RemoveInstruction(invoke); } +std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs) { + ScopedObjectAccess soa(Thread::Current()); + os << "[" + << " is_top=" << rhs.IsTop() + << " type=" << (rhs.IsTop() ? "?" : PrettyClass(rhs.GetTypeHandle().Get())) + << " is_exact=" << rhs.IsExact() + << " ]"; + return os; +} + } // namespace art diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 30d869d026..de448cc483 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -17,23 +17,28 @@ #ifndef ART_COMPILER_OPTIMIZING_NODES_H_ #define ART_COMPILER_OPTIMIZING_NODES_H_ +#include "base/arena_object.h" #include "entrypoints/quick/quick_entrypoints_enum.h" +#include "handle.h" +#include "handle_scope.h" #include "invoke_type.h" #include "locations.h" +#include "mirror/class.h" #include "offsets.h" #include "primitive.h" -#include "utils/arena_object.h" #include "utils/arena_bit_vector.h" #include "utils/growable_array.h" namespace art { +class GraphChecker; class HBasicBlock; class HEnvironment; class HInstruction; class HIntConstant; class HInvoke; class HGraphVisitor; +class HNullConstant; class HPhi; class HSuspendCheck; class LiveInterval; @@ -194,6 +199,8 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { return reverse_post_order_; } + HNullConstant* GetNullConstant(); + private: HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const; void VisitBlockForDominatorTree(HBasicBlock* block, @@ -205,7 +212,6 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { ArenaBitVector* visiting); void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const; void RemoveDeadBlocks(const ArenaBitVector& visited) const; - void RemoveBlock(HBasicBlock* block) const; ArenaAllocator* const arena_; @@ -233,6 +239,9 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { // The current id to assign to a newly added instruction. See HInstruction.id_. int32_t current_instruction_id_; + // Cached null constant that might be created when building SSA form. 
+ HNullConstant* cached_null_constant_; + ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1); DISALLOW_COPY_AND_ASSIGN(HGraph); }; @@ -481,14 +490,17 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { void ReplaceWith(HBasicBlock* other); void AddInstruction(HInstruction* instruction); - void RemoveInstruction(HInstruction* instruction); void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor); // Replace instruction `initial` with `replacement` within this block. void ReplaceAndRemoveInstructionWith(HInstruction* initial, HInstruction* replacement); void AddPhi(HPhi* phi); void InsertPhiAfter(HPhi* instruction, HPhi* cursor); - void RemovePhi(HPhi* phi); + // RemoveInstruction and RemovePhi delete a given instruction from the respective + // instruction list. With 'ensure_safety' set to true, it verifies that the + // instruction is not in use and removes it from the use lists of its inputs. + void RemoveInstruction(HInstruction* instruction, bool ensure_safety = true); + void RemovePhi(HPhi* phi, bool ensure_safety = true); bool IsLoopHeader() const { return (loop_information_ != nullptr) && (loop_information_->GetHeader() == this); @@ -574,6 +586,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { M(ArrayLength, Instruction) \ M(ArraySet, Instruction) \ M(BoundsCheck, Instruction) \ + M(BoundType, Instruction) \ M(CheckCast, Instruction) \ M(ClinitCheck, Instruction) \ M(Compare, BinaryOperation) \ @@ -610,6 +623,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { M(NewInstance, Instruction) \ M(Not, UnaryOperation) \ M(NotEqual, Condition) \ + M(NullConstant, Instruction) \ M(NullCheck, Instruction) \ M(Or, BinaryOperation) \ M(ParallelMove, Instruction) \ @@ -704,6 +718,9 @@ class HUseList : public ValueObject { } void Remove(HUseListNode<T>* node) { + DCHECK(node != nullptr); + DCHECK(Contains(node)); + if (node->prev_ != nullptr) { node->prev_->next_ = node->next_; } @@ -715,6 +732,18 @@ class HUseList : public ValueObject { } } + bool Contains(const HUseListNode<T>* node) const { + if (node == nullptr) { + return false; + } + for (HUseListNode<T>* current = first_; current != nullptr; current = current->GetNext()) { + if (current == node) { + return true; + } + } + return false; + } + bool IsEmpty() const { return first_ == nullptr; } @@ -750,6 +779,33 @@ class HUseIterator : public ValueObject { friend class HValue; }; +// This class is used by HEnvironment and HInstruction classes to record the +// instructions they use and pointers to the corresponding HUseListNodes kept +// by the used instructions. +template <typename T> +class HUserRecord : public ValueObject { + public: + HUserRecord() : instruction_(nullptr), use_node_(nullptr) {} + explicit HUserRecord(HInstruction* instruction) : instruction_(instruction), use_node_(nullptr) {} + + HUserRecord(const HUserRecord<T>& old_record, HUseListNode<T>* use_node) + : instruction_(old_record.instruction_), use_node_(use_node) { + DCHECK(instruction_ != nullptr); + DCHECK(use_node_ != nullptr); + DCHECK(old_record.use_node_ == nullptr); + } + + HInstruction* GetInstruction() const { return instruction_; } + HUseListNode<T>* GetUseNode() const { return use_node_; } + + private: + // Instruction used by the user. + HInstruction* instruction_; + + // Corresponding entry in the use list kept by 'instruction_'. + HUseListNode<T>* use_node_; +}; + // Represents the side effects an instruction may have. 
class SideEffects : public ValueObject { public: @@ -820,50 +876,118 @@ class HEnvironment : public ArenaObject<kArenaAllocMisc> { : vregs_(arena, number_of_vregs) { vregs_.SetSize(number_of_vregs); for (size_t i = 0; i < number_of_vregs; i++) { - vregs_.Put(i, VRegInfo(nullptr, nullptr)); + vregs_.Put(i, HUserRecord<HEnvironment*>()); } } void CopyFrom(HEnvironment* env); void SetRawEnvAt(size_t index, HInstruction* instruction) { - vregs_.Put(index, VRegInfo(instruction, nullptr)); + vregs_.Put(index, HUserRecord<HEnvironment*>(instruction)); } + HInstruction* GetInstructionAt(size_t index) const { + return vregs_.Get(index).GetInstruction(); + } + + void RemoveAsUserOfInput(size_t index) const; + + size_t Size() const { return vregs_.Size(); } + + private: // Record instructions' use entries of this environment for constant-time removal. + // It should only be called by HInstruction when a new environment use is added. void RecordEnvUse(HUseListNode<HEnvironment*>* env_use) { DCHECK(env_use->GetUser() == this); size_t index = env_use->GetIndex(); - VRegInfo info = vregs_.Get(index); - DCHECK(info.vreg_ != nullptr); - DCHECK(info.node_ == nullptr); - vregs_.Put(index, VRegInfo(info.vreg_, env_use)); + vregs_.Put(index, HUserRecord<HEnvironment*>(vregs_.Get(index), env_use)); } - HInstruction* GetInstructionAt(size_t index) const { - return vregs_.Get(index).vreg_; + GrowableArray<HUserRecord<HEnvironment*> > vregs_; + + friend HInstruction; + + DISALLOW_COPY_AND_ASSIGN(HEnvironment); +}; + +class ReferenceTypeInfo : ValueObject { + public: + typedef Handle<mirror::Class> TypeHandle; + + static ReferenceTypeInfo Create(TypeHandle type_handle, bool is_exact) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + if (type_handle->IsObjectClass()) { + // Override the type handle to be consistent with the case when we get to + // Top but don't have the Object class available. It avoids having to guess + // what value the type_handle has when it's Top. + return ReferenceTypeInfo(TypeHandle(), is_exact, true); + } else { + return ReferenceTypeInfo(type_handle, is_exact, false); + } } - HUseListNode<HEnvironment*>* GetInstructionEnvUseAt(size_t index) const { - return vregs_.Get(index).node_; + static ReferenceTypeInfo CreateTop(bool is_exact) { + return ReferenceTypeInfo(TypeHandle(), is_exact, true); } - size_t Size() const { return vregs_.Size(); } + bool IsExact() const { return is_exact_; } + bool IsTop() const { return is_top_; } - private: - struct VRegInfo { - HInstruction* vreg_; - HUseListNode<HEnvironment*>* node_; + Handle<mirror::Class> GetTypeHandle() const { return type_handle_; } - VRegInfo(HInstruction* instruction, HUseListNode<HEnvironment*>* env_use) - : vreg_(instruction), node_(env_use) {} - }; + bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + if (IsTop()) { + // Top (equivalent for java.lang.Object) is supertype of anything. + return true; + } + if (rti.IsTop()) { + // If we get here `this` is not Top() so it can't be a supertype. + return false; + } + return GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get()); + } - GrowableArray<VRegInfo> vregs_; + // Returns true if the type information provide the same amount of details. + // Note that it does not mean that the instructions have the same actual type + // (e.g. tops are equal but they can be the result of a merge). 
+ bool IsEqual(ReferenceTypeInfo rti) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + if (IsExact() != rti.IsExact()) { + return false; + } + if (IsTop() && rti.IsTop()) { + // `Top` means java.lang.Object, so the types are equivalent. + return true; + } + if (IsTop() || rti.IsTop()) { + // If only one is top or object than they are not equivalent. + // NB: We need this extra check because the type_handle of `Top` is invalid + // and we cannot inspect its reference. + return false; + } - DISALLOW_COPY_AND_ASSIGN(HEnvironment); + // Finally check the types. + return GetTypeHandle().Get() == rti.GetTypeHandle().Get(); + } + + private: + ReferenceTypeInfo() : ReferenceTypeInfo(TypeHandle(), false, true) {} + ReferenceTypeInfo(TypeHandle type_handle, bool is_exact, bool is_top) + : type_handle_(type_handle), is_exact_(is_exact), is_top_(is_top) {} + + // The class of the object. + TypeHandle type_handle_; + // Whether or not the type is exact or a superclass of the actual type. + // Whether or not we have any information about this type. + bool is_exact_; + // A true value here means that the object type should be java.lang.Object. + // We don't have access to the corresponding mirror object every time so this + // flag acts as a substitute. When true, the TypeHandle refers to a null + // pointer and should not be used. + bool is_top_; }; +std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs); + class HInstruction : public ArenaObject<kArenaAllocMisc> { public: explicit HInstruction(SideEffects side_effects) @@ -876,7 +1000,8 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { locations_(nullptr), live_interval_(nullptr), lifetime_position_(kNoLifetime), - side_effects_(side_effects) {} + side_effects_(side_effects), + reference_type_info_(ReferenceTypeInfo::CreateTop(/* is_exact */ false)) {} virtual ~HInstruction() {} @@ -899,13 +1024,15 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { bool IsLoopHeaderPhi() { return IsPhi() && block_->IsLoopHeader(); } virtual size_t InputCount() const = 0; - virtual HInstruction* InputAt(size_t i) const = 0; + HInstruction* InputAt(size_t i) const { return InputRecordAt(i).GetInstruction(); } virtual void Accept(HGraphVisitor* visitor) = 0; virtual const char* DebugName() const = 0; virtual Primitive::Type GetType() const { return Primitive::kPrimVoid; } - virtual void SetRawInputAt(size_t index, HInstruction* input) = 0; + void SetRawInputAt(size_t index, HInstruction* input) { + SetRawInputRecordAt(index, HUserRecord<HInstruction*>(input)); + } virtual bool NeedsEnvironment() const { return false; } virtual bool IsControlFlow() const { return false; } @@ -914,12 +1041,24 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { // Does not apply for all instructions, but having this at top level greatly // simplifies the null check elimination. 
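ReferenceTypeInfo models java.lang.Object as a dedicated Top flag with no usable handle, which is why IsSupertypeOf and IsEqual dispose of the Top cases before ever touching the handle. A self-contained toy model of that lattice, using an invented three-class table in place of mirror::Class and handles:

// Toy model of the Top/exact reference-type lattice; not the ART implementation.
#include <cassert>

enum class TypeId { kObject, kNumber, kInteger };  // Integer <: Number <: Object

static bool IsSubclassOf(TypeId sub, TypeId super) {
  if (super == TypeId::kObject) return true;
  if (super == TypeId::kNumber) return sub == TypeId::kNumber || sub == TypeId::kInteger;
  return sub == super;
}

class TypeInfo {
 public:
  static TypeInfo Create(TypeId type, bool is_exact) {
    // Like ReferenceTypeInfo::Create: Object degenerates to Top.
    return (type == TypeId::kObject) ? TypeInfo(type, is_exact, /*is_top=*/true)
                                     : TypeInfo(type, is_exact, /*is_top=*/false);
  }
  static TypeInfo CreateTop(bool is_exact) { return TypeInfo(TypeId::kObject, is_exact, true); }

  bool IsTop() const { return is_top_; }
  bool IsExact() const { return is_exact_; }

  bool IsSupertypeOf(const TypeInfo& other) const {
    if (IsTop()) return true;          // Top (Object) is a supertype of everything.
    if (other.IsTop()) return false;   // Nothing below Top is a supertype of Top.
    return IsSubclassOf(other.type_, type_);
  }

  bool IsEqual(const TypeInfo& other) const {
    if (IsExact() != other.IsExact()) return false;
    if (IsTop() && other.IsTop()) return true;    // Both stand for Object.
    if (IsTop() || other.IsTop()) return false;   // Only one is Top: no valid handle to compare.
    return type_ == other.type_;
  }

 private:
  TypeInfo(TypeId type, bool is_exact, bool is_top)
      : type_(type), is_exact_(is_exact), is_top_(is_top) {}
  TypeId type_;
  bool is_exact_;
  bool is_top_;
};

int main() {
  TypeInfo number = TypeInfo::Create(TypeId::kNumber, /*is_exact=*/false);
  TypeInfo integer = TypeInfo::Create(TypeId::kInteger, /*is_exact=*/true);
  assert(TypeInfo::CreateTop(false).IsSupertypeOf(integer));
  assert(number.IsSupertypeOf(integer));
  assert(!integer.IsSupertypeOf(number));
  assert(!number.IsEqual(integer));
  return 0;
}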
- virtual bool CanBeNull() const { return true; } + virtual bool CanBeNull() const { + DCHECK_EQ(GetType(), Primitive::kPrimNot) << "CanBeNull only applies to reference types"; + return true; + } virtual bool CanDoImplicitNullCheck() const { return false; } + void SetReferenceTypeInfo(ReferenceTypeInfo reference_type_info) { + reference_type_info_ = reference_type_info; + } + + ReferenceTypeInfo GetReferenceTypeInfo() const { return reference_type_info_; } + void AddUseAt(HInstruction* user, size_t index) { - uses_.AddUse(user, index, GetBlock()->GetGraph()->GetArena()); + DCHECK(user != nullptr); + HUseListNode<HInstruction*>* use = + uses_.AddUse(user, index, GetBlock()->GetGraph()->GetArena()); + user->SetRawInputRecordAt(index, HUserRecord<HInstruction*>(user->InputRecordAt(index), use)); } void AddEnvUseAt(HEnvironment* user, size_t index) { @@ -929,11 +1068,13 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { user->RecordEnvUse(env_use); } - void RemoveUser(HInstruction* user, size_t index); - void RemoveEnvironmentUser(HUseListNode<HEnvironment*>* use); + void RemoveAsUserOfInput(size_t input) { + HUserRecord<HInstruction*> input_use = InputRecordAt(input); + input_use.GetInstruction()->uses_.Remove(input_use.GetUseNode()); + } - const HUseList<HInstruction*>& GetUses() { return uses_; } - const HUseList<HEnvironment*>& GetEnvUses() { return env_uses_; } + const HUseList<HInstruction*>& GetUses() const { return uses_; } + const HUseList<HEnvironment*>& GetEnvUses() const { return env_uses_; } bool HasUses() const { return !uses_.IsEmpty() || !env_uses_.IsEmpty(); } bool HasEnvironmentUses() const { return !env_uses_.IsEmpty(); } @@ -1015,7 +1156,25 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { void SetLiveInterval(LiveInterval* interval) { live_interval_ = interval; } bool HasLiveInterval() const { return live_interval_ != nullptr; } + bool IsSuspendCheckEntry() const { return IsSuspendCheck() && GetBlock()->IsEntryBlock(); } + + // Returns whether the code generation of the instruction will require to have access + // to the current method. Such instructions are: + // (1): Instructions that require an environment, as calling the runtime requires + // to walk the stack and have the current method stored at a specific stack address. + // (2): Object literals like classes and strings, that are loaded from the dex cache + // fields of the current method. + bool NeedsCurrentMethod() const { + return NeedsEnvironment() || IsLoadClass() || IsLoadString(); + } + + protected: + virtual const HUserRecord<HInstruction*> InputRecordAt(size_t i) const = 0; + virtual void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) = 0; + private: + void RemoveEnvironmentUser(HUseListNode<HEnvironment*>* use_node) { env_uses_.Remove(use_node); } + HInstruction* previous_; HInstruction* next_; HBasicBlock* block_; @@ -1050,7 +1209,12 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { const SideEffects side_effects_; + // TODO: for primitive types this should be marked as invalid. 
+ ReferenceTypeInfo reference_type_info_; + + friend class GraphChecker; friend class HBasicBlock; + friend class HEnvironment; friend class HGraph; friend class HInstructionList; @@ -1170,15 +1334,16 @@ class HTemplateInstruction: public HInstruction { virtual ~HTemplateInstruction() {} virtual size_t InputCount() const { return N; } - virtual HInstruction* InputAt(size_t i) const { return inputs_[i]; } protected: - virtual void SetRawInputAt(size_t i, HInstruction* instruction) { - inputs_[i] = instruction; + const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE { return inputs_[i]; } + + void SetRawInputRecordAt(size_t i, const HUserRecord<HInstruction*>& input) OVERRIDE { + inputs_[i] = input; } private: - EmbeddedArray<HInstruction*, N> inputs_; + EmbeddedArray<HUserRecord<HInstruction*>, N> inputs_; friend class SsaBuilder; }; @@ -1663,6 +1828,22 @@ class HDoubleConstant : public HConstant { DISALLOW_COPY_AND_ASSIGN(HDoubleConstant); }; +class HNullConstant : public HConstant { + public: + HNullConstant() : HConstant(Primitive::kPrimNot) {} + + bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + return true; + } + + size_t ComputeHashCode() const OVERRIDE { return 0; } + + DECLARE_INSTRUCTION(NullConstant); + + private: + DISALLOW_COPY_AND_ASSIGN(HNullConstant); +}; + // Constants of the type int. Those can be from Dex instructions, or // synthesized (for example with the if-eqz instruction). class HIntConstant : public HConstant { @@ -1718,7 +1899,6 @@ std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic); class HInvoke : public HInstruction { public: virtual size_t InputCount() const { return inputs_.Size(); } - virtual HInstruction* InputAt(size_t i) const { return inputs_.Get(i); } // Runtime needs to walk the stack, so Dex -> Dex calls need to // know their environment. 
@@ -1728,10 +1908,6 @@ class HInvoke : public HInstruction { SetRawInputAt(index, argument); } - virtual void SetRawInputAt(size_t index, HInstruction* input) { - inputs_.Put(index, input); - } - virtual Primitive::Type GetType() const { return return_type_; } uint32_t GetDexPc() const { return dex_pc_; } @@ -1763,7 +1939,12 @@ class HInvoke : public HInstruction { inputs_.SetSize(number_of_arguments); } - GrowableArray<HInstruction*> inputs_; + const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE { return inputs_.Get(i); } + void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) OVERRIDE { + inputs_.Put(index, input); + } + + GrowableArray<HUserRecord<HInstruction*> > inputs_; const Primitive::Type return_type_; const uint32_t dex_pc_; const uint32_t dex_method_index_; @@ -2259,11 +2440,6 @@ class HPhi : public HInstruction { } size_t InputCount() const OVERRIDE { return inputs_.Size(); } - HInstruction* InputAt(size_t i) const OVERRIDE { return inputs_.Get(i); } - - void SetRawInputAt(size_t index, HInstruction* input) OVERRIDE { - inputs_.Put(index, input); - } void AddInput(HInstruction* input); @@ -2282,8 +2458,15 @@ class HPhi : public HInstruction { DECLARE_INSTRUCTION(Phi); + protected: + const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE { return inputs_.Get(i); } + + void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) OVERRIDE { + inputs_.Put(index, input); + } + private: - GrowableArray<HInstruction*> inputs_; + GrowableArray<HUserRecord<HInstruction*> > inputs_; const uint32_t reg_number_; Primitive::Type type_; bool is_live_; @@ -2608,7 +2791,8 @@ class HLoadClass : public HExpression<0> { type_index_(type_index), is_referrers_class_(is_referrers_class), dex_pc_(dex_pc), - generate_clinit_check_(false) {} + generate_clinit_check_(false), + loaded_class_rti_(ReferenceTypeInfo::CreateTop(/* is_exact */ false)) {} bool CanBeMoved() const OVERRIDE { return true; } @@ -2646,6 +2830,20 @@ class HLoadClass : public HExpression<0> { return !is_referrers_class_; } + ReferenceTypeInfo GetLoadedClassRTI() { + return loaded_class_rti_; + } + + void SetLoadedClassRTI(ReferenceTypeInfo rti) { + // Make sure we only set exact types (the loaded class should never be merged). + DCHECK(rti.IsExact()); + loaded_class_rti_ = rti; + } + + bool IsResolved() { + return loaded_class_rti_.IsExact(); + } + DECLARE_INSTRUCTION(LoadClass); private: @@ -2656,6 +2854,8 @@ class HLoadClass : public HExpression<0> { // Used for code generation. bool generate_clinit_check_; + ReferenceTypeInfo loaded_class_rti_; + DISALLOW_COPY_AND_ASSIGN(HLoadClass); }; @@ -2858,6 +3058,32 @@ class HInstanceOf : public HExpression<2> { DISALLOW_COPY_AND_ASSIGN(HInstanceOf); }; +class HBoundType : public HExpression<1> { + public: + HBoundType(HInstruction* input, ReferenceTypeInfo bound_type) + : HExpression(Primitive::kPrimNot, SideEffects::None()), + bound_type_(bound_type) { + SetRawInputAt(0, input); + } + + const ReferenceTypeInfo& GetBoundType() const { return bound_type_; } + + bool CanBeNull() const OVERRIDE { + // `null instanceof ClassX` always return false so we can't be null. + return false; + } + + DECLARE_INSTRUCTION(BoundType); + + private: + // Encodes the most upper class that this instruction can have. In other words + // it is always the case that GetBoundType().IsSupertypeOf(GetReferenceType()). 
+ // It is used to bound the type in cases like `if (x instanceof ClassX) {}` + const ReferenceTypeInfo bound_type_; + + DISALLOW_COPY_AND_ASSIGN(HBoundType); +}; + class HCheckCast : public HTemplateInstruction<2> { public: HCheckCast(HInstruction* object, @@ -2959,7 +3185,7 @@ class MoveOperands : public ArenaObject<kArenaAllocMisc> { // True if this blocks a move from the given location. bool Blocks(Location loc) const { - return !IsEliminated() && source_.Equals(loc); + return !IsEliminated() && (source_.Contains(loc) || loc.Contains(source_)); } // A move is redundant if it's been eliminated, if its source and @@ -3000,46 +3226,19 @@ class HParallelMove : public HTemplateInstruction<0> { void AddMove(Location source, Location destination, HInstruction* instruction) { DCHECK(source.IsValid()); DCHECK(destination.IsValid()); - // The parallel move resolver does not handle pairs. So we decompose the - // pair locations into two moves. - if (source.IsPair() && destination.IsPair()) { - AddMove(source.ToLow(), destination.ToLow(), instruction); - AddMove(source.ToHigh(), destination.ToHigh(), nullptr); - } else if (source.IsPair()) { - DCHECK(destination.IsDoubleStackSlot()) << destination; - AddMove(source.ToLow(), Location::StackSlot(destination.GetStackIndex()), instruction); - AddMove(source.ToHigh(), Location::StackSlot(destination.GetHighStackIndex(4)), nullptr); - } else if (destination.IsPair()) { - if (source.IsConstant()) { - // We put the same constant in the move. The code generator will handle which - // low or high part to use. - AddMove(source, destination.ToLow(), instruction); - AddMove(source, destination.ToHigh(), nullptr); - } else { - DCHECK(source.IsDoubleStackSlot()); - AddMove(Location::StackSlot(source.GetStackIndex()), destination.ToLow(), instruction); - // TODO: rewrite GetHighStackIndex to not require a word size. It's supposed to - // always be 4. - static constexpr int kHighOffset = 4; - AddMove(Location::StackSlot(source.GetHighStackIndex(kHighOffset)), - destination.ToHigh(), - nullptr); - } - } else { - if (kIsDebugBuild) { - if (instruction != nullptr) { - for (size_t i = 0, e = moves_.Size(); i < e; ++i) { - DCHECK_NE(moves_.Get(i).GetInstruction(), instruction) - << "Doing parallel moves for the same instruction."; - } - } + if (kIsDebugBuild) { + if (instruction != nullptr) { for (size_t i = 0, e = moves_.Size(); i < e; ++i) { - DCHECK(!destination.Equals(moves_.Get(i).GetDestination())) - << "Same destination for two moves in a parallel move."; + DCHECK_NE(moves_.Get(i).GetInstruction(), instruction) + << "Doing parallel moves for the same instruction."; } } - moves_.Add(MoveOperands(source, destination, instruction)); + for (size_t i = 0, e = moves_.Size(); i < e; ++i) { + DCHECK(!destination.Equals(moves_.Get(i).GetDestination())) + << "Same destination for two moves in a parallel move."; + } } + moves_.Add(MoveOperands(source, destination, instruction)); } MoveOperands* MoveOperandsAt(size_t index) const { diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc index 5dbdc74924..4cf22d3b2e 100644 --- a/compiler/optimizing/nodes_test.cc +++ b/compiler/optimizing/nodes_test.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ +#include "base/arena_allocator.h" #include "nodes.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc index b99f6784f7..b13e07eb22 100644 --- a/compiler/optimizing/optimization.cc +++ b/compiler/optimizing/optimization.cc @@ -21,6 +21,12 @@ namespace art { +void HOptimization::MaybeRecordStat(MethodCompilationStat compilation_stat) const { + if (stats_ != nullptr) { + stats_->RecordStat(compilation_stat); + } +} + void HOptimization::Check() { if (kIsDebugBuild) { if (is_in_ssa_form_) { diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h index d9e082a7f3..af39e092c7 100644 --- a/compiler/optimizing/optimization.h +++ b/compiler/optimizing/optimization.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_ #include "nodes.h" +#include "optimizing_compiler_stats.h" namespace art { @@ -34,8 +35,10 @@ class HOptimization : public ValueObject { public: HOptimization(HGraph* graph, bool is_in_ssa_form, - const char* pass_name) + const char* pass_name, + OptimizingCompilerStats* stats = nullptr) : graph_(graph), + stats_(stats), is_in_ssa_form_(is_in_ssa_form), pass_name_(pass_name) {} @@ -51,7 +54,11 @@ class HOptimization : public ValueObject { void Check(); protected: + void MaybeRecordStat(MethodCompilationStat compilation_stat) const; + HGraph* const graph_; + // Used to record stats about the optimization. + OptimizingCompilerStats* const stats_; private: // Does the analyzed graph use the SSA form? diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index c518f33f53..2fef8c7b3a 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -19,6 +19,7 @@ #include <fstream> #include <stdint.h> +#include "base/arena_allocator.h" #include "base/dumpable.h" #include "base/timing_logger.h" #include "bounds_check_elimination.h" @@ -47,7 +48,6 @@ #include "ssa_phi_elimination.h" #include "ssa_liveness_analysis.h" #include "reference_type_propagation.h" -#include "utils/arena_allocator.h" namespace art { @@ -201,6 +201,7 @@ class OptimizingCompiler FINAL : public Compiler { CompiledMethod* CompileOptimized(HGraph* graph, CodeGenerator* codegen, CompilerDriver* driver, + const DexFile& dex_file, const DexCompilationUnit& dex_compilation_unit, PassInfoPrinter* pass_info) const; @@ -293,13 +294,15 @@ static void RunOptimizations(HOptimization* optimizations[], static void RunOptimizations(HGraph* graph, CompilerDriver* driver, OptimizingCompilerStats* stats, + const DexFile& dex_file, const DexCompilationUnit& dex_compilation_unit, - PassInfoPrinter* pass_info_printer) { + PassInfoPrinter* pass_info_printer, + StackHandleScopeCollection* handles) { SsaRedundantPhiElimination redundant_phi(graph); SsaDeadPhiElimination dead_phi(graph); HDeadCodeElimination dce(graph); HConstantFolding fold1(graph); - InstructionSimplifier simplify1(graph); + InstructionSimplifier simplify1(graph, stats); HInliner inliner(graph, dex_compilation_unit, driver, stats); @@ -308,8 +311,8 @@ static void RunOptimizations(HGraph* graph, GVNOptimization gvn(graph, side_effects); LICM licm(graph, side_effects); BoundsCheckElimination bce(graph); - ReferenceTypePropagation type_propagation(graph); - InstructionSimplifier simplify2(graph, "instruction_simplifier_after_types"); + ReferenceTypePropagation type_propagation(graph, dex_file, dex_compilation_unit, handles); + InstructionSimplifier 
simplify2(graph, stats, "instruction_simplifier_after_types"); IntrinsicsRecognizer intrinsics(graph, dex_compilation_unit.GetDexFile(), driver); @@ -348,10 +351,12 @@ static ArrayRef<const uint8_t> AlignVectorSize(std::vector<uint8_t>& vector) { CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, CodeGenerator* codegen, CompilerDriver* compiler_driver, + const DexFile& dex_file, const DexCompilationUnit& dex_compilation_unit, PassInfoPrinter* pass_info_printer) const { - RunOptimizations( - graph, compiler_driver, &compilation_stats_, dex_compilation_unit, pass_info_printer); + StackHandleScopeCollection handles(Thread::Current()); + RunOptimizations(graph, compiler_driver, &compilation_stats_, + dex_file, dex_compilation_unit, pass_info_printer, &handles); PrepareForRegisterAllocation(graph).Run(); SsaLivenessAnalysis liveness(*graph, codegen); @@ -376,7 +381,10 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, compiler_driver, codegen->GetInstructionSet(), ArrayRef<const uint8_t>(allocator.GetMemory()), - codegen->GetFrameSize(), + // Follow Quick's behavior and set the frame size to zero if it is + // considered "empty" (see the definition of + // art::CodeGenerator::HasEmptyFrame). + codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(), codegen->GetCoreSpillMask(), codegen->GetFpuSpillMask(), ArrayRef<const uint8_t>(stack_map)); @@ -400,17 +408,21 @@ CompiledMethod* OptimizingCompiler::CompileBaseline( codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit); compilation_stats_.RecordStat(MethodCompilationStat::kCompiledBaseline); - return CompiledMethod::SwapAllocCompiledMethod(compiler_driver, - codegen->GetInstructionSet(), - ArrayRef<const uint8_t>(allocator.GetMemory()), - codegen->GetFrameSize(), - codegen->GetCoreSpillMask(), - codegen->GetFpuSpillMask(), - &src_mapping_table, - AlignVectorSize(mapping_table), - AlignVectorSize(vmap_table), - AlignVectorSize(gc_map), - ArrayRef<const uint8_t>()); + return CompiledMethod::SwapAllocCompiledMethod( + compiler_driver, + codegen->GetInstructionSet(), + ArrayRef<const uint8_t>(allocator.GetMemory()), + // Follow Quick's behavior and set the frame size to zero if it is + // considered "empty" (see the definition of + // art::CodeGenerator::HasEmptyFrame). + codegen->HasEmptyFrame() ? 
0 : codegen->GetFrameSize(), + codegen->GetCoreSpillMask(), + codegen->GetFpuSpillMask(), + &src_mapping_table, + AlignVectorSize(mapping_table), + AlignVectorSize(vmap_table), + AlignVectorSize(gc_map), + ArrayRef<const uint8_t>()); } CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, @@ -508,6 +520,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, return CompileOptimized(graph, codegen.get(), compiler_driver, + dex_file, dex_compilation_unit, &pass_info_printer); } else if (shouldOptimize && RegisterAllocator::Supports(instruction_set)) { diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index cc2723df99..3ebf0f8cd2 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -43,6 +43,8 @@ enum MethodCompilationStat { kNotCompiledCantAccesType, kNotOptimizedRegisterAllocator, kNotCompiledUnhandledInstruction, + kRemovedCheckedCast, + kRemovedNullCheck, kLastStat }; @@ -96,6 +98,8 @@ class OptimizingCompilerStats { case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType"; case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator"; case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction"; + case kRemovedCheckedCast: return "kRemovedCheckedCast"; + case kRemovedNullCheck: return "kRemovedNullCheck"; default: LOG(FATAL) << "invalid stat"; } return ""; diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index debe466560..7d0641ec13 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -57,17 +57,49 @@ void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) { // unallocated, or the move was already eliminated). for (size_t i = 0; i < parallel_move->NumMoves(); ++i) { MoveOperands* move = parallel_move->MoveOperandsAt(i); - // The parallel move resolver algorithm does not work with register pairs. - DCHECK(!move->GetSource().IsPair()); - DCHECK(!move->GetDestination().IsPair()); if (!move->IsRedundant()) { moves_.Add(move); } } } +// Update the source of `move`, knowing that `updated_location` has been swapped +// with `new_source`. Note that `updated_location` can be a pair, therefore if +// `move` is non-pair, we need to extract which register to use. +static void UpdateSourceOf(MoveOperands* move, Location updated_location, Location new_source) { + Location source = move->GetSource(); + if (new_source.GetKind() == source.GetKind()) { + DCHECK(updated_location.Equals(source)); + move->SetSource(new_source); + } else if (new_source.IsStackSlot() + || new_source.IsDoubleStackSlot() + || source.IsStackSlot() + || source.IsDoubleStackSlot()) { + // Stack slots never take part of a pair/non-pair swap. 
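UpdateSourceOf above covers the case the old resolver could not express: once pair (0,1) has been swapped with (2,3), a still-pending move that read the single register 2 must be rewritten to read 0, which is exactly what the parallel_move_test expectation "(0,1 <-> 2,3) (7 -> 1) (0 -> 7)" further below exercises. A simplified standalone sketch of that remapping, with PairLoc and plain ints standing in for Location:

// Simplified sketch of rewriting a blocked move's source after a pair swap.
// A move whose source is the whole pair would simply take the whole
// replacement pair; only the single-register halves need mapping.
#include <cassert>

struct PairLoc { int low; int high; };

// `updated` (a register pair) was swapped with `replacement` (another pair).
// A pending move that read one half of `updated` must now read the matching
// half of `replacement`.
static int RemapSingle(int source_reg, PairLoc updated, PairLoc replacement) {
  if (source_reg == updated.low) return replacement.low;
  assert(source_reg == updated.high);
  return replacement.high;
}

int main() {
  // Swap pair (0,1) <-> (2,3); a later move "(2 -> 7)" must become "(0 -> 7)".
  PairLoc updated{2, 3};
  PairLoc replacement{0, 1};
  assert(RemapSingle(2, updated, replacement) == 0);
  assert(RemapSingle(3, updated, replacement) == 1);
  return 0;
}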
+ DCHECK(updated_location.Equals(source)); + move->SetSource(new_source); + } else if (source.IsRegister()) { + DCHECK(new_source.IsRegisterPair()) << new_source; + DCHECK(updated_location.IsRegisterPair()) << updated_location; + if (updated_location.low() == source.reg()) { + move->SetSource(Location::RegisterLocation(new_source.low())); + } else { + DCHECK_EQ(updated_location.high(), source.reg()); + move->SetSource(Location::RegisterLocation(new_source.high())); + } + } else if (source.IsFpuRegister()) { + DCHECK(new_source.IsFpuRegisterPair()) << new_source; + DCHECK(updated_location.IsFpuRegisterPair()) << updated_location; + if (updated_location.low() == source.reg()) { + move->SetSource(Location::FpuRegisterLocation(new_source.low())); + } else { + DCHECK_EQ(updated_location.high(), source.reg()); + move->SetSource(Location::FpuRegisterLocation(new_source.high())); + } + } +} -void ParallelMoveResolver::PerformMove(size_t index) { +MoveOperands* ParallelMoveResolver::PerformMove(size_t index) { // Each call to this function performs a move and deletes it from the move // graph. We first recursively perform any move blocking this one. We // mark a move as "pending" on entry to PerformMove in order to detect @@ -75,35 +107,59 @@ void ParallelMoveResolver::PerformMove(size_t index) { // which means that a call to PerformMove could change any source operand // in the move graph. - DCHECK(!moves_.Get(index)->IsPending()); - DCHECK(!moves_.Get(index)->IsRedundant()); + MoveOperands* move = moves_.Get(index); + DCHECK(!move->IsPending()); + if (move->IsRedundant()) { + // Because we swap register pairs first, following, un-pending + // moves may become redundant. + move->Eliminate(); + return nullptr; + } // Clear this move's destination to indicate a pending move. The actual // destination is saved in a stack-allocated local. Recursion may allow // multiple moves to be pending. - DCHECK(!moves_.Get(index)->GetSource().IsInvalid()); - Location destination = moves_.Get(index)->MarkPending(); + DCHECK(!move->GetSource().IsInvalid()); + Location destination = move->MarkPending(); // Perform a depth-first traversal of the move graph to resolve // dependencies. Any unperformed, unpending move with a source the same // as this one's destination blocks this one so recursively perform all // such moves. + MoveOperands* required_swap = nullptr; for (size_t i = 0; i < moves_.Size(); ++i) { const MoveOperands& other_move = *moves_.Get(i); if (other_move.Blocks(destination) && !other_move.IsPending()) { // Though PerformMove can change any source operand in the move graph, - // this call cannot create a blocking move via a swap (this loop does - // not miss any). Assume there is a non-blocking move with source A + // calling `PerformMove` cannot create a blocking move via a swap + // (this loop does not miss any). + // For example, assume there is a non-blocking move with source A // and this move is blocked on source B and there is a swap of A and // B. Then A and B must be involved in the same cycle (or they would // not be swapped). Since this move's destination is B and there is // only a single incoming edge to an operand, this move must also be // involved in the same cycle. In that case, the blocking move will // be created but will be "pending" when we return from PerformMove. - PerformMove(i); + required_swap = PerformMove(i); + + if (required_swap == move) { + // If this move is required to swap, we do so without looking + // at the next moves. 
Swapping is not blocked by anything, it just + // updates other moves's source. + break; + } else if (required_swap == moves_.Get(i)) { + // If `other_move` was swapped, we iterate again to find a new + // potential cycle. + required_swap = nullptr; + i = 0; + } else if (required_swap != nullptr) { + // A move is required to swap. We walk back the cycle to find the + // move by just returning from this `PerforrmMove`. + moves_.Get(index)->ClearPending(destination); + return required_swap; + } } } - MoveOperands* move = moves_.Get(index); // We are about to resolve this move and don't need it marked as // pending, so restore its destination. @@ -113,19 +169,30 @@ void ParallelMoveResolver::PerformMove(size_t index) { // so it may now be the last move in the cycle. If so remove it. if (move->GetSource().Equals(destination)) { move->Eliminate(); - return; + DCHECK(required_swap == nullptr); + return nullptr; } // The move may be blocked on a (at most one) pending move, in which case // we have a cycle. Search for such a blocking move and perform a swap to // resolve it. bool do_swap = false; - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& other_move = *moves_.Get(i); - if (other_move.Blocks(destination)) { - DCHECK(other_move.IsPending()); - do_swap = true; - break; + if (required_swap != nullptr) { + DCHECK_EQ(required_swap, move); + do_swap = true; + } else { + for (size_t i = 0; i < moves_.Size(); ++i) { + const MoveOperands& other_move = *moves_.Get(i); + if (other_move.Blocks(destination)) { + DCHECK(other_move.IsPending()); + if (!destination.IsPair() && other_move.GetSource().IsPair()) { + // We swap pairs before swapping non-pairs. Go back from the + // cycle by returning the pair that must be swapped. + return moves_.Get(i); + } + do_swap = true; + break; + } } } @@ -140,15 +207,21 @@ void ParallelMoveResolver::PerformMove(size_t index) { for (size_t i = 0; i < moves_.Size(); ++i) { const MoveOperands& other_move = *moves_.Get(i); if (other_move.Blocks(source)) { - moves_.Get(i)->SetSource(swap_destination); + UpdateSourceOf(moves_.Get(i), source, swap_destination); } else if (other_move.Blocks(swap_destination)) { - moves_.Get(i)->SetSource(source); + UpdateSourceOf(moves_.Get(i), swap_destination, source); } } + // If the swap was required because of a pair in the middle of a cycle, + // we return the swapped move, so that the caller knows it needs to re-iterate + // its dependency loop. + return required_swap; } else { // This move is not blocked. EmitMove(index); move->Eliminate(); + DCHECK(required_swap == nullptr); + return nullptr; } } diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h index 7ec1dd2deb..3fa1b37afd 100644 --- a/compiler/optimizing/parallel_move_resolver.h +++ b/compiler/optimizing/parallel_move_resolver.h @@ -83,7 +83,15 @@ class ParallelMoveResolver : public ValueObject { // Perform the move at the moves_ index in question (possibly requiring // other moves to satisfy dependencies). - void PerformMove(size_t index); + // + // Return whether another move in the dependency cycle needs to swap. This + // is to handle pair swaps, where we want the pair to swap first to avoid + // building pairs that are unexpected by the code generator. For example, if + // we were to swap R1 with R2, we would need to update all locations using + // R2 to R1. So a (R2,R3) pair register could become (R1,R3). 
We could make + // the code generator understand such pairs, but it's easier and cleaner to + // just not create such pairs and exchange pairs in priority. + MoveOperands* PerformMove(size_t index); DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolver); }; diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index 28b5697bbd..44a3da2817 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -14,9 +14,9 @@ * limitations under the License. */ +#include "base/arena_allocator.h" #include "nodes.h" #include "parallel_move_resolver.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" @@ -165,7 +165,7 @@ TEST(ParallelMoveTest, Pairs) { Location::RegisterPairLocation(2, 3), nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(2 -> 4) (0 -> 2) (1 -> 3)", resolver.GetMessage().c_str()); + ASSERT_STREQ("(2 -> 4) (0,1 -> 2,3)", resolver.GetMessage().c_str()); } { @@ -180,7 +180,7 @@ TEST(ParallelMoveTest, Pairs) { Location::RegisterLocation(4), nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(2 -> 4) (0 -> 2) (1 -> 3)", resolver.GetMessage().c_str()); + ASSERT_STREQ("(2 -> 4) (0,1 -> 2,3)", resolver.GetMessage().c_str()); } { @@ -195,7 +195,89 @@ TEST(ParallelMoveTest, Pairs) { Location::RegisterLocation(0), nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(2 <-> 0) (1 -> 3)", resolver.GetMessage().c_str()); + ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); + } + { + TestParallelMoveResolver resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterLocation(2), + Location::RegisterLocation(7), + nullptr); + moves->AddMove( + Location::RegisterLocation(7), + Location::RegisterLocation(1), + nullptr); + moves->AddMove( + Location::RegisterPairLocation(0, 1), + Location::RegisterPairLocation(2, 3), + nullptr); + resolver.EmitNativeCode(moves); + ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); + } + { + TestParallelMoveResolver resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterLocation(2), + Location::RegisterLocation(7), + nullptr); + moves->AddMove( + Location::RegisterPairLocation(0, 1), + Location::RegisterPairLocation(2, 3), + nullptr); + moves->AddMove( + Location::RegisterLocation(7), + Location::RegisterLocation(1), + nullptr); + resolver.EmitNativeCode(moves); + ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); + } + { + TestParallelMoveResolver resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterPairLocation(0, 1), + Location::RegisterPairLocation(2, 3), + nullptr); + moves->AddMove( + Location::RegisterLocation(2), + Location::RegisterLocation(7), + nullptr); + moves->AddMove( + Location::RegisterLocation(7), + Location::RegisterLocation(1), + nullptr); + resolver.EmitNativeCode(moves); + ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); + } + { + TestParallelMoveResolver resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterPairLocation(0, 1), + Location::RegisterPairLocation(2, 3), + nullptr); + moves->AddMove( + Location::RegisterPairLocation(2, 3), + Location::RegisterPairLocation(0, 1), + nullptr); + resolver.EmitNativeCode(moves); + ASSERT_STREQ("(2,3 <-> 0,1)", 
resolver.GetMessage().c_str()); + } + { + TestParallelMoveResolver resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterPairLocation(2, 3), + Location::RegisterPairLocation(0, 1), + nullptr); + moves->AddMove( + Location::RegisterPairLocation(0, 1), + Location::RegisterPairLocation(2, 3), + nullptr); + resolver.EmitNativeCode(moves); + ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); } } diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index 12acd0884a..2d9a2bf330 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -42,6 +42,11 @@ void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) { check->ReplaceWith(check->InputAt(0)); } +void PrepareForRegisterAllocation::VisitBoundType(HBoundType* bound_type) { + bound_type->ReplaceWith(bound_type->InputAt(0)); + bound_type->GetBlock()->RemoveInstruction(bound_type); +} + void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { HLoadClass* cls = check->GetLoadClass(); check->ReplaceWith(cls); diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index 0fdb65ffe0..0f697fbc25 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -36,6 +36,7 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { virtual void VisitNullCheck(HNullCheck* check) OVERRIDE; virtual void VisitDivZeroCheck(HDivZeroCheck* check) OVERRIDE; virtual void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE; + virtual void VisitBoundType(HBoundType* bound_type) OVERRIDE; virtual void VisitClinitCheck(HClinitCheck* check) OVERRIDE; virtual void VisitCondition(HCondition* condition) OVERRIDE; diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc index 9cf8235d85..293fde978e 100644 --- a/compiler/optimizing/pretty_printer_test.cc +++ b/compiler/optimizing/pretty_printer_test.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "base/arena_allocator.h" #include "base/stringprintf.h" #include "builder.h" #include "dex_file.h" @@ -21,7 +22,6 @@ #include "nodes.h" #include "optimizing_unit_test.h" #include "pretty_printer.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc index 7e274f6ebf..fe23fcf326 100644 --- a/compiler/optimizing/primitive_type_propagation.cc +++ b/compiler/optimizing/primitive_type_propagation.cc @@ -40,6 +40,7 @@ static Primitive::Type MergeTypes(Primitive::Type existing, Primitive::Type new_ // Re-compute and update the type of the instruction. Returns // whether or not the type was changed. bool PrimitiveTypePropagation::UpdateType(HPhi* phi) { + DCHECK(phi->IsLive()); Primitive::Type existing = phi->GetType(); Primitive::Type new_type = existing; @@ -49,15 +50,20 @@ bool PrimitiveTypePropagation::UpdateType(HPhi* phi) { } phi->SetType(new_type); - if (new_type == Primitive::kPrimDouble || new_type == Primitive::kPrimFloat) { + if (new_type == Primitive::kPrimDouble + || new_type == Primitive::kPrimFloat + || new_type == Primitive::kPrimNot) { // If the phi is of floating point type, we need to update its inputs to that // type. 
For inputs that are phis, we need to recompute their types. for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { HInstruction* input = phi->InputAt(i); if (input->GetType() != new_type) { - HInstruction* equivalent = SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type); + HInstruction* equivalent = (new_type == Primitive::kPrimNot) + ? SsaBuilder::GetReferenceTypeEquivalent(input) + : SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type); phi->ReplaceInput(equivalent, i); if (equivalent->IsPhi()) { + equivalent->AsPhi()->SetLive(); AddToWorklist(equivalent->AsPhi()); } } @@ -78,15 +84,9 @@ void PrimitiveTypePropagation::VisitBasicBlock(HBasicBlock* block) { if (block->IsLoopHeader()) { for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { HPhi* phi = it.Current()->AsPhi(); - // Set the initial type for the phi. Use the non back edge input for reaching - // a fixed point faster. - Primitive::Type phi_type = phi->GetType(); - // We merge with the existing type, that has been set by the SSA builder. - DCHECK(phi_type == Primitive::kPrimVoid - || phi_type == Primitive::kPrimFloat - || phi_type == Primitive::kPrimDouble); - phi->SetType(MergeTypes(phi->InputAt(0)->GetType(), phi->GetType())); - AddToWorklist(phi); + if (phi->IsLive()) { + AddToWorklist(phi); + } } } else { for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { @@ -95,7 +95,10 @@ void PrimitiveTypePropagation::VisitBasicBlock(HBasicBlock* block) { // doing a reverse post-order visit, therefore either the phi users are // non-loop phi and will be visited later in the visit, or are loop-phis, // and they are already in the work list. - UpdateType(it.Current()->AsPhi()); + HPhi* phi = it.Current()->AsPhi(); + if (phi->IsLive()) { + UpdateType(phi); + } } } } @@ -110,13 +113,14 @@ void PrimitiveTypePropagation::ProcessWorklist() { } void PrimitiveTypePropagation::AddToWorklist(HPhi* instruction) { + DCHECK(instruction->IsLive()); worklist_.Add(instruction); } void PrimitiveTypePropagation::AddDependentInstructionsToWorklist(HPhi* instruction) { for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) { HPhi* phi = it.Current()->GetUser()->AsPhi(); - if (phi != nullptr) { + if (phi != nullptr && phi->IsLive()) { AddToWorklist(phi); } } diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 24e6837f45..76b8d7eacf 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -16,16 +16,17 @@ #include "reference_type_propagation.h" +#include "class_linker.h" +#include "mirror/class-inl.h" +#include "mirror/dex_cache.h" +#include "scoped_thread_state_change.h" + namespace art { -// TODO: Only do the analysis on reference types. We currently have to handle -// the `null` constant, that is represented as a `HIntConstant` and therefore -// has the Primitive::kPrimInt type. +// TODO: handle: a !=/== null. void ReferenceTypePropagation::Run() { - // Compute null status for instructions. - - // To properly propagate not-null info we need to visit in the dominator-based order. + // To properly propagate type info we need to visit in the dominator-based order. // Reverse post order guarantees a node's dominators are visited first. // We take advantage of this order in `VisitBasicBlock`. 
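The comment above relies on a standard property: in a reverse post-order of the CFG (back edges aside), a block is only visited after all of its dominators, so non-loop phis already see propagated inputs when their block is reached. A tiny self-contained sketch of computing such an order for a made-up diamond CFG; the Block struct and DFS helper are illustrative, not the HGraph API:

// Standalone sketch: reverse post-order puts dominators before dominated blocks.
#include <algorithm>
#include <cassert>
#include <vector>

struct Block { std::vector<int> successors; };

static void PostOrder(const std::vector<Block>& cfg, int id,
                      std::vector<bool>* visited, std::vector<int>* out) {
  (*visited)[id] = true;
  for (int succ : cfg[id].successors) {
    if (!(*visited)[succ]) PostOrder(cfg, succ, visited, out);
  }
  out->push_back(id);
}

int main() {
  // Diamond: 0 -> {1, 2}, 1 -> 3, 2 -> 3. Block 0 dominates everything,
  // and neither 1 nor 2 dominates the join block 3.
  std::vector<Block> cfg = {{{1, 2}}, {{3}}, {{3}}, {{}}};
  std::vector<bool> visited(cfg.size(), false);
  std::vector<int> order;
  PostOrder(cfg, /*entry=*/0, &visited, &order);
  std::reverse(order.begin(), order.end());  // reverse post-order

  // The entry (the only dominator of 1, 2 and 3) comes first, and the join
  // block is visited after both of its predecessors.
  assert(order.front() == 0);
  assert(order.back() == 3);
  return 0;
}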
for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { @@ -34,9 +35,210 @@ void ReferenceTypePropagation::Run() { ProcessWorklist(); } +void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) { + // TODO: handle other instructions that give type info + // (NewArray/Call/Field accesses/array accesses) + + // Initialize exact types first for faster convergence. + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instr = it.Current(); + if (instr->IsNewInstance()) { + VisitNewInstance(instr->AsNewInstance()); + } else if (instr->IsLoadClass()) { + VisitLoadClass(instr->AsLoadClass()); + } + } + + // Handle Phis. + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + VisitPhi(it.Current()->AsPhi()); + } + + // Add extra nodes to bound types. + BoundTypeForIfInstanceOf(block); +} + +// Detects if `block` is the True block for the pattern +// `if (x instanceof ClassX) { }` +// If that's the case insert an HBoundType instruction to bound the type of `x` +// to `ClassX` in the scope of the dominated blocks. +void ReferenceTypePropagation::BoundTypeForIfInstanceOf(HBasicBlock* block) { + HInstruction* lastInstruction = block->GetLastInstruction(); + if (!lastInstruction->IsIf()) { + return; + } + HInstruction* ifInput = lastInstruction->InputAt(0); + // TODO: Handle more patterns here: HIf(bool) HIf(HNotEqual). + if (!ifInput->IsEqual()) { + return; + } + HInstruction* instanceOf = ifInput->InputAt(0); + HInstruction* comp_value = ifInput->InputAt(1); + if (!instanceOf->IsInstanceOf() || !comp_value->IsIntConstant()) { + return; + } + + HInstruction* obj = instanceOf->InputAt(0); + HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass(); + + ReferenceTypeInfo obj_rti = obj->GetReferenceTypeInfo(); + ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); + HBoundType* bound_type = new (graph_->GetArena()) HBoundType(obj, class_rti); + + // Narrow the type as much as possible. + { + ScopedObjectAccess soa(Thread::Current()); + if (!load_class->IsResolved() || class_rti.IsSupertypeOf(obj_rti)) { + bound_type->SetReferenceTypeInfo(obj_rti); + } else { + bound_type->SetReferenceTypeInfo( + ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false)); + } + } + + block->InsertInstructionBefore(bound_type, lastInstruction); + // Pick the right successor based on the value we compare against. + HIntConstant* comp_value_int = comp_value->AsIntConstant(); + HBasicBlock* instanceOfTrueBlock = comp_value_int->GetValue() == 0 + ? lastInstruction->AsIf()->IfFalseSuccessor() + : lastInstruction->AsIf()->IfTrueSuccessor(); + + for (HUseIterator<HInstruction*> it(obj->GetUses()); !it.Done(); it.Advance()) { + HInstruction* user = it.Current()->GetUser(); + if (instanceOfTrueBlock->Dominates(user->GetBlock())) { + user->ReplaceInput(bound_type, it.Current()->GetIndex()); + } + } +} + +void ReferenceTypePropagation::VisitNewInstance(HNewInstance* instr) { + ScopedObjectAccess soa(Thread::Current()); + mirror::DexCache* dex_cache = dex_compilation_unit_.GetClassLinker()->FindDexCache(dex_file_); + // Get type from dex cache assuming it was populated by the verifier. 
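BoundTypeForIfInstanceOf only rewires uses sitting in blocks dominated by the instanceof-true successor, so the narrowed HBoundType never leaks onto paths where the test may have failed. A rough standalone sketch of that dominated-use rewrite, with an invented immediate-dominator array and Use records in place of HBasicBlock::Dominates and the use iterator:

// Rough sketch of restricting the bound-type rewrite to dominated users.
#include <cassert>
#include <vector>

// Walk up the immediate-dominator chain (-1 marks the entry block).
static bool Dominates(const std::vector<int>& idom, int dominator, int block) {
  for (int b = block; b != -1; b = idom[b]) {
    if (b == dominator) return true;
  }
  return false;
}

struct Use { int user_block; int value; };  // value: 0 = original obj, 1 = bound node

int main() {
  // Entry 0 dominates both 1 (the instanceof-true block) and 2 (the merge);
  // block 1 dominates only itself.
  std::vector<int> idom = {-1, 0, 0};
  std::vector<Use> uses = {{1, 0}, {2, 0}};  // two uses of obj

  const int instanceof_true_block = 1;
  for (Use& use : uses) {
    if (Dominates(idom, instanceof_true_block, use.user_block)) {
      use.value = 1;  // rewire this input to the HBoundType-like node
    }
  }
  assert(uses[0].value == 1);  // inside the guarded region: narrowed
  assert(uses[1].value == 0);  // at the merge point: unchanged
  return 0;
}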
+ mirror::Class* resolved_class = dex_cache->GetResolvedType(instr->GetTypeIndex()); + if (resolved_class != nullptr) { + MutableHandle<mirror::Class> handle = handles_->NewHandle(resolved_class); + instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(handle, true)); + } +} + +void ReferenceTypePropagation::VisitLoadClass(HLoadClass* instr) { + ScopedObjectAccess soa(Thread::Current()); + mirror::DexCache* dex_cache = dex_compilation_unit_.GetClassLinker()->FindDexCache(dex_file_); + // Get type from dex cache assuming it was populated by the verifier. + mirror::Class* resolved_class = dex_cache->GetResolvedType(instr->GetTypeIndex()); + if (resolved_class != nullptr) { + Handle<mirror::Class> handle = handles_->NewHandle(resolved_class); + instr->SetLoadedClassRTI(ReferenceTypeInfo::Create(handle, /* is_exact */ true)); + } + Handle<mirror::Class> class_handle = handles_->NewHandle(mirror::Class::GetJavaLangClass()); + instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(class_handle, /* is_exact */ true)); +} + +void ReferenceTypePropagation::VisitPhi(HPhi* phi) { + if (phi->GetType() != Primitive::kPrimNot) { + return; + } + + if (phi->GetBlock()->IsLoopHeader()) { + // Set the initial type for the phi. Use the non back edge input for reaching + // a fixed point faster. + AddToWorklist(phi); + phi->SetCanBeNull(phi->InputAt(0)->CanBeNull()); + phi->SetReferenceTypeInfo(phi->InputAt(0)->GetReferenceTypeInfo()); + } else { + // Eagerly compute the type of the phi, for quicker convergence. Note + // that we don't need to add users to the worklist because we are + // doing a reverse post-order visit, therefore either the phi users are + // non-loop phi and will be visited later in the visit, or are loop-phis, + // and they are already in the work list. + UpdateNullability(phi); + UpdateReferenceTypeInfo(phi); + } +} + +ReferenceTypeInfo ReferenceTypePropagation::MergeTypes(const ReferenceTypeInfo& a, + const ReferenceTypeInfo& b) { + bool is_exact = a.IsExact() && b.IsExact(); + bool is_top = a.IsTop() || b.IsTop(); + Handle<mirror::Class> type_handle; + + if (!is_top) { + if (a.GetTypeHandle().Get() == b.GetTypeHandle().Get()) { + type_handle = a.GetTypeHandle(); + } else if (a.IsSupertypeOf(b)) { + type_handle = a.GetTypeHandle(); + is_exact = false; + } else if (b.IsSupertypeOf(a)) { + type_handle = b.GetTypeHandle(); + is_exact = false; + } else { + // TODO: Find a common super class. + is_top = true; + is_exact = false; + } + } + + return is_top + ? ReferenceTypeInfo::CreateTop(is_exact) + : ReferenceTypeInfo::Create(type_handle, is_exact); +} + +bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) { + ScopedObjectAccess soa(Thread::Current()); + + ReferenceTypeInfo previous_rti = instr->GetReferenceTypeInfo(); + if (instr->IsBoundType()) { + UpdateBoundType(instr->AsBoundType()); + } else if (instr->IsPhi()) { + UpdatePhi(instr->AsPhi()); + } else { + LOG(FATAL) << "Invalid instruction (should not get here)"; + } + + return !previous_rti.IsEqual(instr->GetReferenceTypeInfo()); +} + +void ReferenceTypePropagation::UpdateBoundType(HBoundType* instr) { + ReferenceTypeInfo new_rti = instr->InputAt(0)->GetReferenceTypeInfo(); + // Be sure that we don't go over the bounded type. 
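MergeTypes above is a join on the same small lattice: identical classes keep their type, a supertype relationship keeps the supertype but drops exactness, and unrelated classes fall back to Top until a common-superclass search is implemented. A compact toy model of those rules, again over an invented class table rather than mirror::Class:

// Toy model of ReferenceTypeInfo merging; classes and handles are stand-ins.
#include <cassert>

enum class TypeId { kObject, kNumber, kInteger, kString };  // Integer <: Number; String, Number <: Object

static bool IsSubclassOf(TypeId sub, TypeId super) {
  if (super == TypeId::kObject) return true;
  if (super == TypeId::kNumber) return sub == TypeId::kNumber || sub == TypeId::kInteger;
  return sub == super;
}

struct Rti {
  TypeId type;
  bool is_exact;
  bool is_top;  // "Top" stands for java.lang.Object with no usable handle
};

static Rti Merge(const Rti& a, const Rti& b) {
  bool is_exact = a.is_exact && b.is_exact;
  if (a.is_top || b.is_top) return {TypeId::kObject, is_exact, true};
  if (a.type == b.type) return {a.type, is_exact, false};
  if (IsSubclassOf(b.type, a.type)) return {a.type, false, false};  // a is the supertype
  if (IsSubclassOf(a.type, b.type)) return {b.type, false, false};  // b is the supertype
  // No common-superclass search yet: give up and go to Top, losing exactness.
  return {TypeId::kObject, false, true};
}

int main() {
  Rti number{TypeId::kNumber, false, false};
  Rti integer{TypeId::kInteger, true, false};
  Rti string{TypeId::kString, true, false};

  Rti m1 = Merge(number, integer);
  assert(m1.type == TypeId::kNumber && !m1.is_exact && !m1.is_top);

  Rti m2 = Merge(integer, string);  // unrelated classes -> Top for now
  assert(m2.is_top && !m2.is_exact);
  return 0;
}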
+ ReferenceTypeInfo bound_rti = instr->GetBoundType(); + if (!bound_rti.IsSupertypeOf(new_rti)) { + new_rti = bound_rti; + } + instr->SetReferenceTypeInfo(new_rti); +} + +void ReferenceTypePropagation::UpdatePhi(HPhi* instr) { + ReferenceTypeInfo new_rti = instr->InputAt(0)->GetReferenceTypeInfo(); + if (new_rti.IsTop() && !new_rti.IsExact()) { + // Early return if we are Top and inexact. + instr->SetReferenceTypeInfo(new_rti); + return; + } + for (size_t i = 1; i < instr->InputCount(); i++) { + new_rti = MergeTypes(new_rti, instr->InputAt(i)->GetReferenceTypeInfo()); + if (new_rti.IsTop()) { + if (!new_rti.IsExact()) { + break; + } else { + continue; + } + } + } + instr->SetReferenceTypeInfo(new_rti); +} + // Re-computes and updates the nullability of the instruction. Returns whether or // not the nullability was changed. -bool ReferenceTypePropagation::UpdateNullability(HPhi* phi) { +bool ReferenceTypePropagation::UpdateNullability(HInstruction* instr) { + DCHECK(instr->IsPhi() || instr->IsBoundType()); + + if (!instr->IsPhi()) { + return false; + } + + HPhi* phi = instr->AsPhi(); bool existing_can_be_null = phi->CanBeNull(); bool new_can_be_null = false; for (size_t i = 0; i < phi->InputCount(); i++) { @@ -47,48 +249,26 @@ bool ReferenceTypePropagation::UpdateNullability(HPhi* phi) { return existing_can_be_null != new_can_be_null; } - -void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) { - if (block->IsLoopHeader()) { - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - // Set the initial type for the phi. Use the non back edge input for reaching - // a fixed point faster. - HPhi* phi = it.Current()->AsPhi(); - AddToWorklist(phi); - phi->SetCanBeNull(phi->InputAt(0)->CanBeNull()); - } - } else { - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - // Eagerly compute the type of the phi, for quicker convergence. Note - // that we don't need to add users to the worklist because we are - // doing a reverse post-order visit, therefore either the phi users are - // non-loop phi and will be visited later in the visit, or are loop-phis, - // and they are already in the work list. 
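UpdateNullability is the simplest of the fixed-point updates driven by this pass: a phi can be null iff any of its inputs can, and the worklist loop further below only re-queues dependent phis and bound types when the answer actually changes. A minimal standalone worklist of that shape, using an invented Node graph instead of HPhi:

// Standalone sketch of the nullability fixed point over phi-like nodes.
#include <cassert>
#include <vector>

struct Node {
  std::vector<int> inputs;  // indices of input nodes
  std::vector<int> users;   // indices of dependent phi-like nodes
  bool can_be_null = false;
};

static bool UpdateNullability(std::vector<Node>& nodes, int id) {
  bool old_value = nodes[id].can_be_null;
  bool new_value = false;
  for (int input : nodes[id].inputs) {
    new_value = new_value || nodes[input].can_be_null;
  }
  nodes[id].can_be_null = new_value;
  return old_value != new_value;  // report whether anything changed
}

int main() {
  // 0: a value known to be non-null, 1: a possibly-null value,
  // 2: phi(0, 1), 3: phi(2, 0).
  std::vector<Node> nodes(4);
  nodes[1].can_be_null = true;
  nodes[2].inputs = {0, 1}; nodes[2].users = {3};
  nodes[3].inputs = {2, 0};

  std::vector<int> worklist = {2, 3};
  while (!worklist.empty()) {
    int id = worklist.back();
    worklist.pop_back();
    if (UpdateNullability(nodes, id)) {
      // Only re-queue users when the answer changed, as in ProcessWorklist.
      for (int user : nodes[id].users) worklist.push_back(user);
    }
  }
  assert(nodes[2].can_be_null);
  assert(nodes[3].can_be_null);
  return 0;
}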
- UpdateNullability(it.Current()->AsPhi()); - } - } -} - void ReferenceTypePropagation::ProcessWorklist() { while (!worklist_.IsEmpty()) { - HPhi* instruction = worklist_.Pop(); - if (UpdateNullability(instruction)) { + HInstruction* instruction = worklist_.Pop(); + if (UpdateNullability(instruction) || UpdateReferenceTypeInfo(instruction)) { AddDependentInstructionsToWorklist(instruction); } } } -void ReferenceTypePropagation::AddToWorklist(HPhi* instruction) { +void ReferenceTypePropagation::AddToWorklist(HInstruction* instruction) { + DCHECK_EQ(instruction->GetType(), Primitive::kPrimNot) << instruction->GetType(); worklist_.Add(instruction); } -void ReferenceTypePropagation::AddDependentInstructionsToWorklist(HPhi* instruction) { +void ReferenceTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) { for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) { - HPhi* phi = it.Current()->GetUser()->AsPhi(); - if (phi != nullptr) { - AddToWorklist(phi); + HInstruction* user = it.Current()->GetUser(); + if (user->IsPhi() || user->IsBoundType()) { + AddToWorklist(user); } } } - } // namespace art diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h index a74319d0c5..e346dbfc6c 100644 --- a/compiler/optimizing/reference_type_propagation.h +++ b/compiler/optimizing/reference_type_propagation.h @@ -17,31 +17,57 @@ #ifndef ART_COMPILER_OPTIMIZING_REFERENCE_TYPE_PROPAGATION_H_ #define ART_COMPILER_OPTIMIZING_REFERENCE_TYPE_PROPAGATION_H_ +#include "driver/dex_compilation_unit.h" +#include "handle_scope-inl.h" #include "nodes.h" #include "optimization.h" +#include "optimizing_compiler_stats.h" namespace art { /** * Propagates reference types to instructions. - * TODO: Currently only nullability is computed. 
*/ class ReferenceTypePropagation : public HOptimization { public: - explicit ReferenceTypePropagation(HGraph* graph) + ReferenceTypePropagation(HGraph* graph, + const DexFile& dex_file, + const DexCompilationUnit& dex_compilation_unit, + StackHandleScopeCollection* handles) : HOptimization(graph, true, "reference_type_propagation"), + dex_file_(dex_file), + dex_compilation_unit_(dex_compilation_unit), + handles_(handles), worklist_(graph->GetArena(), kDefaultWorklistSize) {} void Run() OVERRIDE; private: + void VisitNewInstance(HNewInstance* new_instance); + void VisitLoadClass(HLoadClass* load_class); + void VisitPhi(HPhi* phi); void VisitBasicBlock(HBasicBlock* block); + + void UpdateBoundType(HBoundType* bound_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void UpdatePhi(HPhi* phi) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + + void BoundTypeForIfInstanceOf(HBasicBlock* block); + void ProcessWorklist(); - void AddToWorklist(HPhi* phi); - void AddDependentInstructionsToWorklist(HPhi* phi); - bool UpdateNullability(HPhi* phi); + void AddToWorklist(HInstruction* instr); + void AddDependentInstructionsToWorklist(HInstruction* instr); + + bool UpdateNullability(HInstruction* instr); + bool UpdateReferenceTypeInfo(HInstruction* instr); + + ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a, const ReferenceTypeInfo& b) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + + const DexFile& dex_file_; + const DexCompilationUnit& dex_compilation_unit_; + StackHandleScopeCollection* handles_; - GrowableArray<HPhi*> worklist_; + GrowableArray<HInstruction*> worklist_; static constexpr size_t kDefaultWorklistSize = 8; diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 3809720cb4..54e62a5b2c 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -48,7 +48,10 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, physical_core_register_intervals_(allocator, codegen->GetNumberOfCoreRegisters()), physical_fp_register_intervals_(allocator, codegen->GetNumberOfFloatingPointRegisters()), temp_intervals_(allocator, 4), - spill_slots_(allocator, kDefaultNumberOfSpillSlots), + int_spill_slots_(allocator, kDefaultNumberOfSpillSlots), + long_spill_slots_(allocator, kDefaultNumberOfSpillSlots), + float_spill_slots_(allocator, kDefaultNumberOfSpillSlots), + double_spill_slots_(allocator, kDefaultNumberOfSpillSlots), safepoints_(allocator, 0), processing_core_registers_(false), number_of_registers_(-1), @@ -252,8 +255,13 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { && (instruction->GetType() != Primitive::kPrimFloat); if (locations->CanCall()) { - if (!instruction->IsSuspendCheck()) { - codegen_->MarkNotLeaf(); + if (codegen_->IsLeafMethod()) { + // TODO: We do this here because we do not want the suspend check to artificially + // create live registers. We should find another place, but this is currently the + // simplest. 
+ DCHECK(instruction->IsSuspendCheckEntry()); + instruction->GetBlock()->RemoveInstruction(instruction); + return; } safepoints_.Add(instruction); if (locations->OnlyCallsOnSlowPath()) { @@ -433,7 +441,7 @@ bool RegisterAllocator::ValidateInternal(bool log_fatal_on_failure) const { } } - return ValidateIntervals(intervals, spill_slots_.Size(), reserved_out_slots_, *codegen_, + return ValidateIntervals(intervals, GetNumberOfSpillSlots(), reserved_out_slots_, *codegen_, allocator_, processing_core_registers_, log_fatal_on_failure); } @@ -1128,41 +1136,62 @@ void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { } size_t end = last_sibling->GetEnd(); + GrowableArray<size_t>* spill_slots = nullptr; + switch (interval->GetType()) { + case Primitive::kPrimDouble: + spill_slots = &double_spill_slots_; + break; + case Primitive::kPrimLong: + spill_slots = &long_spill_slots_; + break; + case Primitive::kPrimFloat: + spill_slots = &float_spill_slots_; + break; + case Primitive::kPrimNot: + case Primitive::kPrimInt: + case Primitive::kPrimChar: + case Primitive::kPrimByte: + case Primitive::kPrimBoolean: + case Primitive::kPrimShort: + spill_slots = &int_spill_slots_; + break; + case Primitive::kPrimVoid: + LOG(FATAL) << "Unexpected type for interval " << interval->GetType(); + } + // Find an available spill slot. size_t slot = 0; - for (size_t e = spill_slots_.Size(); slot < e; ++slot) { - // We check if it is less rather than less or equal because the parallel move - // resolver does not work when a single spill slot needs to be exchanged with - // a double spill slot. The strict comparison avoids needing to exchange these - // locations at the same lifetime position. - if (spill_slots_.Get(slot) < parent->GetStart() - && (slot == (e - 1) || spill_slots_.Get(slot + 1) < parent->GetStart())) { + for (size_t e = spill_slots->Size(); slot < e; ++slot) { + if (spill_slots->Get(slot) <= parent->GetStart() + && (slot == (e - 1) || spill_slots->Get(slot + 1) <= parent->GetStart())) { break; } } if (parent->NeedsTwoSpillSlots()) { - if (slot == spill_slots_.Size()) { + if (slot == spill_slots->Size()) { // We need a new spill slot. - spill_slots_.Add(end); - spill_slots_.Add(end); - } else if (slot == spill_slots_.Size() - 1) { - spill_slots_.Put(slot, end); - spill_slots_.Add(end); + spill_slots->Add(end); + spill_slots->Add(end); + } else if (slot == spill_slots->Size() - 1) { + spill_slots->Put(slot, end); + spill_slots->Add(end); } else { - spill_slots_.Put(slot, end); - spill_slots_.Put(slot + 1, end); + spill_slots->Put(slot, end); + spill_slots->Put(slot + 1, end); } } else { - if (slot == spill_slots_.Size()) { + if (slot == spill_slots->Size()) { // We need a new spill slot. - spill_slots_.Add(end); + spill_slots->Add(end); } else { - spill_slots_.Put(slot, end); + spill_slots->Put(slot, end); } } - parent->SetSpillSlot((slot + reserved_out_slots_) * kVRegSize); + // Note that the exact spill slot location will be computed when we resolve, + // that is when we know the number of spill slots for each type. 
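With one spill pool per type, the slot index stored on the interval is only meaningful within its pool; the Resolve() change in the following hunk turns it into a frame slot by skipping the pools laid out below it plus the reserved out slots, then scales by kVRegSize. A small standalone sketch of that arithmetic, with made-up pool sizes and an illustrative 4-byte vreg size:

// Sketch of turning a per-type spill slot index into a stack slot number,
// following the documented layout: [double][long][float][int-or-ref][outs][art method].
#include <cassert>
#include <cstdint>

enum class SpillKind { kIntOrRef, kFloat, kLong, kDouble };

struct SpillPools {
  uint32_t reserved_out_slots;  // argument slots reserved for calls
  uint32_t num_int;             // int/ref pool size
  uint32_t num_float;
  uint32_t num_long;
  uint32_t num_double;
};

static uint32_t FrameSlot(const SpillPools& pools, SpillKind kind, uint32_t slot) {
  // Skip everything laid out closer to the ART method than this pool.
  switch (kind) {
    case SpillKind::kDouble:   slot += pools.num_long;   [[fallthrough]];
    case SpillKind::kLong:     slot += pools.num_float;  [[fallthrough]];
    case SpillKind::kFloat:    slot += pools.num_int;    [[fallthrough]];
    case SpillKind::kIntOrRef: slot += pools.reserved_out_slots; break;
  }
  return slot;
}

int main() {
  constexpr uint32_t kVRegSize = 4;  // illustrative vreg size in bytes
  SpillPools pools{/*outs=*/2, /*int=*/3, /*float=*/1, /*long=*/2, /*double=*/2};
  // The first float slot sits above the outs and the int/ref pool.
  assert(FrameSlot(pools, SpillKind::kFloat, 0) == 2 + 3);
  // A double slot at index 2 of its pool sits above all other pools and the outs.
  assert(FrameSlot(pools, SpillKind::kDouble, 2) == 2 + 3 + 1 + 2 + 2);
  // Byte offset within the frame, as the resolved SetSpillSlot value is stored.
  assert(FrameSlot(pools, SpillKind::kDouble, 0) * kVRegSize == 32);
  return 0;
}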
+ parent->SetSpillSlot(slot); } static bool IsValidDestination(Location destination) { @@ -1511,7 +1540,7 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, } void RegisterAllocator::Resolve() { - codegen_->InitializeCodeGeneration(spill_slots_.Size(), + codegen_->InitializeCodeGeneration(GetNumberOfSpillSlots(), maximum_number_of_live_core_registers_, maximum_number_of_live_fp_registers_, reserved_out_slots_, @@ -1537,6 +1566,39 @@ void RegisterAllocator::Resolve() { } else if (current->HasSpillSlot()) { current->SetSpillSlot(current->GetSpillSlot() + codegen_->GetFrameSize()); } + } else if (current->HasSpillSlot()) { + // Adjust the stack slot, now that we know the number of them for each type. + // The way this implementation lays out the stack is the following: + // [parameter slots ] + // [double spill slots ] + // [long spill slots ] + // [float spill slots ] + // [int/ref values ] + // [maximum out values ] (number of arguments for calls) + // [art method ]. + uint32_t slot = current->GetSpillSlot(); + switch (current->GetType()) { + case Primitive::kPrimDouble: + slot += long_spill_slots_.Size(); + FALLTHROUGH_INTENDED; + case Primitive::kPrimLong: + slot += float_spill_slots_.Size(); + FALLTHROUGH_INTENDED; + case Primitive::kPrimFloat: + slot += int_spill_slots_.Size(); + FALLTHROUGH_INTENDED; + case Primitive::kPrimNot: + case Primitive::kPrimInt: + case Primitive::kPrimChar: + case Primitive::kPrimByte: + case Primitive::kPrimBoolean: + case Primitive::kPrimShort: + slot += reserved_out_slots_; + break; + case Primitive::kPrimVoid: + LOG(FATAL) << "Unexpected type for interval " << current->GetType(); + } + current->SetSpillSlot(slot * kVRegSize); } Location source = current->ToLocation(); diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index b8f70bdc18..ff2f106b74 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -75,7 +75,10 @@ class RegisterAllocator { } size_t GetNumberOfSpillSlots() const { - return spill_slots_.Size(); + return int_spill_slots_.Size() + + long_spill_slots_.Size() + + float_spill_slots_.Size() + + double_spill_slots_.Size(); } private: @@ -171,8 +174,14 @@ class RegisterAllocator { // where an instruction requires a temporary. GrowableArray<LiveInterval*> temp_intervals_; - // The spill slots allocated for live intervals. - GrowableArray<size_t> spill_slots_; + // The spill slots allocated for live intervals. We ensure spill slots + // are typed to avoid (1) doing moves and swaps between two different kinds + // of registers, and (2) swapping between a single stack slot and a double + // stack slot. This simplifies the parallel move resolver. + GrowableArray<size_t> int_spill_slots_; + GrowableArray<size_t> long_spill_slots_; + GrowableArray<size_t> float_spill_slots_; + GrowableArray<size_t> double_spill_slots_; // Instructions that need a safepoint. GrowableArray<HInstruction*> safepoints_; diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 0cc00c0fde..e5d06a9f8b 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" #include "code_generator_x86.h" @@ -25,7 +26,6 @@ #include "register_allocator.h" #include "ssa_liveness_analysis.h" #include "ssa_phi_elimination.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index c9a21aa681..3dc75059b2 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -42,20 +42,33 @@ void SsaBuilder::BuildSsa() { } } - // 3) Remove dead phis. This will remove phis that are only used by environments: + // 3) Mark dead phis. This will mark phis that are only used by environments: // at the DEX level, the type of these phis does not need to be consistent, but // our code generator will complain if the inputs of a phi do not have the same - // type (modulo the special case of `null`). - SsaDeadPhiElimination dead_phis(GetGraph()); - dead_phis.Run(); + // type. The marking allows the type propagation to know which phis it needs + // to handle. We mark but do not eliminate: the elimination will be done in + // step 5). + { + SsaDeadPhiElimination dead_phis(GetGraph()); + dead_phis.MarkDeadPhis(); + } // 4) Propagate types of phis. At this point, phis are typed void in the general - // case, or float or double when we created a floating-point equivalent. So we + // case, or float/double/reference when we created an equivalent phi. So we // need to propagate the types across phis to give them a correct type. PrimitiveTypePropagation type_propagation(GetGraph()); type_propagation.Run(); - // 5) Clear locals. + // 5) Step 4) changes inputs of phis which may lead to dead phis again. We re-run + // the algorithm and this time elimimates them. + // TODO: Make this work with debug info and reference liveness. We currently + // eagerly remove phis used in environments. + { + SsaDeadPhiElimination dead_phis(GetGraph()); + dead_phis.Run(); + } + + // 6) Clear locals. // TODO: Move this to a dead code eliminator phase. for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions()); !it.Done(); @@ -185,15 +198,24 @@ static HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant) { /** * Because of Dex format, we might end up having the same phi being - * used for non floating point operations and floating point operations. Because - * we want the graph to be correctly typed (and thereafter avoid moves between + * used for non floating point operations and floating point / reference operations. + * Because we want the graph to be correctly typed (and thereafter avoid moves between * floating point registers and core registers), we need to create a copy of the - * phi with a floating point type. + * phi with a floating point / reference type. */ -static HPhi* GetFloatOrDoubleEquivalentOfPhi(HPhi* phi, Primitive::Type type) { - // We place the floating point phi next to this phi. +static HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type) { + // We place the floating point /reference phi next to this phi. HInstruction* next = phi->GetNext(); - if (next == nullptr || (next->AsPhi()->GetRegNumber() != phi->GetRegNumber())) { + if (next != nullptr + && next->AsPhi()->GetRegNumber() == phi->GetRegNumber() + && next->GetType() != type) { + // Move to the next phi to see if it is the one we are looking for. 
+ next = next->GetNext(); + } + + if (next == nullptr + || (next->AsPhi()->GetRegNumber() != phi->GetRegNumber()) + || (next->GetType() != type)) { ArenaAllocator* allocator = phi->GetBlock()->GetGraph()->GetArena(); HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), phi->InputCount(), type); for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { @@ -223,7 +245,7 @@ HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* user, } else if (value->IsIntConstant()) { return GetFloatEquivalent(value->AsIntConstant()); } else if (value->IsPhi()) { - return GetFloatOrDoubleEquivalentOfPhi(value->AsPhi(), type); + return GetFloatDoubleOrReferenceEquivalentOfPhi(value->AsPhi(), type); } else { // For other instructions, we assume the verifier has checked that the dex format is correctly // typed and the value in a dex register will not be used for both floating point and @@ -234,12 +256,25 @@ HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* user, } } +HInstruction* SsaBuilder::GetReferenceTypeEquivalent(HInstruction* value) { + if (value->IsIntConstant()) { + DCHECK_EQ(value->AsIntConstant()->GetValue(), 0); + return value->GetBlock()->GetGraph()->GetNullConstant(); + } else { + DCHECK(value->IsPhi()); + return GetFloatDoubleOrReferenceEquivalentOfPhi(value->AsPhi(), Primitive::kPrimNot); + } +} + void SsaBuilder::VisitLoadLocal(HLoadLocal* load) { HInstruction* value = current_locals_->GetInstructionAt(load->GetLocal()->GetRegNumber()); - if (load->GetType() != value->GetType() - && (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble)) { - // If the operation requests a specific type, we make sure its input is of that type. - value = GetFloatOrDoubleEquivalent(load, value, load->GetType()); + // If the operation requests a specific type, we make sure its input is of that type. + if (load->GetType() != value->GetType()) { + if (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble) { + value = GetFloatOrDoubleEquivalent(load, value, load->GetType()); + } else if (load->GetType() == Primitive::kPrimNot) { + value = GetReferenceTypeEquivalent(value); + } } load->ReplaceWith(value); load->GetBlock()->RemoveInstruction(load); diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 2eec87b618..148e9590c3 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -58,6 +58,8 @@ class SsaBuilder : public HGraphVisitor { HInstruction* instruction, Primitive::Type type); + static HInstruction* GetReferenceTypeEquivalent(HInstruction* instruction); + private: // Locals for the current block being visited. HEnvironment* current_locals_; diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 1b06315fce..bebb73ba22 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -115,14 +115,13 @@ void SsaLivenessAnalysis::NumberInstructions() { // to differentiate between the start and end of an instruction. Adding 2 to // the lifetime position for each instruction ensures the start of an // instruction is different than the end of the previous instruction. 
- HGraphVisitor* location_builder = codegen_->GetLocationBuilder(); for (HLinearOrderIterator it(*this); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); block->SetLifetimeStart(lifetime_position); for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { HInstruction* current = inst_it.Current(); - current->Accept(location_builder); + codegen_->AllocateLocations(current); LocationSummary* locations = current->GetLocations(); if (locations != nullptr && locations->Out().IsValid()) { instructions_from_ssa_index_.Add(current); @@ -140,7 +139,7 @@ void SsaLivenessAnalysis::NumberInstructions() { for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done(); inst_it.Advance()) { HInstruction* current = inst_it.Current(); - current->Accept(codegen_->GetLocationBuilder()); + codegen_->AllocateLocations(current); LocationSummary* locations = current->GetLocations(); if (locations != nullptr && locations->Out().IsValid()) { instructions_from_ssa_index_.Add(current); @@ -312,7 +311,12 @@ bool SsaLivenessAnalysis::UpdateLiveIn(const HBasicBlock& block) { return live_in->UnionIfNotIn(live_out, kill); } +static int RegisterOrLowRegister(Location location) { + return location.IsPair() ? location.low() : location.reg(); +} + int LiveInterval::FindFirstRegisterHint(size_t* free_until) const { + DCHECK(!IsHighInterval()); if (GetParent() == this && defined_by_ != nullptr) { // This is the first interval for the instruction. Try to find // a register based on its definition. @@ -334,8 +338,12 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until) const { if (user->IsPhi()) { // If the phi has a register, try to use the same. Location phi_location = user->GetLiveInterval()->ToLocation(); - if (SameRegisterKind(phi_location) && free_until[phi_location.reg()] >= use_position) { - return phi_location.reg(); + if (phi_location.IsRegisterKind()) { + DCHECK(SameRegisterKind(phi_location)); + int reg = RegisterOrLowRegister(phi_location); + if (free_until[reg] >= use_position) { + return reg; + } } const GrowableArray<HBasicBlock*>& predecessors = user->GetBlock()->GetPredecessors(); // If the instruction dies at the phi assignment, we can try having the @@ -348,8 +356,11 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until) const { HInstruction* input = user->InputAt(i); Location location = input->GetLiveInterval()->GetLocationAt( predecessors.Get(i)->GetLifetimeEnd() - 1); - if (location.IsRegister() && free_until[location.reg()] >= use_position) { - return location.reg(); + if (location.IsRegisterKind()) { + int reg = RegisterOrLowRegister(location); + if (free_until[reg] >= use_position) { + return reg; + } } } } @@ -360,8 +371,12 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until) const { // We use the user's lifetime position - 1 (and not `use_position`) because the // register is blocked at the beginning of the user. size_t position = user->GetLifetimePosition() - 1; - if (SameRegisterKind(expected) && free_until[expected.reg()] >= position) { - return expected.reg(); + if (expected.IsRegisterKind()) { + DCHECK(SameRegisterKind(expected)); + int reg = RegisterOrLowRegister(expected); + if (free_until[reg] >= position) { + return reg; + } } } } @@ -383,8 +398,9 @@ int LiveInterval::FindHintAtDefinition() const { // If the input dies at the end of the predecessor, we know its register can // be reused. 
Location input_location = input_interval.ToLocation(); - if (SameRegisterKind(input_location)) { - return input_location.reg(); + if (input_location.IsRegisterKind()) { + DCHECK(SameRegisterKind(input_location)); + return RegisterOrLowRegister(input_location); } } } @@ -399,8 +415,9 @@ int LiveInterval::FindHintAtDefinition() const { // If the input dies at the start of this instruction, we know its register can // be reused. Location location = input_interval.ToLocation(); - if (SameRegisterKind(location)) { - return location.reg(); + if (location.IsRegisterKind()) { + DCHECK(SameRegisterKind(location)); + return RegisterOrLowRegister(location); } } } @@ -409,9 +426,19 @@ int LiveInterval::FindHintAtDefinition() const { } bool LiveInterval::SameRegisterKind(Location other) const { - return IsFloatingPoint() - ? other.IsFpuRegister() - : other.IsRegister(); + if (IsFloatingPoint()) { + if (IsLowInterval() || IsHighInterval()) { + return other.IsFpuRegisterPair(); + } else { + return other.IsFpuRegister(); + } + } else { + if (IsLowInterval() || IsHighInterval()) { + return other.IsRegisterPair(); + } else { + return other.IsRegister(); + } + } } bool LiveInterval::NeedsTwoSpillSlots() const { diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc index fd30c1bc76..2f2e2d1fab 100644 --- a/compiler/optimizing/ssa_phi_elimination.cc +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -19,6 +19,11 @@ namespace art { void SsaDeadPhiElimination::Run() { + MarkDeadPhis(); + EliminateDeadPhis(); +} + +void SsaDeadPhiElimination::MarkDeadPhis() { // Add to the worklist phis referenced by non-phi instructions. for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); @@ -49,7 +54,9 @@ void SsaDeadPhiElimination::Run() { } } } +} +void SsaDeadPhiElimination::EliminateDeadPhis() { // Remove phis that are not live. Visit in post order so that phis // that are not inputs of loop phis can be removed when they have // no users left (dead phis might use dead phis). @@ -57,31 +64,33 @@ void SsaDeadPhiElimination::Run() { HBasicBlock* block = it.Current(); HInstruction* current = block->GetFirstPhi(); HInstruction* next = nullptr; + HPhi* phi; while (current != nullptr) { + phi = current->AsPhi(); next = current->GetNext(); - if (current->AsPhi()->IsDead()) { - if (current->HasUses()) { - for (HUseIterator<HInstruction*> use_it(current->GetUses()); !use_it.Done(); + if (phi->IsDead()) { + // Make sure the phi is only used by other dead phis. + if (kIsDebugBuild) { + for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) { - HUseListNode<HInstruction*>* user_node = use_it.Current(); - HInstruction* user = user_node->GetUser(); + HInstruction* user = use_it.Current()->GetUser(); DCHECK(user->IsLoopHeaderPhi()) << user->GetId(); DCHECK(user->AsPhi()->IsDead()) << user->GetId(); - // Just put itself as an input. The phi will be removed in this loop anyway. - user->SetRawInputAt(user_node->GetIndex(), user); - current->RemoveUser(user, user_node->GetIndex()); } } - if (current->HasEnvironmentUses()) { - for (HUseIterator<HEnvironment*> use_it(current->GetEnvUses()); !use_it.Done(); - use_it.Advance()) { - HUseListNode<HEnvironment*>* user_node = use_it.Current(); - HEnvironment* user = user_node->GetUser(); - user->SetRawEnvAt(user_node->GetIndex(), nullptr); - current->RemoveEnvironmentUser(user_node); - } + // Remove the phi from use lists of its inputs. 
+ for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { + phi->RemoveAsUserOfInput(i); + } + // Remove the phi from environments that use it. + for (HUseIterator<HEnvironment*> use_it(phi->GetEnvUses()); !use_it.Done(); + use_it.Advance()) { + HUseListNode<HEnvironment*>* user_node = use_it.Current(); + HEnvironment* user = user_node->GetUser(); + user->SetRawEnvAt(user_node->GetIndex(), nullptr); } - block->RemovePhi(current->AsPhi()); + // Delete it from the instruction list. + block->RemovePhi(phi, /*ensure_safety=*/ false); } current = next; } diff --git a/compiler/optimizing/ssa_phi_elimination.h b/compiler/optimizing/ssa_phi_elimination.h index b7899712d6..88a5279e14 100644 --- a/compiler/optimizing/ssa_phi_elimination.h +++ b/compiler/optimizing/ssa_phi_elimination.h @@ -34,6 +34,9 @@ class SsaDeadPhiElimination : public HOptimization { void Run() OVERRIDE; + void MarkDeadPhis(); + void EliminateDeadPhis(); + private: GrowableArray<HPhi*> worklist_; diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc index 7e90b37fe6..7fc1ec6dd1 100644 --- a/compiler/optimizing/ssa_test.cc +++ b/compiler/optimizing/ssa_test.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "base/arena_allocator.h" #include "base/stringprintf.h" #include "builder.h" #include "dex_file.h" @@ -22,7 +23,6 @@ #include "optimizing_unit_test.h" #include "pretty_printer.h" #include "ssa_builder.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 3974e53e6f..5283d5dcca 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -166,18 +166,23 @@ class StackMapStream : public ValueObject { stack_map.SetStackMask(*entry.sp_mask); } - // Set the register map. - MemoryRegion register_region = dex_register_maps_region.Subregion( - next_dex_register_map_offset, - DexRegisterMap::kFixedSize + entry.num_dex_registers * DexRegisterMap::SingleEntrySize()); - next_dex_register_map_offset += register_region.size(); - DexRegisterMap dex_register_map(register_region); - stack_map.SetDexRegisterMapOffset(register_region.start() - memory_start); - - for (size_t j = 0; j < entry.num_dex_registers; ++j) { - DexRegisterEntry register_entry = - dex_register_maps_.Get(j + entry.dex_register_maps_start_index); - dex_register_map.SetRegisterInfo(j, register_entry.kind, register_entry.value); + if (entry.num_dex_registers != 0) { + // Set the register map. + MemoryRegion register_region = dex_register_maps_region.Subregion( + next_dex_register_map_offset, + DexRegisterMap::kFixedSize + + entry.num_dex_registers * DexRegisterMap::SingleEntrySize()); + next_dex_register_map_offset += register_region.size(); + DexRegisterMap dex_register_map(register_region); + stack_map.SetDexRegisterMapOffset(register_region.start() - memory_start); + + for (size_t j = 0; j < entry.num_dex_registers; ++j) { + DexRegisterEntry register_entry = + dex_register_maps_.Get(j + entry.dex_register_maps_start_index); + dex_register_map.SetRegisterInfo(j, register_entry.kind, register_entry.value); + } + } else { + stack_map.SetDexRegisterMapOffset(StackMap::kNoDexRegisterMap); } // Set the inlining info. 
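A minimal reading aid, not part of the change above: with the StackMapStream hunk just shown, an entry with zero dex registers now stores StackMap::kNoDexRegisterMap instead of an empty map, so consumers are expected to guard the lookup. The sketch below assumes a CodeInfo `code_info`, a StackMap `stack_map` and a count `number_of_dex_registers` obtained the way stack_map_test.cc (further down) obtains them; `number_of_dex_registers` is a hypothetical name for the literal count the tests pass.

  if (stack_map.HasDexRegisterMap()) {
    // Only decode when a dex register map was actually emitted for this stack map;
    // entries without one carry StackMap::kNoDexRegisterMap as their offset.
    DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
    // Inspect individual vreg locations as the tests do, e.g. dex_registers.GetLocationKind(0).
  }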
@@ -196,7 +201,7 @@ class StackMapStream : public ValueObject { inline_info.SetMethodReferenceIndexAtDepth(j, inline_entry.method_index); } } else { - stack_map.SetInlineDescriptorOffset(InlineInfo::kNoInlineInfo); + stack_map.SetInlineDescriptorOffset(StackMap::kNoInlineInfo); } } } diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index 5ee6ae049c..744fb45fff 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -61,6 +61,7 @@ TEST(StackMapTest, Test1) { MemoryRegion stack_mask = stack_map.GetStackMask(); ASSERT_TRUE(SameBits(stack_mask, sp_mask)); + ASSERT_TRUE(stack_map.HasDexRegisterMap()); DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, 2); ASSERT_EQ(DexRegisterMap::kInStack, dex_registers.GetLocationKind(0)); ASSERT_EQ(DexRegisterMap::kConstant, dex_registers.GetLocationKind(1)); @@ -107,6 +108,7 @@ TEST(StackMapTest, Test2) { MemoryRegion stack_mask = stack_map.GetStackMask(); ASSERT_TRUE(SameBits(stack_mask, sp_mask1)); + ASSERT_TRUE(stack_map.HasDexRegisterMap()); DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, 2); ASSERT_EQ(DexRegisterMap::kInStack, dex_registers.GetLocationKind(0)); ASSERT_EQ(DexRegisterMap::kConstant, dex_registers.GetLocationKind(1)); diff --git a/compiler/utils/arena_allocator.cc b/compiler/utils/arena_allocator.cc deleted file mode 100644 index a80ad938a6..0000000000 --- a/compiler/utils/arena_allocator.cc +++ /dev/null @@ -1,296 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include <algorithm> -#include <iomanip> -#include <numeric> - -#include "arena_allocator.h" -#include "base/logging.h" -#include "base/mutex.h" -#include "thread-inl.h" -#include <memcheck/memcheck.h> - -namespace art { - -// Memmap is a bit slower than malloc according to my measurements. 
-static constexpr bool kUseMemMap = false; -static constexpr bool kUseMemSet = true && kUseMemMap; -static constexpr size_t kValgrindRedZoneBytes = 8; -constexpr size_t Arena::kDefaultSize; - -template <bool kCount> -const char* const ArenaAllocatorStatsImpl<kCount>::kAllocNames[] = { - "Misc ", - "BasicBlock ", - "BBList " - "BBPreds ", - "DfsPreOrd ", - "DfsPostOrd ", - "DomPostOrd ", - "TopoOrd ", - "Lowering ", - "LIR ", - "LIR masks ", - "SwitchTbl ", - "FillArray ", - "SlowPaths ", - "MIR ", - "DataFlow ", - "GrowList ", - "GrowBitMap ", - "SSA2Dalvik ", - "Dalvik2SSA ", - "DebugInfo ", - "Successor ", - "RegAlloc ", - "Data ", - "Preds ", - "STL ", -}; - -template <bool kCount> -ArenaAllocatorStatsImpl<kCount>::ArenaAllocatorStatsImpl() - : num_allocations_(0u) { - std::fill_n(alloc_stats_, arraysize(alloc_stats_), 0u); -} - -template <bool kCount> -void ArenaAllocatorStatsImpl<kCount>::Copy(const ArenaAllocatorStatsImpl& other) { - num_allocations_ = other.num_allocations_; - std::copy(other.alloc_stats_, other.alloc_stats_ + arraysize(alloc_stats_), alloc_stats_); -} - -template <bool kCount> -void ArenaAllocatorStatsImpl<kCount>::RecordAlloc(size_t bytes, ArenaAllocKind kind) { - alloc_stats_[kind] += bytes; - ++num_allocations_; -} - -template <bool kCount> -size_t ArenaAllocatorStatsImpl<kCount>::NumAllocations() const { - return num_allocations_; -} - -template <bool kCount> -size_t ArenaAllocatorStatsImpl<kCount>::BytesAllocated() const { - const size_t init = 0u; // Initial value of the correct type. - return std::accumulate(alloc_stats_, alloc_stats_ + arraysize(alloc_stats_), init); -} - -template <bool kCount> -void ArenaAllocatorStatsImpl<kCount>::Dump(std::ostream& os, const Arena* first, - ssize_t lost_bytes_adjustment) const { - size_t malloc_bytes = 0u; - size_t lost_bytes = 0u; - size_t num_arenas = 0u; - for (const Arena* arena = first; arena != nullptr; arena = arena->next_) { - malloc_bytes += arena->Size(); - lost_bytes += arena->RemainingSpace(); - ++num_arenas; - } - // The lost_bytes_adjustment is used to make up for the fact that the current arena - // may not have the bytes_allocated_ updated correctly. - lost_bytes += lost_bytes_adjustment; - const size_t bytes_allocated = BytesAllocated(); - os << " MEM: used: " << bytes_allocated << ", allocated: " << malloc_bytes - << ", lost: " << lost_bytes << "\n"; - size_t num_allocations = NumAllocations(); - if (num_allocations != 0) { - os << "Number of arenas allocated: " << num_arenas << ", Number of allocations: " - << num_allocations << ", avg size: " << bytes_allocated / num_allocations << "\n"; - } - os << "===== Allocation by kind\n"; - static_assert(arraysize(kAllocNames) == kNumArenaAllocKinds, "arraysize of kAllocNames"); - for (int i = 0; i < kNumArenaAllocKinds; i++) { - os << kAllocNames[i] << std::setw(10) << alloc_stats_[i] << "\n"; - } -} - -// Explicitly instantiate the used implementation. 
-template class ArenaAllocatorStatsImpl<kArenaAllocatorCountAllocations>; - -Arena::Arena(size_t size) - : bytes_allocated_(0), - map_(nullptr), - next_(nullptr) { - if (kUseMemMap) { - std::string error_msg; - map_ = MemMap::MapAnonymous("dalvik-arena", NULL, size, PROT_READ | PROT_WRITE, false, - &error_msg); - CHECK(map_ != nullptr) << error_msg; - memory_ = map_->Begin(); - size_ = map_->Size(); - } else { - memory_ = reinterpret_cast<uint8_t*>(calloc(1, size)); - size_ = size; - } -} - -Arena::~Arena() { - if (kUseMemMap) { - delete map_; - } else { - free(reinterpret_cast<void*>(memory_)); - } -} - -void Arena::Reset() { - if (bytes_allocated_) { - if (kUseMemSet || !kUseMemMap) { - memset(Begin(), 0, bytes_allocated_); - } else { - map_->MadviseDontNeedAndZero(); - } - bytes_allocated_ = 0; - } -} - -ArenaPool::ArenaPool() - : lock_("Arena pool lock"), - free_arenas_(nullptr) { -} - -ArenaPool::~ArenaPool() { - while (free_arenas_ != nullptr) { - auto* arena = free_arenas_; - free_arenas_ = free_arenas_->next_; - delete arena; - } -} - -Arena* ArenaPool::AllocArena(size_t size) { - Thread* self = Thread::Current(); - Arena* ret = nullptr; - { - MutexLock lock(self, lock_); - if (free_arenas_ != nullptr && LIKELY(free_arenas_->Size() >= size)) { - ret = free_arenas_; - free_arenas_ = free_arenas_->next_; - } - } - if (ret == nullptr) { - ret = new Arena(size); - } - ret->Reset(); - return ret; -} - -size_t ArenaPool::GetBytesAllocated() const { - size_t total = 0; - MutexLock lock(Thread::Current(), lock_); - for (Arena* arena = free_arenas_; arena != nullptr; arena = arena->next_) { - total += arena->GetBytesAllocated(); - } - return total; -} - -void ArenaPool::FreeArenaChain(Arena* first) { - if (UNLIKELY(RUNNING_ON_VALGRIND > 0)) { - for (Arena* arena = first; arena != nullptr; arena = arena->next_) { - VALGRIND_MAKE_MEM_UNDEFINED(arena->memory_, arena->bytes_allocated_); - } - } - if (first != nullptr) { - Arena* last = first; - while (last->next_ != nullptr) { - last = last->next_; - } - Thread* self = Thread::Current(); - MutexLock lock(self, lock_); - last->next_ = free_arenas_; - free_arenas_ = first; - } -} - -size_t ArenaAllocator::BytesAllocated() const { - return ArenaAllocatorStats::BytesAllocated(); -} - -ArenaAllocator::ArenaAllocator(ArenaPool* pool) - : pool_(pool), - begin_(nullptr), - end_(nullptr), - ptr_(nullptr), - arena_head_(nullptr), - running_on_valgrind_(RUNNING_ON_VALGRIND > 0) { -} - -void ArenaAllocator::UpdateBytesAllocated() { - if (arena_head_ != nullptr) { - // Update how many bytes we have allocated into the arena so that the arena pool knows how - // much memory to zero out. - arena_head_->bytes_allocated_ = ptr_ - begin_; - } -} - -void* ArenaAllocator::AllocValgrind(size_t bytes, ArenaAllocKind kind) { - size_t rounded_bytes = RoundUp(bytes + kValgrindRedZoneBytes, 8); - if (UNLIKELY(ptr_ + rounded_bytes > end_)) { - // Obtain a new block. - ObtainNewArenaForAllocation(rounded_bytes); - if (UNLIKELY(ptr_ == nullptr)) { - return nullptr; - } - } - ArenaAllocatorStats::RecordAlloc(rounded_bytes, kind); - uint8_t* ret = ptr_; - ptr_ += rounded_bytes; - // Check that the memory is already zeroed out. - for (uint8_t* ptr = ret; ptr < ptr_; ++ptr) { - CHECK_EQ(*ptr, 0U); - } - VALGRIND_MAKE_MEM_NOACCESS(ret + bytes, rounded_bytes - bytes); - return ret; -} - -ArenaAllocator::~ArenaAllocator() { - // Reclaim all the arenas by giving them back to the thread pool. 
- UpdateBytesAllocated(); - pool_->FreeArenaChain(arena_head_); -} - -void ArenaAllocator::ObtainNewArenaForAllocation(size_t allocation_size) { - UpdateBytesAllocated(); - Arena* new_arena = pool_->AllocArena(std::max(Arena::kDefaultSize, allocation_size)); - new_arena->next_ = arena_head_; - arena_head_ = new_arena; - // Update our internal data structures. - ptr_ = begin_ = new_arena->Begin(); - end_ = new_arena->End(); -} - -MemStats::MemStats(const char* name, const ArenaAllocatorStats* stats, const Arena* first_arena, - ssize_t lost_bytes_adjustment) - : name_(name), - stats_(stats), - first_arena_(first_arena), - lost_bytes_adjustment_(lost_bytes_adjustment) { -} - -void MemStats::Dump(std::ostream& os) const { - os << name_ << " stats:\n"; - stats_->Dump(os, first_arena_, lost_bytes_adjustment_); -} - -// Dump memory usage stats. -MemStats ArenaAllocator::GetMemStats() const { - ssize_t lost_bytes_adjustment = - (arena_head_ == nullptr) ? 0 : (end_ - ptr_) - arena_head_->RemainingSpace(); - return MemStats("ArenaAllocator", this, arena_head_, lost_bytes_adjustment); -} - -} // namespace art diff --git a/compiler/utils/arena_allocator.h b/compiler/utils/arena_allocator.h deleted file mode 100644 index 7f5bc9ac4c..0000000000 --- a/compiler/utils/arena_allocator.h +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_UTILS_ARENA_ALLOCATOR_H_ -#define ART_COMPILER_UTILS_ARENA_ALLOCATOR_H_ - -#include <stdint.h> -#include <stddef.h> - -#include "base/macros.h" -#include "base/mutex.h" -#include "mem_map.h" -#include "utils.h" -#include "utils/debug_stack.h" - -namespace art { - -class Arena; -class ArenaPool; -class ArenaAllocator; -class ArenaStack; -class ScopedArenaAllocator; -class MemStats; - -template <typename T> -class ArenaAllocatorAdapter; - -static constexpr bool kArenaAllocatorCountAllocations = false; - -// Type of allocation for memory tuning. 
-enum ArenaAllocKind { - kArenaAllocMisc, - kArenaAllocBB, - kArenaAllocBBList, - kArenaAllocBBPredecessors, - kArenaAllocDfsPreOrder, - kArenaAllocDfsPostOrder, - kArenaAllocDomPostOrder, - kArenaAllocTopologicalSortOrder, - kArenaAllocLoweringInfo, - kArenaAllocLIR, - kArenaAllocLIRResourceMask, - kArenaAllocSwitchTable, - kArenaAllocFillArrayData, - kArenaAllocSlowPaths, - kArenaAllocMIR, - kArenaAllocDFInfo, - kArenaAllocGrowableArray, - kArenaAllocGrowableBitMap, - kArenaAllocSSAToDalvikMap, - kArenaAllocDalvikToSSAMap, - kArenaAllocDebugInfo, - kArenaAllocSuccessor, - kArenaAllocRegAlloc, - kArenaAllocData, - kArenaAllocPredecessors, - kArenaAllocSTL, - kNumArenaAllocKinds -}; - -template <bool kCount> -class ArenaAllocatorStatsImpl; - -template <> -class ArenaAllocatorStatsImpl<false> { - public: - ArenaAllocatorStatsImpl() = default; - ArenaAllocatorStatsImpl(const ArenaAllocatorStatsImpl& other) = default; - ArenaAllocatorStatsImpl& operator = (const ArenaAllocatorStatsImpl& other) = delete; - - void Copy(const ArenaAllocatorStatsImpl& other) { UNUSED(other); } - void RecordAlloc(size_t bytes, ArenaAllocKind kind) { UNUSED(bytes, kind); } - size_t NumAllocations() const { return 0u; } - size_t BytesAllocated() const { return 0u; } - void Dump(std::ostream& os, const Arena* first, ssize_t lost_bytes_adjustment) const { - UNUSED(os); UNUSED(first); UNUSED(lost_bytes_adjustment); - } -}; - -template <bool kCount> -class ArenaAllocatorStatsImpl { - public: - ArenaAllocatorStatsImpl(); - ArenaAllocatorStatsImpl(const ArenaAllocatorStatsImpl& other) = default; - ArenaAllocatorStatsImpl& operator = (const ArenaAllocatorStatsImpl& other) = delete; - - void Copy(const ArenaAllocatorStatsImpl& other); - void RecordAlloc(size_t bytes, ArenaAllocKind kind); - size_t NumAllocations() const; - size_t BytesAllocated() const; - void Dump(std::ostream& os, const Arena* first, ssize_t lost_bytes_adjustment) const; - - private: - size_t num_allocations_; - // TODO: Use std::array<size_t, kNumArenaAllocKinds> from C++11 when we upgrade the STL. - size_t alloc_stats_[kNumArenaAllocKinds]; // Bytes used by various allocation kinds. 
- - static const char* const kAllocNames[]; -}; - -typedef ArenaAllocatorStatsImpl<kArenaAllocatorCountAllocations> ArenaAllocatorStats; - -class Arena { - public: - static constexpr size_t kDefaultSize = 128 * KB; - explicit Arena(size_t size = kDefaultSize); - ~Arena(); - void Reset(); - uint8_t* Begin() { - return memory_; - } - - uint8_t* End() { - return memory_ + size_; - } - - size_t Size() const { - return size_; - } - - size_t RemainingSpace() const { - return Size() - bytes_allocated_; - } - - size_t GetBytesAllocated() const { - return bytes_allocated_; - } - - private: - size_t bytes_allocated_; - uint8_t* memory_; - size_t size_; - MemMap* map_; - Arena* next_; - friend class ArenaPool; - friend class ArenaAllocator; - friend class ArenaStack; - friend class ScopedArenaAllocator; - template <bool kCount> friend class ArenaAllocatorStatsImpl; - DISALLOW_COPY_AND_ASSIGN(Arena); -}; - -class ArenaPool { - public: - ArenaPool(); - ~ArenaPool(); - Arena* AllocArena(size_t size) LOCKS_EXCLUDED(lock_); - void FreeArenaChain(Arena* first) LOCKS_EXCLUDED(lock_); - size_t GetBytesAllocated() const LOCKS_EXCLUDED(lock_); - - private: - mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; - Arena* free_arenas_ GUARDED_BY(lock_); - DISALLOW_COPY_AND_ASSIGN(ArenaPool); -}; - -class ArenaAllocator : private DebugStackRefCounter, private ArenaAllocatorStats { - public: - explicit ArenaAllocator(ArenaPool* pool); - ~ArenaAllocator(); - - // Get adapter for use in STL containers. See arena_containers.h . - ArenaAllocatorAdapter<void> Adapter(ArenaAllocKind kind = kArenaAllocSTL); - - // Returns zeroed memory. - void* Alloc(size_t bytes, ArenaAllocKind kind) ALWAYS_INLINE { - if (UNLIKELY(running_on_valgrind_)) { - return AllocValgrind(bytes, kind); - } - bytes = RoundUp(bytes, 8); - if (UNLIKELY(ptr_ + bytes > end_)) { - // Obtain a new block. - ObtainNewArenaForAllocation(bytes); - if (UNLIKELY(ptr_ == nullptr)) { - return nullptr; - } - } - ArenaAllocatorStats::RecordAlloc(bytes, kind); - uint8_t* ret = ptr_; - ptr_ += bytes; - return ret; - } - - template <typename T> T* AllocArray(size_t length) { - return static_cast<T*>(Alloc(length * sizeof(T), kArenaAllocMisc)); - } - - void* AllocValgrind(size_t bytes, ArenaAllocKind kind); - void ObtainNewArenaForAllocation(size_t allocation_size); - size_t BytesAllocated() const; - MemStats GetMemStats() const; - - private: - void UpdateBytesAllocated(); - - ArenaPool* pool_; - uint8_t* begin_; - uint8_t* end_; - uint8_t* ptr_; - Arena* arena_head_; - bool running_on_valgrind_; - - template <typename U> - friend class ArenaAllocatorAdapter; - - DISALLOW_COPY_AND_ASSIGN(ArenaAllocator); -}; // ArenaAllocator - -class MemStats { - public: - MemStats(const char* name, const ArenaAllocatorStats* stats, const Arena* first_arena, - ssize_t lost_bytes_adjustment = 0); - void Dump(std::ostream& os) const; - - private: - const char* const name_; - const ArenaAllocatorStats* const stats_; - const Arena* const first_arena_; - const ssize_t lost_bytes_adjustment_; -}; // MemStats - -} // namespace art - -#endif // ART_COMPILER_UTILS_ARENA_ALLOCATOR_H_ diff --git a/compiler/utils/arena_allocator_test.cc b/compiler/utils/arena_allocator_test.cc index 71565407a2..706552739f 100644 --- a/compiler/utils/arena_allocator_test.cc +++ b/compiler/utils/arena_allocator_test.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ +#include "base/arena_allocator.h" #include "gtest/gtest.h" -#include "utils/arena_allocator.h" #include "utils/arena_bit_vector.h" namespace art { diff --git a/compiler/utils/arena_bit_vector.cc b/compiler/utils/arena_bit_vector.cc index f17e5a92a4..ddc0c818c2 100644 --- a/compiler/utils/arena_bit_vector.cc +++ b/compiler/utils/arena_bit_vector.cc @@ -14,9 +14,10 @@ * limitations under the License. */ -#include "arena_allocator.h" #include "arena_bit_vector.h" +#include "base/arena_allocator.h" + namespace art { template <typename ArenaAlloc> diff --git a/compiler/utils/arena_bit_vector.h b/compiler/utils/arena_bit_vector.h index 34f1ca9129..f2a74527da 100644 --- a/compiler/utils/arena_bit_vector.h +++ b/compiler/utils/arena_bit_vector.h @@ -17,7 +17,7 @@ #ifndef ART_COMPILER_UTILS_ARENA_BIT_VECTOR_H_ #define ART_COMPILER_UTILS_ARENA_BIT_VECTOR_H_ -#include "arena_object.h" +#include "base/arena_object.h" #include "base/bit_vector.h" namespace art { @@ -35,14 +35,10 @@ enum OatBitMapKind { kBitMapDominators, kBitMapIDominated, kBitMapDomFrontier, - kBitMapPhi, - kBitMapTmpBlocks, - kBitMapInputBlocks, kBitMapRegisterV, kBitMapTempSSARegisterV, kBitMapNullCheck, kBitMapClInitCheck, - kBitMapTmpBlockV, kBitMapPredecessors, kNumBitMapKinds }; diff --git a/compiler/utils/arena_containers.h b/compiler/utils/arena_containers.h deleted file mode 100644 index 825259157a..0000000000 --- a/compiler/utils/arena_containers.h +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_UTILS_ARENA_CONTAINERS_H_ -#define ART_COMPILER_UTILS_ARENA_CONTAINERS_H_ - -#include <deque> -#include <queue> -#include <set> -#include <vector> - -#include "utils/arena_allocator.h" -#include "safe_map.h" - -namespace art { - -// Adapter for use of ArenaAllocator in STL containers. -// Use ArenaAllocator::Adapter() to create an adapter to pass to container constructors. -// For example, -// struct Foo { -// explicit Foo(ArenaAllocator* allocator) -// : foo_vector(allocator->Adapter(kArenaAllocMisc)), -// foo_map(std::less<int>(), allocator->Adapter()) { -// } -// ArenaVector<int> foo_vector; -// ArenaSafeMap<int, int> foo_map; -// }; -template <typename T> -class ArenaAllocatorAdapter; - -template <typename T> -using ArenaDeque = std::deque<T, ArenaAllocatorAdapter<T>>; - -template <typename T> -using ArenaQueue = std::queue<T, ArenaDeque<T>>; - -template <typename T> -using ArenaVector = std::vector<T, ArenaAllocatorAdapter<T>>; - -template <typename T, typename Comparator = std::less<T>> -using ArenaSet = std::set<T, Comparator, ArenaAllocatorAdapter<T>>; - -template <typename K, typename V, typename Comparator = std::less<K>> -using ArenaSafeMap = - SafeMap<K, V, Comparator, ArenaAllocatorAdapter<std::pair<const K, V>>>; - -// Implementation details below. 
- -template <bool kCount> -class ArenaAllocatorAdapterKindImpl; - -template <> -class ArenaAllocatorAdapterKindImpl<false> { - public: - // Not tracking allocations, ignore the supplied kind and arbitrarily provide kArenaAllocSTL. - explicit ArenaAllocatorAdapterKindImpl(ArenaAllocKind kind) { UNUSED(kind); } - ArenaAllocatorAdapterKindImpl& operator=(const ArenaAllocatorAdapterKindImpl& other) = default; - ArenaAllocKind Kind() { return kArenaAllocSTL; } -}; - -template <bool kCount> -class ArenaAllocatorAdapterKindImpl { - public: - explicit ArenaAllocatorAdapterKindImpl(ArenaAllocKind kind) : kind_(kind) { } - ArenaAllocatorAdapterKindImpl& operator=(const ArenaAllocatorAdapterKindImpl& other) = default; - ArenaAllocKind Kind() { return kind_; } - - private: - ArenaAllocKind kind_; -}; - -typedef ArenaAllocatorAdapterKindImpl<kArenaAllocatorCountAllocations> ArenaAllocatorAdapterKind; - -template <> -class ArenaAllocatorAdapter<void> - : private DebugStackReference, private ArenaAllocatorAdapterKind { - public: - typedef void value_type; - typedef void* pointer; - typedef const void* const_pointer; - - template <typename U> - struct rebind { - typedef ArenaAllocatorAdapter<U> other; - }; - - explicit ArenaAllocatorAdapter(ArenaAllocator* arena_allocator, - ArenaAllocKind kind = kArenaAllocSTL) - : DebugStackReference(arena_allocator), - ArenaAllocatorAdapterKind(kind), - arena_allocator_(arena_allocator) { - } - template <typename U> - ArenaAllocatorAdapter(const ArenaAllocatorAdapter<U>& other) - : DebugStackReference(other), - ArenaAllocatorAdapterKind(other), - arena_allocator_(other.arena_allocator_) { - } - ArenaAllocatorAdapter(const ArenaAllocatorAdapter& other) = default; - ArenaAllocatorAdapter& operator=(const ArenaAllocatorAdapter& other) = default; - ~ArenaAllocatorAdapter() = default; - - private: - ArenaAllocator* arena_allocator_; - - template <typename U> - friend class ArenaAllocatorAdapter; -}; - -template <typename T> -class ArenaAllocatorAdapter : private DebugStackReference, private ArenaAllocatorAdapterKind { - public: - typedef T value_type; - typedef T* pointer; - typedef T& reference; - typedef const T* const_pointer; - typedef const T& const_reference; - typedef size_t size_type; - typedef ptrdiff_t difference_type; - - template <typename U> - struct rebind { - typedef ArenaAllocatorAdapter<U> other; - }; - - explicit ArenaAllocatorAdapter(ArenaAllocator* arena_allocator, ArenaAllocKind kind) - : DebugStackReference(arena_allocator), - ArenaAllocatorAdapterKind(kind), - arena_allocator_(arena_allocator) { - } - template <typename U> - ArenaAllocatorAdapter(const ArenaAllocatorAdapter<U>& other) - : DebugStackReference(other), - ArenaAllocatorAdapterKind(other), - arena_allocator_(other.arena_allocator_) { - } - ArenaAllocatorAdapter(const ArenaAllocatorAdapter& other) = default; - ArenaAllocatorAdapter& operator=(const ArenaAllocatorAdapter& other) = default; - ~ArenaAllocatorAdapter() = default; - - size_type max_size() const { - return static_cast<size_type>(-1) / sizeof(T); - } - - pointer address(reference x) const { return &x; } - const_pointer address(const_reference x) const { return &x; } - - pointer allocate(size_type n, ArenaAllocatorAdapter<void>::pointer hint = nullptr) { - UNUSED(hint); - DCHECK_LE(n, max_size()); - return reinterpret_cast<T*>(arena_allocator_->Alloc(n * sizeof(T), - ArenaAllocatorAdapterKind::Kind())); - } - void deallocate(pointer p, size_type n) { - UNUSED(p, n); - } - - void construct(pointer p, const_reference val) { - new 
(static_cast<void*>(p)) value_type(val); - } - void destroy(pointer p) { - p->~value_type(); - } - - private: - ArenaAllocator* arena_allocator_; - - template <typename U> - friend class ArenaAllocatorAdapter; - - template <typename U> - friend bool operator==(const ArenaAllocatorAdapter<U>& lhs, - const ArenaAllocatorAdapter<U>& rhs); -}; - -template <typename T> -inline bool operator==(const ArenaAllocatorAdapter<T>& lhs, - const ArenaAllocatorAdapter<T>& rhs) { - return lhs.arena_allocator_ == rhs.arena_allocator_; -} - -template <typename T> -inline bool operator!=(const ArenaAllocatorAdapter<T>& lhs, - const ArenaAllocatorAdapter<T>& rhs) { - return !(lhs == rhs); -} - -inline ArenaAllocatorAdapter<void> ArenaAllocator::Adapter(ArenaAllocKind kind) { - return ArenaAllocatorAdapter<void>(this, kind); -} - -} // namespace art - -#endif // ART_COMPILER_UTILS_ARENA_CONTAINERS_H_ diff --git a/compiler/utils/arena_object.h b/compiler/utils/arena_object.h deleted file mode 100644 index d64c419954..0000000000 --- a/compiler/utils/arena_object.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_UTILS_ARENA_OBJECT_H_ -#define ART_COMPILER_UTILS_ARENA_OBJECT_H_ - -#include "arena_allocator.h" -#include "base/logging.h" -#include "scoped_arena_allocator.h" - -namespace art { - -// Parent for arena allocated objects giving appropriate new and delete operators. -template<enum ArenaAllocKind kAllocKind> -class ArenaObject { - public: - // Allocate a new ArenaObject of 'size' bytes in the Arena. - void* operator new(size_t size, ArenaAllocator* allocator) { - return allocator->Alloc(size, kAllocKind); - } - - static void* operator new(size_t size, ScopedArenaAllocator* arena) { - return arena->Alloc(size, kAllocKind); - } - - void operator delete(void*, size_t) { - LOG(FATAL) << "UNREACHABLE"; - UNREACHABLE(); - } -}; - - -// Parent for arena allocated objects that get deleted, gives appropriate new and delete operators. -// Currently this is used by the quick compiler for debug reference counting arena allocations. -template<enum ArenaAllocKind kAllocKind> -class DeletableArenaObject { - public: - // Allocate a new ArenaObject of 'size' bytes in the Arena. - void* operator new(size_t size, ArenaAllocator* allocator) { - return allocator->Alloc(size, kAllocKind); - } - - static void* operator new(size_t size, ScopedArenaAllocator* arena) { - return arena->Alloc(size, kAllocKind); - } - - void operator delete(void*, size_t) { - // Nop. 
- } -}; - -} // namespace art - -#endif // ART_COMPILER_UTILS_ARENA_OBJECT_H_ diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc index 05287732c5..a52e6eb30f 100644 --- a/compiler/utils/arm/assembler_arm.cc +++ b/compiler/utils/arm/assembler_arm.cc @@ -166,7 +166,7 @@ uint32_t ShifterOperand::encodingThumb() const { } uint32_t Address::encodingArm() const { - CHECK(IsAbsoluteUint(12, offset_)); + CHECK(IsAbsoluteUint<12>(offset_)); uint32_t encoding; if (is_immed_offset_) { if (offset_ < 0) { @@ -245,6 +245,7 @@ uint32_t Address::encodingThumb(bool is_32bit) const { // This is very like the ARM encoding except the offset is 10 bits. uint32_t Address::encodingThumbLdrdStrd() const { + DCHECK(IsImmediate()); uint32_t encoding; uint32_t am = am_; // If P is 0 then W must be 1 (Different from ARM). @@ -277,11 +278,12 @@ uint32_t Address::encoding3() const { // Encoding for vfp load/store addressing. uint32_t Address::vencoding() const { + CHECK(IsAbsoluteUint<10>(offset_)); // In the range -1020 to +1020. + CHECK_ALIGNED(offset_, 2); // Multiple of 4. + const uint32_t offset_mask = (1 << 12) - 1; uint32_t encoding = encodingArm(); uint32_t offset = encoding & offset_mask; - CHECK(IsAbsoluteUint(10, offset)); // In the range -1020 to +1020. - CHECK_ALIGNED(offset, 2); // Multiple of 4. CHECK((am_ == Offset) || (am_ == NegOffset)); uint32_t vencoding_value = (encoding & (0xf << kRnShift)) | (offset >> 2); if (am_ == Offset) { @@ -297,13 +299,13 @@ bool Address::CanHoldLoadOffsetArm(LoadOperandType type, int offset) { case kLoadSignedHalfword: case kLoadUnsignedHalfword: case kLoadWordPair: - return IsAbsoluteUint(8, offset); // Addressing mode 3. + return IsAbsoluteUint<8>(offset); // Addressing mode 3. case kLoadUnsignedByte: case kLoadWord: - return IsAbsoluteUint(12, offset); // Addressing mode 2. + return IsAbsoluteUint<12>(offset); // Addressing mode 2. case kLoadSWord: case kLoadDWord: - return IsAbsoluteUint(10, offset); // VFP addressing mode. + return IsAbsoluteUint<10>(offset); // VFP addressing mode. default: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); @@ -315,13 +317,13 @@ bool Address::CanHoldStoreOffsetArm(StoreOperandType type, int offset) { switch (type) { case kStoreHalfword: case kStoreWordPair: - return IsAbsoluteUint(8, offset); // Addressing mode 3. + return IsAbsoluteUint<8>(offset); // Addressing mode 3. case kStoreByte: case kStoreWord: - return IsAbsoluteUint(12, offset); // Addressing mode 2. + return IsAbsoluteUint<12>(offset); // Addressing mode 2. case kStoreSWord: case kStoreDWord: - return IsAbsoluteUint(10, offset); // VFP addressing mode. + return IsAbsoluteUint<10>(offset); // VFP addressing mode. default: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); @@ -335,12 +337,12 @@ bool Address::CanHoldLoadOffsetThumb(LoadOperandType type, int offset) { case kLoadUnsignedHalfword: case kLoadUnsignedByte: case kLoadWord: - return IsAbsoluteUint(12, offset); + return IsAbsoluteUint<12>(offset); case kLoadSWord: case kLoadDWord: - return IsAbsoluteUint(10, offset); // VFP addressing mode. + return IsAbsoluteUint<10>(offset); // VFP addressing mode. 
case kLoadWordPair: - return IsAbsoluteUint(10, offset); + return IsAbsoluteUint<10>(offset); default: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); @@ -353,12 +355,12 @@ bool Address::CanHoldStoreOffsetThumb(StoreOperandType type, int offset) { switch (type) { case kStoreHalfword: case kStoreByte: case kStoreWord: - return IsAbsoluteUint(12, offset); + return IsAbsoluteUint<12>(offset); case kStoreSWord: case kStoreDWord: - return IsAbsoluteUint(10, offset); // VFP addressing mode. + return IsAbsoluteUint<10>(offset); // VFP addressing mode. case kStoreWordPair: - return IsAbsoluteUint(10, offset); + return IsAbsoluteUint<10>(offset); default: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h index d9122764d0..8730f52eca 100644 --- a/compiler/utils/arm/assembler_arm.h +++ b/compiler/utils/arm/assembler_arm.h @@ -536,8 +536,44 @@ class ArmAssembler : public Assembler { virtual void LoadImmediate(Register rd, int32_t value, Condition cond = AL) = 0; void LoadSImmediate(SRegister sd, float value, Condition cond = AL) { if (!vmovs(sd, value, cond)) { - LoadImmediate(IP, bit_cast<int32_t, float>(value), cond); - vmovsr(sd, IP, cond); + int32_t int_value = bit_cast<int32_t, float>(value); + if (int_value == bit_cast<int32_t, float>(0.0f)) { + // 0.0 is quite common, so we special case it by loading + // 2.0 in `sd` and then subtracting it. + bool success = vmovs(sd, 2.0, cond); + CHECK(success); + vsubs(sd, sd, sd, cond); + } else { + LoadImmediate(IP, int_value, cond); + vmovsr(sd, IP, cond); + } + } } + + void LoadDImmediate(DRegister sd, double value, Condition cond = AL) { + if (!vmovd(sd, value, cond)) { + uint64_t int_value = bit_cast<uint64_t, double>(value); + if (int_value == bit_cast<uint64_t, double>(0.0)) { + // 0.0 is quite common, so we special case it by loading + // 2.0 in `sd` and then subtracting it. + bool success = vmovd(sd, 2.0, cond); + CHECK(success); + vsubd(sd, sd, sd, cond); + } else { + if (sd < 16) { + SRegister low = static_cast<SRegister>(sd << 1); + SRegister high = static_cast<SRegister>(low + 1); + LoadSImmediate(low, bit_cast<float, uint32_t>(Low32Bits(int_value)), cond); + if (High32Bits(int_value) == Low32Bits(int_value)) { + vmovs(high, low); + } else { + LoadSImmediate(high, bit_cast<float, uint32_t>(High32Bits(int_value)), cond); + } + } else { + LOG(FATAL) << "Unimplemented loading of double into a D register " + << "that cannot be split into two S registers"; + } + } } } diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc index 8d1fb60725..95796916b4 100644 --- a/compiler/utils/arm/assembler_arm32.cc +++ b/compiler/utils/arm/assembler_arm32.cc @@ -1254,7 +1254,7 @@ void Arm32Assembler::vmstat(Condition cond) { // VMRS APSR_nzcv, FPSCR void Arm32Assembler::svc(uint32_t imm24) { - CHECK(IsUint(24, imm24)) << imm24; + CHECK(IsUint<24>(imm24)) << imm24; int32_t encoding = (AL << kConditionShift) | B27 | B26 | B25 | B24 | imm24; Emit(encoding); } diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 5383c28f82..6d0571e263 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -2080,7 +2080,7 @@ void Thumb2Assembler::vmstat(Condition cond) { // VMRS APSR_nzcv, FPSCR.
void Thumb2Assembler::svc(uint32_t imm8) { - CHECK(IsUint(8, imm8)) << imm8; + CHECK(IsUint<8>(imm8)) << imm8; int16_t encoding = B15 | B14 | B12 | B11 | B10 | B9 | B8 | imm8; @@ -2089,7 +2089,7 @@ void Thumb2Assembler::svc(uint32_t imm8) { void Thumb2Assembler::bkpt(uint16_t imm8) { - CHECK(IsUint(8, imm8)) << imm8; + CHECK(IsUint<8>(imm8)) << imm8; int16_t encoding = B15 | B13 | B12 | B11 | B10 | B9 | imm8; diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc index e571e72402..ebea9d4262 100644 --- a/compiler/utils/arm/assembler_thumb2_test.cc +++ b/compiler/utils/arm/assembler_thumb2_test.cc @@ -198,6 +198,18 @@ TEST_F(AssemblerThumb2Test, strexd) { DriverStr(expected, "strexd"); } +TEST_F(AssemblerThumb2Test, LdrdStrd) { + GetAssembler()->ldrd(arm::R0, arm::Address(arm::R2, 8)); + GetAssembler()->ldrd(arm::R0, arm::Address(arm::R12)); + GetAssembler()->strd(arm::R0, arm::Address(arm::R2, 8)); + + const char* expected = + "ldrd r0, r1, [r2, #8]\n" + "ldrd r0, r1, [r12]\n" + "strd r0, r1, [r2, #8]\n"; + DriverStr(expected, "ldrdstrd"); +} + TEST_F(AssemblerThumb2Test, eor) { #define __ GetAssembler()-> __ eor(arm::R1, arm::R1, arm::ShifterOperand(arm::R0)); diff --git a/compiler/utils/debug_stack.h b/compiler/utils/debug_stack.h deleted file mode 100644 index 1bb0624187..0000000000 --- a/compiler/utils/debug_stack.h +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_UTILS_DEBUG_STACK_H_ -#define ART_COMPILER_UTILS_DEBUG_STACK_H_ - -#include "base/logging.h" -#include "base/macros.h" -#include "globals.h" - -namespace art { - -// Helper classes for reference counting to enforce construction/destruction order and -// usage of the top element of a stack in debug mode with no overhead in release mode. - -// Reference counter. No references allowed in destructor or in explicitly called CheckNoRefs(). -template <bool kIsDebug> -class DebugStackRefCounterImpl; -// Reference. Allows an explicit check that it's the top reference. -template <bool kIsDebug> -class DebugStackReferenceImpl; -// Indirect top reference. Checks that the reference is the top reference when used. -template <bool kIsDebug> -class DebugStackIndirectTopRefImpl; - -typedef DebugStackRefCounterImpl<kIsDebugBuild> DebugStackRefCounter; -typedef DebugStackReferenceImpl<kIsDebugBuild> DebugStackReference; -typedef DebugStackIndirectTopRefImpl<kIsDebugBuild> DebugStackIndirectTopRef; - -// Non-debug mode specializations. This should be optimized away. 
- -template <> -class DebugStackRefCounterImpl<false> { - public: - size_t IncrementRefCount() { return 0u; } - void DecrementRefCount() { } - size_t GetRefCount() const { return 0u; } - void CheckNoRefs() const { } -}; - -template <> -class DebugStackReferenceImpl<false> { - public: - explicit DebugStackReferenceImpl(DebugStackRefCounterImpl<false>* counter) { UNUSED(counter); } - DebugStackReferenceImpl(const DebugStackReferenceImpl& other) = default; - DebugStackReferenceImpl& operator=(const DebugStackReferenceImpl& other) = default; - void CheckTop() { } -}; - -template <> -class DebugStackIndirectTopRefImpl<false> { - public: - explicit DebugStackIndirectTopRefImpl(DebugStackReferenceImpl<false>* ref) { UNUSED(ref); } - DebugStackIndirectTopRefImpl(const DebugStackIndirectTopRefImpl& other) = default; - DebugStackIndirectTopRefImpl& operator=(const DebugStackIndirectTopRefImpl& other) = default; - void CheckTop() { } -}; - -// Debug mode versions. - -template <bool kIsDebug> -class DebugStackRefCounterImpl { - public: - DebugStackRefCounterImpl() : ref_count_(0u) { } - ~DebugStackRefCounterImpl() { CheckNoRefs(); } - size_t IncrementRefCount() { return ++ref_count_; } - void DecrementRefCount() { --ref_count_; } - size_t GetRefCount() const { return ref_count_; } - void CheckNoRefs() const { CHECK_EQ(ref_count_, 0u); } - - private: - size_t ref_count_; -}; - -template <bool kIsDebug> -class DebugStackReferenceImpl { - public: - explicit DebugStackReferenceImpl(DebugStackRefCounterImpl<kIsDebug>* counter) - : counter_(counter), ref_count_(counter->IncrementRefCount()) { - } - DebugStackReferenceImpl(const DebugStackReferenceImpl& other) - : counter_(other.counter_), ref_count_(counter_->IncrementRefCount()) { - } - DebugStackReferenceImpl& operator=(const DebugStackReferenceImpl& other) { - CHECK(counter_ == other.counter_); - return *this; - } - ~DebugStackReferenceImpl() { counter_->DecrementRefCount(); } - void CheckTop() { CHECK_EQ(counter_->GetRefCount(), ref_count_); } - - private: - DebugStackRefCounterImpl<true>* counter_; - size_t ref_count_; -}; - -template <bool kIsDebug> -class DebugStackIndirectTopRefImpl { - public: - explicit DebugStackIndirectTopRefImpl(DebugStackReferenceImpl<kIsDebug>* ref) - : ref_(ref) { - CheckTop(); - } - DebugStackIndirectTopRefImpl(const DebugStackIndirectTopRefImpl& other) - : ref_(other.ref_) { - CheckTop(); - } - DebugStackIndirectTopRefImpl& operator=(const DebugStackIndirectTopRefImpl& other) { - CHECK(ref_ == other.ref_); - CheckTop(); - return *this; - } - ~DebugStackIndirectTopRefImpl() { - CheckTop(); - } - void CheckTop() { - ref_->CheckTop(); - } - - private: - DebugStackReferenceImpl<kIsDebug>* ref_; -}; - -} // namespace art - -#endif // ART_COMPILER_UTILS_DEBUG_STACK_H_ diff --git a/compiler/utils/dex_instruction_utils.h b/compiler/utils/dex_instruction_utils.h index 2c6e525e1d..bb2c592f13 100644 --- a/compiler/utils/dex_instruction_utils.h +++ b/compiler/utils/dex_instruction_utils.h @@ -110,6 +110,10 @@ constexpr bool IsInstructionAGetOrAPut(Instruction::Code code) { return Instruction::AGET <= code && code <= Instruction::APUT_SHORT; } +constexpr bool IsInstructionBinOp2Addr(Instruction::Code code) { + return Instruction::ADD_INT_2ADDR <= code && code <= Instruction::REM_DOUBLE_2ADDR; +} + // TODO: Remove the #if guards below when we fully migrate to C++14. 
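The `debug_stack.h` deletion above removes only the copy in `compiler/utils/`; the include update later in this diff (`swap_space.h` switching to `base/debug_stack.h`) shows the header living under `base/` instead. Its core trick is the `template <bool kIsDebug>` split where the release specialization is an empty shell the compiler optimizes away. A minimal sketch of that pattern with illustrative names (`CheckedCounterImpl` is not an ART class):

```cpp
// Illustrative sketch of the debug/release specialization pattern used by
// DebugStackRefCounterImpl: the <false> version is a pile of no-ops, the
// <true> version carries the real reference counting and checks.
#include <cassert>
#include <cstddef>

#ifdef NDEBUG
static constexpr bool kIsDebugBuildSketch = false;
#else
static constexpr bool kIsDebugBuildSketch = true;
#endif

template <bool kIsDebug>
class CheckedCounterImpl;

// Release build: everything compiles down to nothing.
template <>
class CheckedCounterImpl<false> {
 public:
  void Increment() {}
  void Decrement() {}
  void CheckNoRefs() const {}
};

// Debug build: real counting, hard failure if references are still live.
template <>
class CheckedCounterImpl<true> {
 public:
  void Increment() { ++count_; }
  void Decrement() { --count_; }
  void CheckNoRefs() const { assert(count_ == 0u); }

 private:
  size_t count_ = 0u;
};

using CheckedCounter = CheckedCounterImpl<kIsDebugBuildSketch>;

int main() {
  CheckedCounter counter;
  counter.Increment();
  counter.Decrement();
  counter.CheckNoRefs();  // would fire in a debug build if a Decrement() were missing
  return 0;
}
```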
constexpr bool IsInvokeInstructionRange(Instruction::Code opcode) { diff --git a/compiler/utils/dwarf_cfi.cc b/compiler/utils/dwarf_cfi.cc index 83e5f5ad39..a7e09c6517 100644 --- a/compiler/utils/dwarf_cfi.cc +++ b/compiler/utils/dwarf_cfi.cc @@ -37,7 +37,7 @@ void DW_CFA_advance_loc(std::vector<uint8_t>* buf, uint32_t increment) { } else { // Four byte delta. buf->push_back(0x04); - PushWord(buf, increment); + Push32(buf, increment); } } @@ -68,35 +68,35 @@ void DW_CFA_restore_state(std::vector<uint8_t>* buf) { void WriteFDEHeader(std::vector<uint8_t>* buf, bool is_64bit) { // 'length' (filled in by other functions). if (is_64bit) { - PushWord(buf, 0xffffffff); // Indicates 64bit - PushWord(buf, 0); - PushWord(buf, 0); + Push32(buf, 0xffffffff); // Indicates 64bit + Push32(buf, 0); + Push32(buf, 0); } else { - PushWord(buf, 0); + Push32(buf, 0); } // 'CIE_pointer' (filled in by linker). if (is_64bit) { - PushWord(buf, 0); - PushWord(buf, 0); + Push32(buf, 0); + Push32(buf, 0); } else { - PushWord(buf, 0); + Push32(buf, 0); } // 'initial_location' (filled in by linker). if (is_64bit) { - PushWord(buf, 0); - PushWord(buf, 0); + Push32(buf, 0); + Push32(buf, 0); } else { - PushWord(buf, 0); + Push32(buf, 0); } // 'address_range' (filled in by other functions). if (is_64bit) { - PushWord(buf, 0); - PushWord(buf, 0); + Push32(buf, 0); + Push32(buf, 0); } else { - PushWord(buf, 0); + Push32(buf, 0); } // Augmentation length: 0 diff --git a/compiler/utils/growable_array.h b/compiler/utils/growable_array.h index 6af4853e09..821e28b4a0 100644 --- a/compiler/utils/growable_array.h +++ b/compiler/utils/growable_array.h @@ -20,7 +20,7 @@ #include <stdint.h> #include <stddef.h> -#include "arena_object.h" +#include "base/arena_object.h" namespace art { @@ -33,16 +33,14 @@ class GrowableArray : public ArenaObject<kArenaAllocGrowableArray> { : arena_(arena), num_allocated_(init_length), num_used_(0) { - elem_list_ = static_cast<T*>(arena_->Alloc(sizeof(T) * init_length, - kArenaAllocGrowableArray)); + elem_list_ = arena_->AllocArray<T>(init_length, kArenaAllocGrowableArray); } GrowableArray(ArenaAllocator* arena, size_t init_length, T initial_data) : arena_(arena), num_allocated_(init_length), num_used_(init_length) { - elem_list_ = static_cast<T*>(arena_->Alloc(sizeof(T) * init_length, - kArenaAllocGrowableArray)); + elem_list_ = arena_->AllocArray<T>(init_length, kArenaAllocGrowableArray); for (size_t i = 0; i < init_length; ++i) { elem_list_[i] = initial_data; } @@ -58,8 +56,7 @@ class GrowableArray : public ArenaObject<kArenaAllocGrowableArray> { if (new_length > target_length) { target_length = new_length; } - T* new_array = static_cast<T*>(arena_->Alloc(sizeof(T) * target_length, - kArenaAllocGrowableArray)); + T* new_array = arena_->AllocArray<T>(target_length, kArenaAllocGrowableArray); memcpy(new_array, elem_list_, sizeof(T) * num_allocated_); num_allocated_ = target_length; elem_list_ = new_array; diff --git a/compiler/utils/scoped_arena_allocator.cc b/compiler/utils/scoped_arena_allocator.cc deleted file mode 100644 index d9e0619de6..0000000000 --- a/compiler/utils/scoped_arena_allocator.cc +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
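The `growable_array.h` hunks above replace the repeated `static_cast<T*>(arena_->Alloc(sizeof(T) * n, kind))` pattern with the typed `arena_->AllocArray<T>(n, kind)` call. The shape of such a wrapper over a raw byte allocator, sketched with toy names (`ByteArena` is not ART's `ArenaAllocator`):

```cpp
// Illustrative sketch: a typed AllocArray<T>() convenience wrapper over a raw
// byte allocator, hiding the sizeof multiplication and the cast at call sites.
#include <cstddef>
#include <cstdlib>
#include <cstring>

class ByteArena {
 public:
  void* Alloc(size_t bytes) {
    // Toy backing store; a real arena bump-allocates from large blocks and
    // hands out zero-initialized memory.
    void* ptr = std::malloc(bytes);
    if (ptr != nullptr) {
      std::memset(ptr, 0, bytes);
    }
    return ptr;
  }

  template <typename T>
  T* AllocArray(size_t length) {
    return static_cast<T*>(Alloc(length * sizeof(T)));
  }
};

int main() {
  ByteArena arena;
  int* values = arena.AllocArray<int>(16);  // typed, no cast at the call site
  if (values != nullptr) {
    values[0] = 42;
  }
  std::free(values);  // only because this toy arena is malloc-backed
  return 0;
}
```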
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "scoped_arena_allocator.h" - -#include "utils/arena_allocator.h" -#include <memcheck/memcheck.h> - -namespace art { - -static constexpr size_t kValgrindRedZoneBytes = 8; - -ArenaStack::ArenaStack(ArenaPool* arena_pool) - : DebugStackRefCounter(), - stats_and_pool_(arena_pool), - bottom_arena_(nullptr), - top_arena_(nullptr), - top_ptr_(nullptr), - top_end_(nullptr), - running_on_valgrind_(RUNNING_ON_VALGRIND > 0) { -} - -ArenaStack::~ArenaStack() { - DebugStackRefCounter::CheckNoRefs(); - stats_and_pool_.pool->FreeArenaChain(bottom_arena_); -} - -void ArenaStack::Reset() { - DebugStackRefCounter::CheckNoRefs(); - stats_and_pool_.pool->FreeArenaChain(bottom_arena_); - bottom_arena_ = nullptr; - top_arena_ = nullptr; - top_ptr_ = nullptr; - top_end_ = nullptr; -} - -MemStats ArenaStack::GetPeakStats() const { - DebugStackRefCounter::CheckNoRefs(); - return MemStats("ArenaStack peak", static_cast<const TaggedStats<Peak>*>(&stats_and_pool_), - bottom_arena_); -} - -uint8_t* ArenaStack::AllocateFromNextArena(size_t rounded_bytes) { - UpdateBytesAllocated(); - size_t allocation_size = std::max(Arena::kDefaultSize, rounded_bytes); - if (UNLIKELY(top_arena_ == nullptr)) { - top_arena_ = bottom_arena_ = stats_and_pool_.pool->AllocArena(allocation_size); - top_arena_->next_ = nullptr; - } else if (top_arena_->next_ != nullptr && top_arena_->next_->Size() >= allocation_size) { - top_arena_ = top_arena_->next_; - } else { - Arena* tail = top_arena_->next_; - top_arena_->next_ = stats_and_pool_.pool->AllocArena(allocation_size); - top_arena_ = top_arena_->next_; - top_arena_->next_ = tail; - } - top_end_ = top_arena_->End(); - // top_ptr_ shall be updated by ScopedArenaAllocator. - return top_arena_->Begin(); -} - -void ArenaStack::UpdatePeakStatsAndRestore(const ArenaAllocatorStats& restore_stats) { - if (PeakStats()->BytesAllocated() < CurrentStats()->BytesAllocated()) { - PeakStats()->Copy(*CurrentStats()); - } - CurrentStats()->Copy(restore_stats); -} - -void ArenaStack::UpdateBytesAllocated() { - if (top_arena_ != nullptr) { - // Update how many bytes we have allocated into the arena so that the arena pool knows how - // much memory to zero out. Though ScopedArenaAllocator doesn't guarantee the memory is - // zero-initialized, the Arena may be reused by ArenaAllocator which does guarantee this. 
- size_t allocated = static_cast<size_t>(top_ptr_ - top_arena_->Begin()); - if (top_arena_->bytes_allocated_ < allocated) { - top_arena_->bytes_allocated_ = allocated; - } - } -} - -void* ArenaStack::AllocValgrind(size_t bytes, ArenaAllocKind kind) { - size_t rounded_bytes = RoundUp(bytes + kValgrindRedZoneBytes, 8); - uint8_t* ptr = top_ptr_; - if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) { - ptr = AllocateFromNextArena(rounded_bytes); - CHECK(ptr != nullptr) << "Failed to allocate memory"; - } - CurrentStats()->RecordAlloc(bytes, kind); - top_ptr_ = ptr + rounded_bytes; - VALGRIND_MAKE_MEM_UNDEFINED(ptr, bytes); - VALGRIND_MAKE_MEM_NOACCESS(ptr + bytes, rounded_bytes - bytes); - return ptr; -} - -ScopedArenaAllocator::ScopedArenaAllocator(ArenaStack* arena_stack) - : DebugStackReference(arena_stack), - DebugStackRefCounter(), - ArenaAllocatorStats(*arena_stack->CurrentStats()), - arena_stack_(arena_stack), - mark_arena_(arena_stack->top_arena_), - mark_ptr_(arena_stack->top_ptr_), - mark_end_(arena_stack->top_end_) { -} - -ScopedArenaAllocator::~ScopedArenaAllocator() { - DoReset(); -} - -void ScopedArenaAllocator::Reset() { - DoReset(); - // If this allocator was Create()d, we need to move the arena_stack_->top_ptr_ past *this. - if (mark_ptr_ == reinterpret_cast<uint8_t*>(this)) { - arena_stack_->top_ptr_ = mark_ptr_ + RoundUp(sizeof(ScopedArenaAllocator), 8); - } -} - -void ScopedArenaAllocator::DoReset() { - DebugStackReference::CheckTop(); - DebugStackRefCounter::CheckNoRefs(); - arena_stack_->UpdatePeakStatsAndRestore(*this); - arena_stack_->UpdateBytesAllocated(); - if (LIKELY(mark_arena_ != nullptr)) { - arena_stack_->top_arena_ = mark_arena_; - arena_stack_->top_ptr_ = mark_ptr_; - arena_stack_->top_end_ = mark_end_; - } else if (arena_stack_->bottom_arena_ != nullptr) { - mark_arena_ = arena_stack_->top_arena_ = arena_stack_->bottom_arena_; - mark_ptr_ = arena_stack_->top_ptr_ = mark_arena_->Begin(); - mark_end_ = arena_stack_->top_end_ = mark_arena_->End(); - } -} - -} // namespace art diff --git a/compiler/utils/scoped_arena_allocator.h b/compiler/utils/scoped_arena_allocator.h deleted file mode 100644 index 523f158969..0000000000 --- a/compiler/utils/scoped_arena_allocator.h +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_UTILS_SCOPED_ARENA_ALLOCATOR_H_ -#define ART_COMPILER_UTILS_SCOPED_ARENA_ALLOCATOR_H_ - -#include "base/logging.h" -#include "base/macros.h" -#include "utils/arena_allocator.h" -#include "utils/debug_stack.h" -#include "globals.h" - -namespace art { - -class ArenaStack; -class ScopedArenaAllocator; - -template <typename T> -class ScopedArenaAllocatorAdapter; - -// Holds a list of Arenas for use by ScopedArenaAllocator stack. 
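Both `Alloc()` and `AllocValgrind()` in the deleted allocator above keep the top pointer 8-byte aligned via `RoundUp`, with `AllocValgrind()` first padding the request by `kValgrindRedZoneBytes` so the red zone can be marked inaccessible. The arithmetic is the usual power-of-two round-up; a small sketch (with `RoundUpSketch` standing in for ART's `RoundUp`):

```cpp
// Illustrative sketch: power-of-two round-up as used for the 8-byte alignment
// and the valgrind red-zone padding in the allocator above.
#include <cstddef>

constexpr size_t RoundUpSketch(size_t x, size_t n) {
  // n must be a power of two.
  return (x + n - 1) & ~(n - 1);
}

static constexpr size_t kRedZoneBytes = 8;  // mirrors kValgrindRedZoneBytes above

static_assert(RoundUpSketch(1, 8) == 8, "rounds up to the alignment");
static_assert(RoundUpSketch(8, 8) == 8, "already aligned");
static_assert(RoundUpSketch(13 + kRedZoneBytes, 8) == 24, "payload plus red zone, aligned");

int main() { return 0; }
```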
-class ArenaStack : private DebugStackRefCounter { - public: - explicit ArenaStack(ArenaPool* arena_pool); - ~ArenaStack(); - - void Reset(); - - size_t PeakBytesAllocated() { - return PeakStats()->BytesAllocated(); - } - - MemStats GetPeakStats() const; - - private: - struct Peak; - struct Current; - template <typename Tag> struct TaggedStats : ArenaAllocatorStats { }; - struct StatsAndPool : TaggedStats<Peak>, TaggedStats<Current> { - explicit StatsAndPool(ArenaPool* arena_pool) : pool(arena_pool) { } - ArenaPool* const pool; - }; - - ArenaAllocatorStats* PeakStats() { - return static_cast<TaggedStats<Peak>*>(&stats_and_pool_); - } - - ArenaAllocatorStats* CurrentStats() { - return static_cast<TaggedStats<Current>*>(&stats_and_pool_); - } - - // Private - access via ScopedArenaAllocator or ScopedArenaAllocatorAdapter. - void* Alloc(size_t bytes, ArenaAllocKind kind) ALWAYS_INLINE { - if (UNLIKELY(running_on_valgrind_)) { - return AllocValgrind(bytes, kind); - } - size_t rounded_bytes = RoundUp(bytes, 8); - uint8_t* ptr = top_ptr_; - if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) { - ptr = AllocateFromNextArena(rounded_bytes); - } - CurrentStats()->RecordAlloc(bytes, kind); - top_ptr_ = ptr + rounded_bytes; - return ptr; - } - - uint8_t* AllocateFromNextArena(size_t rounded_bytes); - void UpdatePeakStatsAndRestore(const ArenaAllocatorStats& restore_stats); - void UpdateBytesAllocated(); - void* AllocValgrind(size_t bytes, ArenaAllocKind kind); - - StatsAndPool stats_and_pool_; - Arena* bottom_arena_; - Arena* top_arena_; - uint8_t* top_ptr_; - uint8_t* top_end_; - - const bool running_on_valgrind_; - - friend class ScopedArenaAllocator; - template <typename T> - friend class ScopedArenaAllocatorAdapter; - - DISALLOW_COPY_AND_ASSIGN(ArenaStack); -}; - -class ScopedArenaAllocator - : private DebugStackReference, private DebugStackRefCounter, private ArenaAllocatorStats { - public: - // Create a ScopedArenaAllocator directly on the ArenaStack when the scope of - // the allocator is not exactly a C++ block scope. For example, an optimization - // pass can create the scoped allocator in Start() and destroy it in End(). - static ScopedArenaAllocator* Create(ArenaStack* arena_stack) { - void* addr = arena_stack->Alloc(sizeof(ScopedArenaAllocator), kArenaAllocMisc); - ScopedArenaAllocator* allocator = new(addr) ScopedArenaAllocator(arena_stack); - allocator->mark_ptr_ = reinterpret_cast<uint8_t*>(addr); - return allocator; - } - - explicit ScopedArenaAllocator(ArenaStack* arena_stack); - ~ScopedArenaAllocator(); - - void Reset(); - - void* Alloc(size_t bytes, ArenaAllocKind kind) ALWAYS_INLINE { - DebugStackReference::CheckTop(); - return arena_stack_->Alloc(bytes, kind); - } - - // Get adapter for use in STL containers. See scoped_arena_containers.h . - ScopedArenaAllocatorAdapter<void> Adapter(ArenaAllocKind kind = kArenaAllocSTL); - - // Allow a delete-expression to destroy but not deallocate allocators created by Create(). 
- static void operator delete(void* ptr) { UNUSED(ptr); } - - private: - ArenaStack* const arena_stack_; - Arena* mark_arena_; - uint8_t* mark_ptr_; - uint8_t* mark_end_; - - void DoReset(); - - template <typename T> - friend class ScopedArenaAllocatorAdapter; - - DISALLOW_COPY_AND_ASSIGN(ScopedArenaAllocator); -}; - -} // namespace art - -#endif // ART_COMPILER_UTILS_SCOPED_ARENA_ALLOCATOR_H_ diff --git a/compiler/utils/scoped_arena_containers.h b/compiler/utils/scoped_arena_containers.h deleted file mode 100644 index df93b273d1..0000000000 --- a/compiler/utils/scoped_arena_containers.h +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_UTILS_SCOPED_ARENA_CONTAINERS_H_ -#define ART_COMPILER_UTILS_SCOPED_ARENA_CONTAINERS_H_ - -#include <deque> -#include <queue> -#include <set> -#include <vector> - -#include "utils/arena_containers.h" // For ArenaAllocatorAdapterKind. -#include "utils/scoped_arena_allocator.h" -#include "safe_map.h" - -namespace art { - -// Adapter for use of ScopedArenaAllocator in STL containers. -// Use ScopedArenaAllocator::Adapter() to create an adapter to pass to container constructors. -// For example, -// void foo(ScopedArenaAllocator* allocator) { -// ScopedArenaVector<int> foo_vector(allocator->Adapter(kArenaAllocMisc)); -// ScopedArenaSafeMap<int, int> foo_map(std::less<int>(), allocator->Adapter()); -// // Use foo_vector and foo_map... -// } -template <typename T> -class ScopedArenaAllocatorAdapter; - -template <typename T> -using ScopedArenaDeque = std::deque<T, ScopedArenaAllocatorAdapter<T>>; - -template <typename T> -using ScopedArenaQueue = std::queue<T, ScopedArenaDeque<T>>; - -template <typename T> -using ScopedArenaVector = std::vector<T, ScopedArenaAllocatorAdapter<T>>; - -template <typename T, typename Comparator = std::less<T>> -using ScopedArenaSet = std::set<T, Comparator, ScopedArenaAllocatorAdapter<T>>; - -template <typename K, typename V, typename Comparator = std::less<K>> -using ScopedArenaSafeMap = - SafeMap<K, V, Comparator, ScopedArenaAllocatorAdapter<std::pair<const K, V>>>; - -// Implementation details below. 
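The deleted `scoped_arena_containers.h` above wires `ScopedArenaAllocator` into STL containers through a standard allocator adapter: `value_type`, `allocate()`, `deallocate()`, a rebinding converting constructor, and equality operators. A reduced sketch of that interface with made-up names (`CountingAllocator`, `g_allocated`), malloc-backed instead of arena-backed:

```cpp
// Illustrative sketch: the C++11 minimum an allocator adapter such as
// ScopedArenaAllocatorAdapter must provide to be usable with std::vector.
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <vector>

static size_t g_allocated = 0;  // stands in for the arena's allocation stats

template <typename T>
struct CountingAllocator {
  using value_type = T;

  CountingAllocator() = default;
  template <typename U>
  CountingAllocator(const CountingAllocator<U>&) {}  // rebinding conversion

  T* allocate(size_t n) {
    g_allocated += n * sizeof(T);
    return static_cast<T*>(std::malloc(n * sizeof(T)));
  }
  void deallocate(T* p, size_t) { std::free(p); }
};

template <typename T, typename U>
bool operator==(const CountingAllocator<T>&, const CountingAllocator<U>&) { return true; }
template <typename T, typename U>
bool operator!=(const CountingAllocator<T>&, const CountingAllocator<U>&) { return false; }

int main() {
  std::vector<int, CountingAllocator<int>> values;
  values.push_back(1);
  values.push_back(2);
  std::printf("bytes requested so far: %zu\n", g_allocated);
  return 0;
}
```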
- -template <> -class ScopedArenaAllocatorAdapter<void> - : private DebugStackReference, private DebugStackIndirectTopRef, - private ArenaAllocatorAdapterKind { - public: - typedef void value_type; - typedef void* pointer; - typedef const void* const_pointer; - - template <typename U> - struct rebind { - typedef ScopedArenaAllocatorAdapter<U> other; - }; - - explicit ScopedArenaAllocatorAdapter(ScopedArenaAllocator* arena_allocator, - ArenaAllocKind kind = kArenaAllocSTL) - : DebugStackReference(arena_allocator), - DebugStackIndirectTopRef(arena_allocator), - ArenaAllocatorAdapterKind(kind), - arena_stack_(arena_allocator->arena_stack_) { - } - template <typename U> - ScopedArenaAllocatorAdapter(const ScopedArenaAllocatorAdapter<U>& other) - : DebugStackReference(other), - DebugStackIndirectTopRef(other), - ArenaAllocatorAdapterKind(other), - arena_stack_(other.arena_stack_) { - } - ScopedArenaAllocatorAdapter(const ScopedArenaAllocatorAdapter& other) = default; - ScopedArenaAllocatorAdapter& operator=(const ScopedArenaAllocatorAdapter& other) = default; - ~ScopedArenaAllocatorAdapter() = default; - - private: - ArenaStack* arena_stack_; - - template <typename U> - friend class ScopedArenaAllocatorAdapter; -}; - -template <typename T> -class ScopedArenaAllocatorAdapter - : private DebugStackReference, private DebugStackIndirectTopRef, - private ArenaAllocatorAdapterKind { - public: - typedef T value_type; - typedef T* pointer; - typedef T& reference; - typedef const T* const_pointer; - typedef const T& const_reference; - typedef size_t size_type; - typedef ptrdiff_t difference_type; - - template <typename U> - struct rebind { - typedef ScopedArenaAllocatorAdapter<U> other; - }; - - explicit ScopedArenaAllocatorAdapter(ScopedArenaAllocator* arena_allocator, - ArenaAllocKind kind = kArenaAllocSTL) - : DebugStackReference(arena_allocator), - DebugStackIndirectTopRef(arena_allocator), - ArenaAllocatorAdapterKind(kind), - arena_stack_(arena_allocator->arena_stack_) { - } - template <typename U> - ScopedArenaAllocatorAdapter(const ScopedArenaAllocatorAdapter<U>& other) - : DebugStackReference(other), - DebugStackIndirectTopRef(other), - ArenaAllocatorAdapterKind(other), - arena_stack_(other.arena_stack_) { - } - ScopedArenaAllocatorAdapter(const ScopedArenaAllocatorAdapter& other) = default; - ScopedArenaAllocatorAdapter& operator=(const ScopedArenaAllocatorAdapter& other) = default; - ~ScopedArenaAllocatorAdapter() = default; - - size_type max_size() const { - return static_cast<size_type>(-1) / sizeof(T); - } - - pointer address(reference x) const { return &x; } - const_pointer address(const_reference x) const { return &x; } - - pointer allocate(size_type n, ScopedArenaAllocatorAdapter<void>::pointer hint = nullptr) { - UNUSED(hint); - DCHECK_LE(n, max_size()); - DebugStackIndirectTopRef::CheckTop(); - return reinterpret_cast<T*>(arena_stack_->Alloc(n * sizeof(T), - ArenaAllocatorAdapterKind::Kind())); - } - void deallocate(pointer p, size_type n) { - UNUSED(p); - UNUSED(n); - DebugStackIndirectTopRef::CheckTop(); - } - - void construct(pointer p, const_reference val) { - // Don't CheckTop(), allow reusing existing capacity of a vector/deque below the top. - new (static_cast<void*>(p)) value_type(val); - } - void destroy(pointer p) { - // Don't CheckTop(), allow reusing existing capacity of a vector/deque below the top. 
- p->~value_type(); - } - - private: - ArenaStack* arena_stack_; - - template <typename U> - friend class ScopedArenaAllocatorAdapter; - - template <typename U> - friend bool operator==(const ScopedArenaAllocatorAdapter<U>& lhs, - const ScopedArenaAllocatorAdapter<U>& rhs); -}; - -template <typename T> -inline bool operator==(const ScopedArenaAllocatorAdapter<T>& lhs, - const ScopedArenaAllocatorAdapter<T>& rhs) { - return lhs.arena_stack_ == rhs.arena_stack_; -} - -template <typename T> -inline bool operator!=(const ScopedArenaAllocatorAdapter<T>& lhs, - const ScopedArenaAllocatorAdapter<T>& rhs) { - return !(lhs == rhs); -} - -inline ScopedArenaAllocatorAdapter<void> ScopedArenaAllocator::Adapter(ArenaAllocKind kind) { - return ScopedArenaAllocatorAdapter<void>(this, kind); -} - -} // namespace art - -#endif // ART_COMPILER_UTILS_SCOPED_ARENA_CONTAINERS_H_ diff --git a/compiler/utils/swap_space.h b/compiler/utils/swap_space.h index 2d0d77af78..1f8f5da6cd 100644 --- a/compiler/utils/swap_space.h +++ b/compiler/utils/swap_space.h @@ -23,12 +23,12 @@ #include <stdint.h> #include <stddef.h> +#include "base/debug_stack.h" #include "base/logging.h" #include "base/macros.h" #include "base/mutex.h" #include "mem_map.h" #include "utils.h" -#include "utils/debug_stack.h" namespace art { diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 03744e4149..8f4208b417 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -1290,7 +1290,7 @@ void X86Assembler::j(Condition condition, Label* label) { static const int kLongSize = 6; int offset = label->Position() - buffer_.Size(); CHECK_LE(offset, 0); - if (IsInt(8, offset - kShortSize)) { + if (IsInt<8>(offset - kShortSize)) { EmitUint8(0x70 + condition); EmitUint8((offset - kShortSize) & 0xFF); } else { @@ -1325,7 +1325,7 @@ void X86Assembler::jmp(Label* label) { static const int kLongSize = 5; int offset = label->Position() - buffer_.Size(); CHECK_LE(offset, 0); - if (IsInt(8, offset - kShortSize)) { + if (IsInt<8>(offset - kShortSize)) { EmitUint8(0xEB); EmitUint8((offset - kShortSize) & 0xFF); } else { diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 3a44ace649..2dde90744e 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -35,10 +35,10 @@ class Immediate : public ValueObject { int32_t value() const { return value_; } - bool is_int8() const { return IsInt(8, value_); } - bool is_uint8() const { return IsUint(8, value_); } - bool is_int16() const { return IsInt(16, value_); } - bool is_uint16() const { return IsUint(16, value_); } + bool is_int8() const { return IsInt<8>(value_); } + bool is_uint8() const { return IsUint<8>(value_); } + bool is_int16() const { return IsInt<16>(value_); } + bool is_uint16() const { return IsUint<16>(value_); } private: const int32_t value_; diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 7e8e769249..f2704b72a4 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -328,6 +328,14 @@ void X86_64Assembler::leaq(CpuRegister dst, const Address& src) { } +void X86_64Assembler::leal(CpuRegister dst, const Address& src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(dst, src); + EmitUint8(0x8D); + EmitOperand(dst.LowBits(), src); +} + + void X86_64Assembler::movaps(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity 
ensured(&buffer_); EmitOptionalRex32(dst, src); @@ -1507,7 +1515,7 @@ void X86_64Assembler::imull(CpuRegister reg, const Immediate& imm) { // See whether imm can be represented as a sign-extended 8bit value. int32_t v32 = static_cast<int32_t>(imm.value()); - if (IsInt32(8, v32)) { + if (IsInt<8>(v32)) { // Sign-extension works. EmitUint8(0x6B); EmitOperand(reg.LowBits(), Operand(reg)); @@ -1547,7 +1555,7 @@ void X86_64Assembler::imulq(CpuRegister reg, const Immediate& imm) { // See whether imm can be represented as a sign-extended 8bit value. int64_t v64 = imm.value(); - if (IsInt64(8, v64)) { + if (IsInt<8>(v64)) { // Sign-extension works. EmitUint8(0x6B); EmitOperand(reg.LowBits(), Operand(reg)); @@ -1697,7 +1705,7 @@ void X86_64Assembler::notq(CpuRegister reg) { void X86_64Assembler::enter(const Immediate& imm) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xC8); - CHECK(imm.is_uint16()); + CHECK(imm.is_uint16()) << imm.value(); EmitUint8(imm.value() & 0xFF); EmitUint8((imm.value() >> 8) & 0xFF); EmitUint8(0x00); @@ -1751,7 +1759,7 @@ void X86_64Assembler::j(Condition condition, Label* label) { static const int kLongSize = 6; int offset = label->Position() - buffer_.Size(); CHECK_LE(offset, 0); - if (IsInt(8, offset - kShortSize)) { + if (IsInt<8>(offset - kShortSize)) { EmitUint8(0x70 + condition); EmitUint8((offset - kShortSize) & 0xFF); } else { @@ -1788,7 +1796,7 @@ void X86_64Assembler::jmp(Label* label) { static const int kLongSize = 5; int offset = label->Position() - buffer_.Size(); CHECK_LE(offset, 0); - if (IsInt(8, offset - kShortSize)) { + if (IsInt<8>(offset - kShortSize)) { EmitUint8(0xEB); EmitUint8((offset - kShortSize) & 0xFF); } else { diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 2fc251b07a..5dfcf4541b 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -42,15 +42,11 @@ class Immediate : public ValueObject { int64_t value() const { return value_; } - bool is_int8() const { return IsInt(8, value_); } - bool is_uint8() const { return IsUint(8, value_); } - bool is_int16() const { return IsInt(16, value_); } - bool is_uint16() const { return IsUint(16, value_); } - bool is_int32() const { - // This does not work on 32b machines: return IsInt(32, value_); - int64_t limit = static_cast<int64_t>(1) << 31; - return (-limit <= value_) && (value_ < limit); - } + bool is_int8() const { return IsInt<8>(value_); } + bool is_uint8() const { return IsUint<8>(value_); } + bool is_int16() const { return IsInt<16>(value_); } + bool is_uint16() const { return IsUint<16>(value_); } + bool is_int32() const { return IsInt<32>(value_); } private: const int64_t value_; @@ -296,6 +292,7 @@ class X86_64Assembler FINAL : public Assembler { void movw(const Address& dst, const Immediate& imm); void leaq(CpuRegister dst, const Address& src); + void leal(CpuRegister dst, const Address& src); void movaps(XmmRegister dst, XmmRegister src); diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index 6df4144004..00f508b23f 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -44,10 +44,10 @@ static constexpr size_t kRandomIterations = 100000; // Hosts are pretty powerfu TEST(AssemblerX86_64, SignExtension) { // 32bit. 
for (int32_t i = 0; i < 128; i++) { - EXPECT_TRUE(IsInt32(8, i)) << i; + EXPECT_TRUE(IsInt<8>(i)) << i; } for (int32_t i = 128; i < 255; i++) { - EXPECT_FALSE(IsInt32(8, i)) << i; + EXPECT_FALSE(IsInt<8>(i)) << i; } // Do some higher ones randomly. std::random_device rd; @@ -55,54 +55,65 @@ TEST(AssemblerX86_64, SignExtension) { std::uniform_int_distribution<int32_t> uniform_dist(256, INT32_MAX); for (size_t i = 0; i < kRandomIterations; i++) { int32_t value = uniform_dist(e1); - EXPECT_FALSE(IsInt32(8, value)) << value; + EXPECT_FALSE(IsInt<8>(value)) << value; } // Negative ones. for (int32_t i = -1; i >= -128; i--) { - EXPECT_TRUE(IsInt32(8, i)) << i; + EXPECT_TRUE(IsInt<8>(i)) << i; } for (int32_t i = -129; i > -256; i--) { - EXPECT_FALSE(IsInt32(8, i)) << i; + EXPECT_FALSE(IsInt<8>(i)) << i; } // Do some lower ones randomly. std::uniform_int_distribution<int32_t> uniform_dist2(INT32_MIN, -256); for (size_t i = 0; i < 100; i++) { int32_t value = uniform_dist2(e1); - EXPECT_FALSE(IsInt32(8, value)) << value; + EXPECT_FALSE(IsInt<8>(value)) << value; } // 64bit. for (int64_t i = 0; i < 128; i++) { - EXPECT_TRUE(IsInt64(8, i)) << i; + EXPECT_TRUE(IsInt<8>(i)) << i; } for (int32_t i = 128; i < 255; i++) { - EXPECT_FALSE(IsInt64(8, i)) << i; + EXPECT_FALSE(IsInt<8>(i)) << i; } // Do some higher ones randomly. std::uniform_int_distribution<int64_t> uniform_dist3(256, INT64_MAX); for (size_t i = 0; i < 100; i++) { int64_t value = uniform_dist3(e1); - EXPECT_FALSE(IsInt64(8, value)) << value; + EXPECT_FALSE(IsInt<8>(value)) << value; } // Negative ones. for (int64_t i = -1; i >= -128; i--) { - EXPECT_TRUE(IsInt64(8, i)) << i; + EXPECT_TRUE(IsInt<8>(i)) << i; } for (int64_t i = -129; i > -256; i--) { - EXPECT_FALSE(IsInt64(8, i)) << i; + EXPECT_FALSE(IsInt<8>(i)) << i; } // Do some lower ones randomly. std::uniform_int_distribution<int64_t> uniform_dist4(INT64_MIN, -256); for (size_t i = 0; i < kRandomIterations; i++) { int64_t value = uniform_dist4(e1); - EXPECT_FALSE(IsInt64(8, value)) << value; + EXPECT_FALSE(IsInt<8>(value)) << value; } + + int64_t value = INT64_C(0x1200000010); + x86_64::Immediate imm(value); + EXPECT_FALSE(imm.is_int8()); + EXPECT_FALSE(imm.is_int16()); + EXPECT_FALSE(imm.is_int32()); + value = INT64_C(0x8000000000000001); + x86_64::Immediate imm2(value); + EXPECT_FALSE(imm2.is_int8()); + EXPECT_FALSE(imm2.is_int16()); + EXPECT_FALSE(imm2.is_int32()); } struct X86_64CpuRegisterCompare { |
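The recurring change in this diff — `IsInt(n, x)`, `IsUint(n, x)`, `IsInt32`, and `IsInt64` becoming the templated `IsInt<n>(x)` / `IsUint<n>(x)` — is also what lets `Immediate::is_int32()` drop its hand-rolled limit computation. A sketch of what such compile-time-width checks amount to (illustrative only, not ART's actual helpers, and limited here to widths up to 32 bits):

```cpp
// Illustrative sketch of templated bit-width predicates in the spirit of
// IsInt<N>() / IsUint<N>(). Widths are capped at 32 so every shift on int64_t
// below is trivially well-defined; the real helpers also handle 64-bit widths.
#include <cstddef>
#include <cstdint>

template <size_t kBits>  // precondition: 1 <= kBits <= 32
constexpr bool IsIntSketch(int64_t value) {
  return value >= -(INT64_C(1) << (kBits - 1)) && value < (INT64_C(1) << (kBits - 1));
}

template <size_t kBits>  // precondition: 1 <= kBits <= 32
constexpr bool IsUintSketch(int64_t value) {
  return value >= 0 && value <= (INT64_C(1) << kBits) - 1;
}

// Mirrors the sign-extension test expectations above: signed 8-bit is [-128, 127].
static_assert(IsIntSketch<8>(127), "127 fits in 8 signed bits");
static_assert(!IsIntSketch<8>(128), "128 does not");
static_assert(IsIntSketch<8>(-128), "-128 fits");
static_assert(!IsIntSketch<8>(-129), "-129 does not");

// And the new Immediate tests: 0x1200000010 needs more than 32 bits.
static_assert(!IsIntSketch<32>(INT64_C(0x1200000010)), "wider than int32");
static_assert(IsUintSketch<16>(0xFFFF), "fits in uint16");

int main() { return 0; }
```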