Diffstat (limited to 'compiler')
81 files changed, 3285 insertions, 1658 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk index 88fdee0f10..cfce9f70ce 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -60,7 +60,9 @@ LIBART_COMPILER_SRC_FILES := \ dex/mir_method_info.cc \ dex/mir_optimization.cc \ dex/bb_optimizations.cc \ - dex/pass_driver_me.cc \ + dex/post_opt_passes.cc \ + dex/pass_driver_me_opts.cc \ + dex/pass_driver_me_post_opt.cc \ dex/frontend.cc \ dex/mir_graph.cc \ dex/mir_analysis.cc \ @@ -177,8 +179,10 @@ define build-libart-compiler LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION) ifeq ($$(art_ndebug_or_debug),ndebug) LOCAL_MODULE := libart-compiler + LOCAL_SHARED_LIBRARIES += libart else # debug LOCAL_MODULE := libartd-compiler + LOCAL_SHARED_LIBRARIES += libartd endif LOCAL_MODULE_TAGS := optional @@ -200,32 +204,21 @@ $$(ENUM_OPERATOR_OUT_GEN): $$(GENERATED_SRC_DIR)/%_operator_out.cc : $(LOCAL_PAT LOCAL_CFLAGS := $$(LIBART_COMPILER_CFLAGS) include external/libcxx/libcxx.mk ifeq ($$(art_target_or_host),target) - LOCAL_CLANG := $(ART_TARGET_CLANG) - LOCAL_CFLAGS += $(ART_TARGET_CFLAGS) + $(call set-target-local-clang-vars) + $(call set-target-local-cflags-vars,$(2)) else # host LOCAL_CLANG := $(ART_HOST_CLANG) LOCAL_CFLAGS += $(ART_HOST_CFLAGS) + ifeq ($$(art_ndebug_or_debug),debug) + LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS) + else + LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS) + endif endif # TODO: clean up the compilers and remove this. LOCAL_CFLAGS += -Wno-unused-parameter - LOCAL_SHARED_LIBRARIES += liblog - ifeq ($$(art_ndebug_or_debug),debug) - ifeq ($$(art_target_or_host),target) - LOCAL_CFLAGS += $(ART_TARGET_DEBUG_CFLAGS) - else # host - LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS) - endif - LOCAL_SHARED_LIBRARIES += libartd - else - ifeq ($$(art_target_or_host),target) - LOCAL_CFLAGS += $(ART_TARGET_NON_DEBUG_CFLAGS) - else # host - LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS) - endif - LOCAL_SHARED_LIBRARIES += libart - endif ifeq ($(ART_USE_PORTABLE_COMPILER),true) LOCAL_SHARED_LIBRARIES += libLLVM LOCAL_CFLAGS += -DART_USE_PORTABLE_COMPILER=1 diff --git a/compiler/dex/bb_optimizations.cc b/compiler/dex/bb_optimizations.cc index 8b5eba0f67..06e259a65f 100644 --- a/compiler/dex/bb_optimizations.cc +++ b/compiler/dex/bb_optimizations.cc @@ -26,83 +26,11 @@ namespace art { bool CodeLayout::Worker(const PassDataHolder* data) const { DCHECK(data != nullptr); const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); - CompilationUnit* cUnit = pass_me_data_holder->c_unit; - DCHECK(cUnit != nullptr); + CompilationUnit* c_unit = pass_me_data_holder->c_unit; + DCHECK(c_unit != nullptr); BasicBlock* bb = pass_me_data_holder->bb; DCHECK(bb != nullptr); - cUnit->mir_graph->LayoutBlocks(bb); - // No need of repeating, so just return false. - return false; -} - -/* - * SSATransformation pass implementation start. - */ -void SSATransformation::Start(const PassDataHolder* data) const { - DCHECK(data != nullptr); - CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; - DCHECK(cUnit != nullptr); - cUnit->mir_graph->SSATransformationStart(); -} - -bool SSATransformation::Worker(const PassDataHolder* data) const { - DCHECK(data != nullptr); - const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); - CompilationUnit* cUnit = pass_me_data_holder->c_unit; - DCHECK(cUnit != nullptr); - BasicBlock* bb = pass_me_data_holder->bb; - DCHECK(bb != nullptr); - cUnit->mir_graph->InsertPhiNodeOperands(bb); - // No need of repeating, so just return false. 
- return false; -} - -void SSATransformation::End(const PassDataHolder* data) const { - DCHECK(data != nullptr); - CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; - DCHECK(cUnit != nullptr); - cUnit->mir_graph->SSATransformationEnd(); -} - -/* - * ConstantPropagation pass implementation start - */ -bool ConstantPropagation::Worker(const PassDataHolder* data) const { - DCHECK(data != nullptr); - const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); - CompilationUnit* cUnit = pass_me_data_holder->c_unit; - DCHECK(cUnit != nullptr); - BasicBlock* bb = pass_me_data_holder->bb; - DCHECK(bb != nullptr); - cUnit->mir_graph->DoConstantPropagation(bb); - // No need of repeating, so just return false. - return false; -} - -/* - * MethodUseCount pass implementation start. - */ -bool MethodUseCount::Gate(const PassDataHolder* data) const { - DCHECK(data != nullptr); - CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; - DCHECK(cUnit != nullptr); - // First initialize the data. - cUnit->mir_graph->InitializeMethodUses(); - - // Now check if the pass is to be ignored. - bool res = ((cUnit->disable_opt & (1 << kPromoteRegs)) == 0); - - return res; -} - -bool MethodUseCount::Worker(const PassDataHolder* data) const { - DCHECK(data != nullptr); - const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); - CompilationUnit* cUnit = pass_me_data_holder->c_unit; - DCHECK(cUnit != nullptr); - BasicBlock* bb = pass_me_data_holder->bb; - DCHECK(bb != nullptr); - cUnit->mir_graph->CountUses(bb); + c_unit->mir_graph->LayoutBlocks(bb); // No need of repeating, so just return false. return false; } @@ -113,11 +41,11 @@ bool MethodUseCount::Worker(const PassDataHolder* data) const { bool BBCombine::Worker(const PassDataHolder* data) const { DCHECK(data != nullptr); const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); - CompilationUnit* cUnit = pass_me_data_holder->c_unit; - DCHECK(cUnit != nullptr); + CompilationUnit* c_unit = pass_me_data_holder->c_unit; + DCHECK(c_unit != nullptr); BasicBlock* bb = pass_me_data_holder->bb; DCHECK(bb != nullptr); - cUnit->mir_graph->CombineBlocks(bb); + c_unit->mir_graph->CombineBlocks(bb); // No need of repeating, so just return false. 
return false; @@ -128,15 +56,15 @@ bool BBCombine::Worker(const PassDataHolder* data) const { */ void BBOptimizations::Start(const PassDataHolder* data) const { DCHECK(data != nullptr); - CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; - DCHECK(cUnit != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); /* * This pass has a different ordering depEnding on the suppress exception, * so do the pass here for now: * - Later, the Start should just change the ordering and we can move the extended * creation into the pass driver's main job with a new iterator */ - cUnit->mir_graph->BasicBlockOptimization(); + c_unit->mir_graph->BasicBlockOptimization(); } } // namespace art diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h index 3a529f2096..00947902e7 100644 --- a/compiler/dex/bb_optimizations.h +++ b/compiler/dex/bb_optimizations.h @@ -119,7 +119,7 @@ class CallInlining : public PassME { */ class CodeLayout : public PassME { public: - CodeLayout() : PassME("CodeLayout", "2_post_layout_cfg") { + CodeLayout() : PassME("CodeLayout", kAllNodes, kOptimizationBasicBlockChange, "2_post_layout_cfg") { } void Start(const PassDataHolder* data) const { @@ -133,72 +133,6 @@ class CodeLayout : public PassME { }; /** - * @class SSATransformation - * @brief Perform an SSA representation pass on the CompilationUnit. - */ -class SSATransformation : public PassME { - public: - SSATransformation() : PassME("SSATransformation", kPreOrderDFSTraversal, "3_post_ssa_cfg") { - } - - bool Worker(const PassDataHolder* data) const; - - void Start(const PassDataHolder* data) const; - - void End(const PassDataHolder* data) const; -}; - -/** - * @class ConstantPropagation - * @brief Perform a constant propagation pass. - */ -class ConstantPropagation : public PassME { - public: - ConstantPropagation() : PassME("ConstantPropagation") { - } - - bool Worker(const PassDataHolder* data) const; - - void Start(const PassDataHolder* data) const { - DCHECK(data != nullptr); - CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; - DCHECK(cUnit != nullptr); - cUnit->mir_graph->InitializeConstantPropagation(); - } -}; - -/** - * @class InitRegLocations - * @brief Initialize Register Locations. - */ -class InitRegLocations : public PassME { - public: - InitRegLocations() : PassME("InitRegLocation", kNoNodes) { - } - - void Start(const PassDataHolder* data) const { - DCHECK(data != nullptr); - CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; - DCHECK(cUnit != nullptr); - cUnit->mir_graph->InitRegLocations(); - } -}; - -/** - * @class MethodUseCount - * @brief Count the register uses of the method - */ -class MethodUseCount : public PassME { - public: - MethodUseCount() : PassME("UseCount") { - } - - bool Worker(const PassDataHolder* data) const; - - bool Gate(const PassDataHolder* data) const; -}; - -/** * @class NullCheckEliminationAndTypeInference * @brief Null check elimination and type inference. 
*/ diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h index 767ffbf432..eb48cc3783 100644 --- a/compiler/dex/compiler_enums.h +++ b/compiler/dex/compiler_enums.h @@ -25,6 +25,7 @@ enum RegisterClass { kInvalidRegClass, kCoreReg, kFPReg, + kRefReg, kAnyReg, }; diff --git a/compiler/dex/compiler_ir.h b/compiler/dex/compiler_ir.h index 35d777ec7a..66fb608d39 100644 --- a/compiler/dex/compiler_ir.h +++ b/compiler/dex/compiler_ir.h @@ -88,6 +88,7 @@ struct CompilationUnit { std::unique_ptr<MIRGraph> mir_graph; // MIR container. std::unique_ptr<Backend> cg; // Target-specific codegen. TimingLogger timings; + bool print_pass; // Do we want to print a pass or not? }; } // namespace art diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc index c3f694da50..58d2ed2f0d 100644 --- a/compiler/dex/frontend.cc +++ b/compiler/dex/frontend.cc @@ -21,7 +21,7 @@ #include "dataflow_iterator-inl.h" #include "leb128.h" #include "mirror/object.h" -#include "pass_driver_me.h" +#include "pass_driver_me_opts.h" #include "runtime.h" #include "base/logging.h" #include "base/timing_logger.h" @@ -105,7 +105,8 @@ CompilationUnit::CompilationUnit(ArenaPool* pool) arena_stack(pool), mir_graph(nullptr), cg(nullptr), - timings("QuickCompiler", true, false) { + timings("QuickCompiler", true, false), + print_pass(false) { } CompilationUnit::~CompilationUnit() { @@ -133,39 +134,133 @@ void CompilationUnit::EndTiming() { } } +// Enable opcodes that mostly work, but produce assertion errors (thus breaking libartd.so). +#define ARM64_USE_EXPERIMENTAL_OPCODES 0 + // TODO: Remove this when we are able to compile everything. int arm64_support_list[] = { Instruction::NOP, Instruction::MOVE, Instruction::MOVE_FROM16, Instruction::MOVE_16, + Instruction::MOVE_EXCEPTION, + Instruction::RETURN_VOID, + Instruction::RETURN, + Instruction::RETURN_WIDE, + Instruction::CONST_4, + Instruction::CONST_16, + Instruction::CONST, + Instruction::CONST_STRING, + Instruction::MONITOR_ENTER, + Instruction::MONITOR_EXIT, + Instruction::THROW, + Instruction::GOTO, + Instruction::GOTO_16, + Instruction::GOTO_32, + Instruction::IF_EQ, + Instruction::IF_NE, + Instruction::IF_LT, + Instruction::IF_GE, + Instruction::IF_GT, + Instruction::IF_LE, + Instruction::IF_EQZ, + Instruction::IF_NEZ, + Instruction::IF_LTZ, + Instruction::IF_GEZ, + Instruction::IF_GTZ, + Instruction::IF_LEZ, + Instruction::NEG_INT, + Instruction::NOT_INT, + Instruction::NEG_FLOAT, + Instruction::INT_TO_BYTE, + Instruction::INT_TO_CHAR, + Instruction::INT_TO_SHORT, + Instruction::ADD_INT, + Instruction::SUB_INT, + Instruction::MUL_INT, + Instruction::DIV_INT, + Instruction::REM_INT, + Instruction::AND_INT, + Instruction::OR_INT, + Instruction::XOR_INT, + Instruction::SHL_INT, + Instruction::SHR_INT, + Instruction::USHR_INT, + Instruction::ADD_FLOAT, + Instruction::SUB_FLOAT, + Instruction::MUL_FLOAT, + Instruction::DIV_FLOAT, + Instruction::ADD_INT_2ADDR, + Instruction::SUB_INT_2ADDR, + Instruction::MUL_INT_2ADDR, + Instruction::DIV_INT_2ADDR, + Instruction::REM_INT_2ADDR, + Instruction::AND_INT_2ADDR, + Instruction::OR_INT_2ADDR, + Instruction::XOR_INT_2ADDR, + Instruction::SHL_INT_2ADDR, + Instruction::SHR_INT_2ADDR, + Instruction::USHR_INT_2ADDR, + Instruction::ADD_FLOAT_2ADDR, + Instruction::SUB_FLOAT_2ADDR, + Instruction::MUL_FLOAT_2ADDR, + Instruction::DIV_FLOAT_2ADDR, + Instruction::ADD_INT_LIT16, + Instruction::RSUB_INT, + Instruction::MUL_INT_LIT16, + Instruction::DIV_INT_LIT16, + Instruction::REM_INT_LIT16, + Instruction::AND_INT_LIT16, + 
Instruction::OR_INT_LIT16, + Instruction::XOR_INT_LIT16, + Instruction::ADD_INT_LIT8, + Instruction::RSUB_INT_LIT8, + Instruction::MUL_INT_LIT8, + Instruction::DIV_INT_LIT8, + Instruction::REM_INT_LIT8, + Instruction::AND_INT_LIT8, + Instruction::OR_INT_LIT8, + Instruction::XOR_INT_LIT8, + Instruction::SHL_INT_LIT8, + Instruction::SHR_INT_LIT8, + Instruction::USHR_INT_LIT8, + // TODO(Arm64): Enable compiler pass + // ----- ExtendedMIROpcode ----- + kMirOpPhi, + kMirOpCopy, + kMirOpFusedCmplFloat, + kMirOpFusedCmpgFloat, + kMirOpFusedCmplDouble, + kMirOpFusedCmpgDouble, + kMirOpFusedCmpLong, + kMirOpNop, + kMirOpNullCheck, + kMirOpRangeCheck, + kMirOpDivZeroCheck, + kMirOpCheck, + kMirOpCheckPart2, + kMirOpSelect, + +#if ARM64_USE_EXPERIMENTAL_OPCODES Instruction::MOVE_WIDE, Instruction::MOVE_WIDE_FROM16, Instruction::MOVE_WIDE_16, Instruction::MOVE_OBJECT, Instruction::MOVE_OBJECT_FROM16, Instruction::MOVE_OBJECT_16, + // Instruction::PACKED_SWITCH, + // Instruction::SPARSE_SWITCH, // Instruction::MOVE_RESULT, // Instruction::MOVE_RESULT_WIDE, // Instruction::MOVE_RESULT_OBJECT, - Instruction::MOVE_EXCEPTION, - Instruction::RETURN_VOID, - Instruction::RETURN, - Instruction::RETURN_WIDE, // Instruction::RETURN_OBJECT, - // Instruction::CONST_4, - // Instruction::CONST_16, - // Instruction::CONST, // Instruction::CONST_HIGH16, // Instruction::CONST_WIDE_16, // Instruction::CONST_WIDE_32, // Instruction::CONST_WIDE, // Instruction::CONST_WIDE_HIGH16, - // Instruction::CONST_STRING, // Instruction::CONST_STRING_JUMBO, // Instruction::CONST_CLASS, - Instruction::MONITOR_ENTER, - Instruction::MONITOR_EXIT, // Instruction::CHECK_CAST, // Instruction::INSTANCE_OF, // Instruction::ARRAY_LENGTH, @@ -174,29 +269,11 @@ int arm64_support_list[] = { // Instruction::FILLED_NEW_ARRAY, // Instruction::FILLED_NEW_ARRAY_RANGE, // Instruction::FILL_ARRAY_DATA, - Instruction::THROW, - // Instruction::GOTO, - // Instruction::GOTO_16, - // Instruction::GOTO_32, - // Instruction::PACKED_SWITCH, - // Instruction::SPARSE_SWITCH, Instruction::CMPL_FLOAT, Instruction::CMPG_FLOAT, Instruction::CMPL_DOUBLE, Instruction::CMPG_DOUBLE, Instruction::CMP_LONG, - // Instruction::IF_EQ, - // Instruction::IF_NE, - // Instruction::IF_LT, - // Instruction::IF_GE, - // Instruction::IF_GT, - // Instruction::IF_LE, - // Instruction::IF_EQZ, - // Instruction::IF_NEZ, - // Instruction::IF_LTZ, - // Instruction::IF_GEZ, - // Instruction::IF_GTZ, - // Instruction::IF_LEZ, // Instruction::UNUSED_3E, // Instruction::UNUSED_3F, // Instruction::UNUSED_40, @@ -258,11 +335,8 @@ int arm64_support_list[] = { // Instruction::INVOKE_INTERFACE_RANGE, // Instruction::UNUSED_79, // Instruction::UNUSED_7A, - Instruction::NEG_INT, - Instruction::NOT_INT, Instruction::NEG_LONG, Instruction::NOT_LONG, - Instruction::NEG_FLOAT, Instruction::NEG_DOUBLE, Instruction::INT_TO_LONG, Instruction::INT_TO_FLOAT, @@ -276,20 +350,6 @@ int arm64_support_list[] = { Instruction::DOUBLE_TO_INT, Instruction::DOUBLE_TO_LONG, Instruction::DOUBLE_TO_FLOAT, - Instruction::INT_TO_BYTE, - Instruction::INT_TO_CHAR, - Instruction::INT_TO_SHORT, - Instruction::ADD_INT, - Instruction::SUB_INT, - Instruction::MUL_INT, - Instruction::DIV_INT, - Instruction::REM_INT, - Instruction::AND_INT, - Instruction::OR_INT, - Instruction::XOR_INT, - Instruction::SHL_INT, - Instruction::SHR_INT, - Instruction::USHR_INT, Instruction::ADD_LONG, Instruction::SUB_LONG, Instruction::MUL_LONG, @@ -301,27 +361,12 @@ int arm64_support_list[] = { Instruction::SHL_LONG, Instruction::SHR_LONG, 
Instruction::USHR_LONG, - Instruction::ADD_FLOAT, - Instruction::SUB_FLOAT, - Instruction::MUL_FLOAT, - Instruction::DIV_FLOAT, // Instruction::REM_FLOAT, Instruction::ADD_DOUBLE, Instruction::SUB_DOUBLE, Instruction::MUL_DOUBLE, Instruction::DIV_DOUBLE, // Instruction::REM_DOUBLE, - Instruction::ADD_INT_2ADDR, - Instruction::SUB_INT_2ADDR, - Instruction::MUL_INT_2ADDR, - Instruction::DIV_INT_2ADDR, - Instruction::REM_INT_2ADDR, - Instruction::AND_INT_2ADDR, - Instruction::OR_INT_2ADDR, - Instruction::XOR_INT_2ADDR, - Instruction::SHL_INT_2ADDR, - Instruction::SHR_INT_2ADDR, - Instruction::USHR_INT_2ADDR, Instruction::ADD_LONG_2ADDR, Instruction::SUB_LONG_2ADDR, Instruction::MUL_LONG_2ADDR, @@ -333,35 +378,12 @@ int arm64_support_list[] = { Instruction::SHL_LONG_2ADDR, Instruction::SHR_LONG_2ADDR, Instruction::USHR_LONG_2ADDR, - Instruction::ADD_FLOAT_2ADDR, - Instruction::SUB_FLOAT_2ADDR, - Instruction::MUL_FLOAT_2ADDR, - Instruction::DIV_FLOAT_2ADDR, // Instruction::REM_FLOAT_2ADDR, Instruction::ADD_DOUBLE_2ADDR, Instruction::SUB_DOUBLE_2ADDR, Instruction::MUL_DOUBLE_2ADDR, Instruction::DIV_DOUBLE_2ADDR, // Instruction::REM_DOUBLE_2ADDR, - Instruction::ADD_INT_LIT16, - Instruction::RSUB_INT, - Instruction::MUL_INT_LIT16, - Instruction::DIV_INT_LIT16, - Instruction::REM_INT_LIT16, - Instruction::AND_INT_LIT16, - Instruction::OR_INT_LIT16, - Instruction::XOR_INT_LIT16, - Instruction::ADD_INT_LIT8, - Instruction::RSUB_INT_LIT8, - Instruction::MUL_INT_LIT8, - Instruction::DIV_INT_LIT8, - Instruction::REM_INT_LIT8, - Instruction::AND_INT_LIT8, - Instruction::OR_INT_LIT8, - Instruction::XOR_INT_LIT8, - Instruction::SHL_INT_LIT8, - Instruction::SHR_INT_LIT8, - Instruction::USHR_INT_LIT8, // Instruction::IGET_QUICK, // Instruction::IGET_WIDE_QUICK, // Instruction::IGET_OBJECT_QUICK, @@ -391,23 +413,7 @@ int arm64_support_list[] = { // Instruction::UNUSED_FD, // Instruction::UNUSED_FE, // Instruction::UNUSED_FF, - - // ----- ExtendedMIROpcode ----- - // kMirOpPhi, - // kMirOpCopy, - // kMirOpFusedCmplFloat, - // kMirOpFusedCmpgFloat, - // kMirOpFusedCmplDouble, - // kMirOpFusedCmpgDouble, - // kMirOpFusedCmpLong, - // kMirOpNop, - // kMirOpNullCheck, - // kMirOpRangeCheck, - kMirOpDivZeroCheck, - kMirOpCheck, - // kMirOpCheckPart2, - // kMirOpSelect, - // kMirOpLast, +#endif /* ARM64_USE_EXPERIMENTAL_OPCODES */ }; // TODO: Remove this when we are able to compile everything. @@ -749,7 +755,7 @@ static bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file, } for (int idx = 0; idx < cu.mir_graph->GetNumBlocks(); idx++) { - BasicBlock *bb = cu.mir_graph->GetBasicBlock(idx); + BasicBlock* bb = cu.mir_graph->GetBasicBlock(idx); if (bb == NULL) continue; if (bb->block_type == kDead) continue; for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) { @@ -757,7 +763,7 @@ static bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file, // Check if we support the byte code. 
if (std::find(support_list, support_list + support_list_size, opcode) == support_list + support_list_size) { - if (opcode < kMirOpFirst) { + if (!cu.mir_graph->IsPseudoMirOp(opcode)) { VLOG(compiler) << "Unsupported dalvik byte code : " << mir->dalvikInsn.opcode; } else { @@ -925,7 +931,7 @@ static CompiledMethod* CompileMethod(CompilerDriver& driver, } /* Create the pass driver and launch it */ - PassDriverME pass_driver(&cu); + PassDriverMEOpts pass_driver(&cu); pass_driver.Launch(); if (cu.enable_debug & (1 << kDebugDumpCheckStats)) { diff --git a/compiler/dex/local_value_numbering.cc b/compiler/dex/local_value_numbering.cc index c0068b2331..62594963fc 100644 --- a/compiler/dex/local_value_numbering.cc +++ b/compiler/dex/local_value_numbering.cc @@ -21,8 +21,48 @@ namespace art { -uint16_t LocalValueNumbering::GetFieldId(const DexFile* dex_file, uint16_t field_idx) { - FieldReference key = { dex_file, field_idx }; +namespace { // anonymous namespace + +// Operations used for value map keys instead of actual opcode. +static constexpr uint16_t kInvokeMemoryVersionBumpOp = Instruction::INVOKE_DIRECT; +static constexpr uint16_t kUnresolvedSFieldOp = Instruction::SPUT; +static constexpr uint16_t kResolvedSFieldOp = Instruction::SGET; +static constexpr uint16_t kUnresolvedIFieldOp = Instruction::IPUT; +static constexpr uint16_t kNonAliasingIFieldOp = Instruction::IGET; +static constexpr uint16_t kAliasingIFieldOp = Instruction::IGET_WIDE; +static constexpr uint16_t kAliasingIFieldStartVersionOp = Instruction::IGET_WIDE; +static constexpr uint16_t kAliasingIFieldBumpVersionOp = Instruction::IGET_OBJECT; +static constexpr uint16_t kArrayAccessLocOp = Instruction::APUT; +static constexpr uint16_t kNonAliasingArrayOp = Instruction::AGET; +static constexpr uint16_t kNonAliasingArrayStartVersionOp = Instruction::AGET_WIDE; +static constexpr uint16_t kAliasingArrayOp = Instruction::AGET_OBJECT; +static constexpr uint16_t kAliasingArrayMemoryVersionOp = Instruction::AGET_BOOLEAN; +static constexpr uint16_t kAliasingArrayBumpVersionOp = Instruction::AGET_BYTE; + +} // anonymous namespace + +LocalValueNumbering::LocalValueNumbering(CompilationUnit* cu, ScopedArenaAllocator* allocator) + : cu_(cu), + last_value_(0u), + sreg_value_map_(std::less<uint16_t>(), allocator->Adapter()), + sreg_wide_value_map_(std::less<uint16_t>(), allocator->Adapter()), + value_map_(std::less<uint64_t>(), allocator->Adapter()), + global_memory_version_(0u), + aliasing_ifield_version_map_(std::less<uint16_t>(), allocator->Adapter()), + non_aliasing_array_version_map_(std::less<uint16_t>(), allocator->Adapter()), + field_index_map_(FieldReferenceComparator(), allocator->Adapter()), + non_aliasing_refs_(std::less<uint16_t>(), allocator->Adapter()), + non_aliasing_ifields_(NonAliasingIFieldKeyComparator(), allocator->Adapter()), + escaped_array_refs_(EscapedArrayKeyComparator(), allocator->Adapter()), + range_checked_(RangeCheckKeyComparator() , allocator->Adapter()), + null_checked_(std::less<uint16_t>(), allocator->Adapter()) { + std::fill_n(unresolved_sfield_version_, kFieldTypeCount, 0u); + std::fill_n(unresolved_ifield_version_, kFieldTypeCount, 0u); + std::fill_n(aliasing_array_version_, kFieldTypeCount, 0u); +} + +uint16_t LocalValueNumbering::GetFieldId(const MirFieldInfo& field_info) { + FieldReference key = { field_info.DeclaringDexFile(), field_info.DeclaringFieldIndex() }; auto it = field_index_map_.find(key); if (it != field_index_map_.end()) { return it->second; @@ -32,62 +72,6 @@ uint16_t 
LocalValueNumbering::GetFieldId(const DexFile* dex_file, uint16_t field return id; } -void LocalValueNumbering::AdvanceGlobalMemory() { - // See AdvanceMemoryVersion() for explanation. - global_memory_version_ = next_memory_version_; - ++next_memory_version_; -} - -uint16_t LocalValueNumbering::GetMemoryVersion(uint16_t base, uint16_t field, uint16_t type) { - // See AdvanceMemoryVersion() for explanation. - MemoryVersionKey key = { base, field, type }; - MemoryVersionMap::iterator it = memory_version_map_.find(key); - uint16_t memory_version = (it != memory_version_map_.end()) ? it->second : 0u; - if (base != NO_VALUE && non_aliasing_refs_.find(base) == non_aliasing_refs_.end()) { - // Check modifications by potentially aliased access. - MemoryVersionKey aliased_access_key = { NO_VALUE, field, type }; - auto aa_it = memory_version_map_.find(aliased_access_key); - if (aa_it != memory_version_map_.end() && aa_it->second > memory_version) { - memory_version = aa_it->second; - } - memory_version = std::max(memory_version, global_memory_version_); - } else if (base != NO_VALUE) { - // Ignore global_memory_version_ for access via unique references. - } else { - memory_version = std::max(memory_version, global_memory_version_); - } - return memory_version; -}; - -uint16_t LocalValueNumbering::AdvanceMemoryVersion(uint16_t base, uint16_t field, uint16_t type) { - // When we read the same value from memory, we want to assign the same value name to it. - // However, we need to be careful not to assign the same value name if the memory location - // may have been written to between the reads. To avoid that we do "memory versioning". - // - // For each write to a memory location (instance field, static field, array element) we assign - // a new memory version number to the location identified by the value name of the base register, - // the field id and type, or "{ base, field, type }". For static fields the "base" is NO_VALUE - // since they are not accessed via a reference. For arrays the "field" is NO_VALUE since they - // don't have a field id. - // - // To account for the possibility of aliased access to the same memory location via different - // "base", we also store the memory version number with the key "{ NO_VALUE, field, type }" - // if "base" is an aliasing reference and check it in GetMemoryVersion() on reads via - // aliasing references. A global memory version is set for method calls as a method can - // potentially write to any memory location accessed via an aliasing reference. - - uint16_t result = next_memory_version_; - ++next_memory_version_; - MemoryVersionKey key = { base, field, type }; - memory_version_map_.Overwrite(key, result); - if (base != NO_VALUE && non_aliasing_refs_.find(base) == non_aliasing_refs_.end()) { - // Advance memory version for aliased access. 
- MemoryVersionKey aliased_access_key = { NO_VALUE, field, type }; - memory_version_map_.Overwrite(aliased_access_key, result); - } - return result; -}; - uint16_t LocalValueNumbering::MarkNonAliasingNonNull(MIR* mir) { uint16_t res = GetOperandValue(mir->ssa_rep->defs[0]); SetOperandValue(mir->ssa_rep->defs[0], res); @@ -97,43 +81,332 @@ uint16_t LocalValueNumbering::MarkNonAliasingNonNull(MIR* mir) { return res; } -void LocalValueNumbering::MakeArgsAliasing(MIR* mir) { - for (size_t i = 0u, count = mir->ssa_rep->num_uses; i != count; ++i) { - uint16_t reg = GetOperandValue(mir->ssa_rep->uses[i]); - non_aliasing_refs_.erase(reg); +bool LocalValueNumbering::IsNonAliasing(uint16_t reg) { + return non_aliasing_refs_.find(reg) != non_aliasing_refs_.end(); +} + +bool LocalValueNumbering::IsNonAliasingIField(uint16_t reg, uint16_t field_id, uint16_t type) { + if (IsNonAliasing(reg)) { + return true; + } + NonAliasingIFieldKey key = { reg, field_id, type }; + return non_aliasing_ifields_.count(key) != 0u; +} + +bool LocalValueNumbering::IsNonAliasingArray(uint16_t reg, uint16_t type) { + if (IsNonAliasing(reg)) { + return true; } + EscapedArrayKey key = { reg, type }; + return escaped_array_refs_.count(key) != 0u; } + void LocalValueNumbering::HandleNullCheck(MIR* mir, uint16_t reg) { - if (null_checked_.find(reg) != null_checked_.end()) { - if (cu_->verbose) { - LOG(INFO) << "Removing null check for 0x" << std::hex << mir->offset; + auto lb = null_checked_.lower_bound(reg); + if (lb != null_checked_.end() && *lb == reg) { + if (LIKELY(Good())) { + if (cu_->verbose) { + LOG(INFO) << "Removing null check for 0x" << std::hex << mir->offset; + } + mir->optimization_flags |= MIR_IGNORE_NULL_CHECK; } - mir->optimization_flags |= MIR_IGNORE_NULL_CHECK; } else { - null_checked_.insert(reg); + null_checked_.insert(lb, reg); } } void LocalValueNumbering::HandleRangeCheck(MIR* mir, uint16_t array, uint16_t index) { - if (ValueExists(ARRAY_REF, array, index, NO_VALUE)) { - if (cu_->verbose) { - LOG(INFO) << "Removing range check for 0x" << std::hex << mir->offset; + RangeCheckKey key = { array, index }; + auto lb = range_checked_.lower_bound(key); + if (lb != range_checked_.end() && !RangeCheckKeyComparator()(key, *lb)) { + if (LIKELY(Good())) { + if (cu_->verbose) { + LOG(INFO) << "Removing range check for 0x" << std::hex << mir->offset; + } + mir->optimization_flags |= MIR_IGNORE_RANGE_CHECK; } - mir->optimization_flags |= MIR_IGNORE_RANGE_CHECK; + } else { + // Mark range check completed. + range_checked_.insert(lb, key); } - // Use side effect to note range check completed. - (void)LookupValue(ARRAY_REF, array, index, NO_VALUE); } void LocalValueNumbering::HandlePutObject(MIR* mir) { // If we're storing a non-aliasing reference, stop tracking it as non-aliasing now. 
uint16_t base = GetOperandValue(mir->ssa_rep->uses[0]); - non_aliasing_refs_.erase(base); + HandleEscapingRef(base); +} + +void LocalValueNumbering::HandleEscapingRef(uint16_t base) { + auto it = non_aliasing_refs_.find(base); + if (it != non_aliasing_refs_.end()) { + uint64_t iget_key = BuildKey(Instruction::IGET, base, 0u, 0u); + for (auto iget_it = value_map_.lower_bound(iget_key), iget_end = value_map_.end(); + iget_it != iget_end && EqualOpAndOperand1(iget_it->first, iget_key); ++iget_it) { + uint16_t field_id = ExtractOperand2(iget_it->first); + uint16_t type = ExtractModifier(iget_it->first); + NonAliasingIFieldKey key = { base, field_id, type }; + non_aliasing_ifields_.insert(key); + } + uint64_t aget_key = BuildKey(kNonAliasingArrayStartVersionOp, base, 0u, 0u); + auto aget_it = value_map_.lower_bound(aget_key); + if (aget_it != value_map_.end() && EqualOpAndOperand1(aget_key, aget_it->first)) { + DCHECK_EQ(ExtractOperand2(aget_it->first), kNoValue); + uint16_t type = ExtractModifier(aget_it->first); + EscapedArrayKey key = { base, type }; + escaped_array_refs_.insert(key); + } + non_aliasing_refs_.erase(it); + } +} + +uint16_t LocalValueNumbering::HandleAGet(MIR* mir, uint16_t opcode) { + // uint16_t type = opcode - Instruction::AGET; + uint16_t array = GetOperandValue(mir->ssa_rep->uses[0]); + HandleNullCheck(mir, array); + uint16_t index = GetOperandValue(mir->ssa_rep->uses[1]); + HandleRangeCheck(mir, array, index); + uint16_t type = opcode - Instruction::AGET; + // Establish value number for loaded register. + uint16_t res; + if (IsNonAliasingArray(array, type)) { + // Get the start version that accounts for aliasing within the array (different index names). + uint16_t start_version = LookupValue(kNonAliasingArrayStartVersionOp, array, kNoValue, type); + // Find the current version from the non_aliasing_array_version_map_. + uint16_t memory_version = start_version; + auto it = non_aliasing_array_version_map_.find(start_version); + if (it != non_aliasing_array_version_map_.end()) { + memory_version = it->second; + } else { + // Just use the start_version. + } + res = LookupValue(kNonAliasingArrayOp, array, index, memory_version); + } else { + // Get the memory version of aliased array accesses of this type. + uint16_t memory_version = LookupValue(kAliasingArrayMemoryVersionOp, global_memory_version_, + aliasing_array_version_[type], kNoValue); + res = LookupValue(kAliasingArrayOp, array, index, memory_version); + } + if (opcode == Instruction::AGET_WIDE) { + SetOperandValueWide(mir->ssa_rep->defs[0], res); + } else { + SetOperandValue(mir->ssa_rep->defs[0], res); + } + return res; +} + +void LocalValueNumbering::HandleAPut(MIR* mir, uint16_t opcode) { + int array_idx = (opcode == Instruction::APUT_WIDE) ? 2 : 1; + int index_idx = array_idx + 1; + uint16_t array = GetOperandValue(mir->ssa_rep->uses[array_idx]); + HandleNullCheck(mir, array); + uint16_t index = GetOperandValue(mir->ssa_rep->uses[index_idx]); + HandleRangeCheck(mir, array, index); + + uint16_t type = opcode - Instruction::APUT; + uint16_t value = (opcode == Instruction::APUT_WIDE) + ? GetOperandValueWide(mir->ssa_rep->uses[0]) + : GetOperandValue(mir->ssa_rep->uses[0]); + if (IsNonAliasing(array)) { + // Get the start version that accounts for aliasing within the array (different index values). 
+ uint16_t start_version = LookupValue(kNonAliasingArrayStartVersionOp, array, kNoValue, type); + auto it = non_aliasing_array_version_map_.find(start_version); + uint16_t memory_version = start_version; + if (it != non_aliasing_array_version_map_.end()) { + memory_version = it->second; + } + // We need to take 4 values (array, index, memory_version, value) into account for bumping + // the memory version but the key can take only 3. Merge array and index into a location. + uint16_t array_access_location = LookupValue(kArrayAccessLocOp, array, index, kNoValue); + // Bump the version, adding to the chain. + memory_version = LookupValue(kAliasingArrayBumpVersionOp, memory_version, + array_access_location, value); + non_aliasing_array_version_map_.Overwrite(start_version, memory_version); + StoreValue(kNonAliasingArrayOp, array, index, memory_version, value); + } else { + // Get the memory version based on global_memory_version_ and aliasing_array_version_[type]. + uint16_t memory_version = LookupValue(kAliasingArrayMemoryVersionOp, global_memory_version_, + aliasing_array_version_[type], kNoValue); + if (HasValue(kAliasingArrayOp, array, index, memory_version, value)) { + // This APUT can be eliminated, it stores the same value that's already in the field. + // TODO: Eliminate the APUT. + return; + } + // We need to take 4 values (array, index, memory_version, value) into account for bumping + // the memory version but the key can take only 3. Merge array and index into a location. + uint16_t array_access_location = LookupValue(kArrayAccessLocOp, array, index, kNoValue); + // Bump the version, adding to the chain. + uint16_t bumped_version = LookupValue(kAliasingArrayBumpVersionOp, memory_version, + array_access_location, value); + aliasing_array_version_[type] = bumped_version; + memory_version = LookupValue(kAliasingArrayMemoryVersionOp, global_memory_version_, + bumped_version, kNoValue); + StoreValue(kAliasingArrayOp, array, index, memory_version, value); + + // Clear escaped array refs for this type. + EscapedArrayKey array_key = { type, 0u }; + auto it = escaped_array_refs_.lower_bound(array_key), end = escaped_array_refs_.end(); + while (it != end && it->type == type) { + it = escaped_array_refs_.erase(it); + } + } +} + +uint16_t LocalValueNumbering::HandleIGet(MIR* mir, uint16_t opcode) { + uint16_t base = GetOperandValue(mir->ssa_rep->uses[0]); + HandleNullCheck(mir, base); + const MirFieldInfo& field_info = cu_->mir_graph->GetIFieldLoweringInfo(mir); + uint16_t res; + if (!field_info.IsResolved() || field_info.IsVolatile()) { + // Volatile fields always get a new memory version; field id is irrelevant. + // Unresolved fields may be volatile, so handle them as such to be safe. + // Use result s_reg - will be unique. + res = LookupValue(kNoValue, mir->ssa_rep->defs[0], kNoValue, kNoValue); + } else { + uint16_t type = opcode - Instruction::IGET; + uint16_t field_id = GetFieldId(field_info); + if (IsNonAliasingIField(base, field_id, type)) { + res = LookupValue(kNonAliasingIFieldOp, base, field_id, type); + } else { + // Get the start version that accounts for aliasing with unresolved fields of the same type + // and make it unique for the field by including the field_id. + uint16_t start_version = LookupValue(kAliasingIFieldStartVersionOp, global_memory_version_, + unresolved_ifield_version_[type], field_id); + // Find the current version from the aliasing_ifield_version_map_. 
+ uint16_t memory_version = start_version; + auto version_it = aliasing_ifield_version_map_.find(start_version); + if (version_it != aliasing_ifield_version_map_.end()) { + memory_version = version_it->second; + } else { + // Just use the start_version. + } + res = LookupValue(kAliasingIFieldOp, base, field_id, memory_version); + } + } + if (opcode == Instruction::IGET_WIDE) { + SetOperandValueWide(mir->ssa_rep->defs[0], res); + } else { + SetOperandValue(mir->ssa_rep->defs[0], res); + } + return res; +} + +void LocalValueNumbering::HandleIPut(MIR* mir, uint16_t opcode) { + uint16_t type = opcode - Instruction::IPUT; + int base_reg = (opcode == Instruction::IPUT_WIDE) ? 2 : 1; + uint16_t base = GetOperandValue(mir->ssa_rep->uses[base_reg]); + HandleNullCheck(mir, base); + const MirFieldInfo& field_info = cu_->mir_graph->GetIFieldLoweringInfo(mir); + if (!field_info.IsResolved()) { + // Unresolved fields always alias with everything of the same type. + // Use mir->offset as modifier; without elaborate inlining, it will be unique. + unresolved_ifield_version_[type] = + LookupValue(kUnresolvedIFieldOp, kNoValue, kNoValue, mir->offset); + + // Treat fields of escaped references of the same type as potentially modified. + NonAliasingIFieldKey key = { type, 0u, 0u }; // lowest possible key of this type. + auto it = non_aliasing_ifields_.lower_bound(key), end = non_aliasing_ifields_.end(); + while (it != end && it->type == type) { + it = non_aliasing_ifields_.erase(it); + } + } else if (field_info.IsVolatile()) { + // Nothing to do, resolved volatile fields always get a new memory version anyway and + // can't alias with resolved non-volatile fields. + } else { + uint16_t field_id = GetFieldId(field_info); + uint16_t value = (opcode == Instruction::IPUT_WIDE) + ? GetOperandValueWide(mir->ssa_rep->uses[0]) + : GetOperandValue(mir->ssa_rep->uses[0]); + if (IsNonAliasing(base)) { + StoreValue(kNonAliasingIFieldOp, base, field_id, type, value); + } else { + // Get the start version that accounts for aliasing with unresolved fields of the same type + // and make it unique for the field by including the field_id. + uint16_t start_version = LookupValue(kAliasingIFieldStartVersionOp, global_memory_version_, + unresolved_ifield_version_[type], field_id); + // Find the old version from the aliasing_ifield_version_map_. + uint16_t old_version = start_version; + auto version_it = aliasing_ifield_version_map_.find(start_version); + if (version_it != aliasing_ifield_version_map_.end()) { + old_version = version_it->second; + } + // Check if the field currently contains the value, making this a NOP. + if (HasValue(kAliasingIFieldOp, base, field_id, old_version, value)) { + // This IPUT can be eliminated, it stores the same value that's already in the field. + // TODO: Eliminate the IPUT. + return; + } + // Bump the version, adding to the chain started by start_version. + uint16_t memory_version = LookupValue(kAliasingIFieldBumpVersionOp, old_version, base, value); + // Update the aliasing_ifield_version_map_ so that HandleIGet() can get the memory_version + // without knowing the values used to build the chain. + aliasing_ifield_version_map_.Overwrite(start_version, memory_version); + StoreValue(kAliasingIFieldOp, base, field_id, memory_version, value); + + // Clear non-aliasing fields for this field_id. 
+ NonAliasingIFieldKey field_key = { type, field_id, 0u }; + auto it = non_aliasing_ifields_.lower_bound(field_key), end = non_aliasing_ifields_.end(); + while (it != end && it->field_id == field_id) { + DCHECK_EQ(type, it->type); + it = non_aliasing_ifields_.erase(it); + } + } + } +} + +uint16_t LocalValueNumbering::HandleSGet(MIR* mir, uint16_t opcode) { + const MirFieldInfo& field_info = cu_->mir_graph->GetSFieldLoweringInfo(mir); + uint16_t res; + if (!field_info.IsResolved() || field_info.IsVolatile()) { + // Volatile fields always get a new memory version; field id is irrelevant. + // Unresolved fields may be volatile, so handle them as such to be safe. + // Use result s_reg - will be unique. + res = LookupValue(kNoValue, mir->ssa_rep->defs[0], kNoValue, kNoValue); + } else { + uint16_t field_id = GetFieldId(field_info); + // Resolved non-volatile static fields can alias with non-resolved fields of the same type, + // so we need to use unresolved_sfield_version_[type] in addition to global_memory_version_ + // to determine the version of the field. + uint16_t type = opcode - Instruction::SGET; + res = LookupValue(kResolvedSFieldOp, field_id, + unresolved_sfield_version_[type], global_memory_version_); + } + if (opcode == Instruction::SGET_WIDE) { + SetOperandValueWide(mir->ssa_rep->defs[0], res); + } else { + SetOperandValue(mir->ssa_rep->defs[0], res); + } + return res; +} + +void LocalValueNumbering::HandleSPut(MIR* mir, uint16_t opcode) { + uint16_t type = opcode - Instruction::SPUT; + const MirFieldInfo& field_info = cu_->mir_graph->GetSFieldLoweringInfo(mir); + if (!field_info.IsResolved()) { + // Unresolved fields always alias with everything of the same type. + // Use mir->offset as modifier; without elaborate inlining, it will be unique. + unresolved_sfield_version_[type] = + LookupValue(kUnresolvedSFieldOp, kNoValue, kNoValue, mir->offset); + } else if (field_info.IsVolatile()) { + // Nothing to do, resolved volatile fields always get a new memory version anyway and + // can't alias with resolved non-volatile fields. + } else { + uint16_t field_id = GetFieldId(field_info); + uint16_t value = (opcode == Instruction::SPUT_WIDE) + ? GetOperandValueWide(mir->ssa_rep->uses[0]) + : GetOperandValue(mir->ssa_rep->uses[0]); + // Resolved non-volatile static fields can alias with non-resolved fields of the same type, + // so we need to use unresolved_sfield_version_[type] in addition to global_memory_version_ + // to determine the version of the field. + uint16_t type = opcode - Instruction::SGET; + StoreValue(kResolvedSFieldOp, field_id, + unresolved_sfield_version_[type], global_memory_version_, value); + } } uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { - uint16_t res = NO_VALUE; + uint16_t res = kNoValue; uint16_t opcode = mir->dalvikInsn.opcode; switch (opcode) { case Instruction::NOP: @@ -176,9 +449,14 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { // Nothing defined but the result will be unique and non-null. if (mir->next != nullptr && mir->next->dalvikInsn.opcode == Instruction::MOVE_RESULT_OBJECT) { MarkNonAliasingNonNull(mir->next); + // TUNING: We could track value names stored in the array. // The MOVE_RESULT_OBJECT will be processed next and we'll return the value name then. } - MakeArgsAliasing(mir); + // All args escaped (if references). 
+ for (size_t i = 0u, count = mir->ssa_rep->num_uses; i != count; ++i) { + uint16_t reg = GetOperandValue(mir->ssa_rep->uses[i]); + HandleEscapingRef(reg); + } break; case Instruction::INVOKE_DIRECT: @@ -197,8 +475,17 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::INVOKE_STATIC: case Instruction::INVOKE_STATIC_RANGE: if ((mir->optimization_flags & MIR_INLINED) == 0) { - AdvanceGlobalMemory(); - MakeArgsAliasing(mir); + // Use mir->offset as modifier; without elaborate inlining, it will be unique. + global_memory_version_ = LookupValue(kInvokeMemoryVersionBumpOp, 0u, 0u, mir->offset); + // Make ref args aliasing. + for (size_t i = 0u, count = mir->ssa_rep->num_uses; i != count; ++i) { + uint16_t reg = GetOperandValue(mir->ssa_rep->uses[i]); + non_aliasing_refs_.erase(reg); + } + // All fields of escaped references need to be treated as potentially modified. + non_aliasing_ifields_.clear(); + // Array elements may also have been modified via escaped array refs. + escaped_array_refs_.clear(); } break; @@ -211,13 +498,24 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { break; case Instruction::MOVE_EXCEPTION: case Instruction::NEW_INSTANCE: - case Instruction::CONST_STRING: - case Instruction::CONST_STRING_JUMBO: case Instruction::CONST_CLASS: case Instruction::NEW_ARRAY: // 1 result, treat as unique each time, use result s_reg - will be unique. res = MarkNonAliasingNonNull(mir); break; + case Instruction::CONST_STRING: + case Instruction::CONST_STRING_JUMBO: + // These strings are internalized, so assign value based on the string pool index. + res = LookupValue(Instruction::CONST_STRING, Low16Bits(mir->dalvikInsn.vB), + High16Bits(mir->dalvikInsn.vB), 0); + SetOperandValue(mir->ssa_rep->defs[0], res); + null_checked_.insert(res); // May already be there. + // NOTE: Hacking the contents of an internalized string via reflection is possible + // but the behavior is undefined. Therefore, we consider the string constant and + // the reference non-aliasing. + // TUNING: We could keep this property even if the reference "escapes". + non_aliasing_refs_.insert(res); // May already be there. + break; case Instruction::MOVE_RESULT_WIDE: // 1 wide result, treat as unique each time, use result s_reg - will be unique. 
res = GetOperandValueWide(mir->ssa_rep->defs[0]); @@ -255,7 +553,7 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::CONST_4: case Instruction::CONST_16: res = LookupValue(Instruction::CONST, Low16Bits(mir->dalvikInsn.vB), - High16Bits(mir->dalvikInsn.vB >> 16), 0); + High16Bits(mir->dalvikInsn.vB), 0); SetOperandValue(mir->ssa_rep->defs[0], res); break; @@ -310,7 +608,7 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::FLOAT_TO_INT: { // res = op + 1 operand uint16_t operand1 = GetOperandValue(mir->ssa_rep->uses[0]); - res = LookupValue(opcode, operand1, NO_VALUE, NO_VALUE); + res = LookupValue(opcode, operand1, kNoValue, kNoValue); SetOperandValue(mir->ssa_rep->defs[0], res); } break; @@ -320,8 +618,8 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::DOUBLE_TO_FLOAT: case Instruction::DOUBLE_TO_INT: { // res = op + 1 wide operand - uint16_t operand1 = GetOperandValue(mir->ssa_rep->uses[0]); - res = LookupValue(opcode, operand1, NO_VALUE, NO_VALUE); + uint16_t operand1 = GetOperandValueWide(mir->ssa_rep->uses[0]); + res = LookupValue(opcode, operand1, kNoValue, kNoValue); SetOperandValue(mir->ssa_rep->defs[0], res); } break; @@ -334,7 +632,7 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::NEG_DOUBLE: { // wide res = op + 1 wide operand uint16_t operand1 = GetOperandValueWide(mir->ssa_rep->uses[0]); - res = LookupValue(opcode, operand1, NO_VALUE, NO_VALUE); + res = LookupValue(opcode, operand1, kNoValue, kNoValue); SetOperandValueWide(mir->ssa_rep->defs[0], res); } break; @@ -344,8 +642,8 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::INT_TO_DOUBLE: case Instruction::INT_TO_LONG: { // wide res = op + 1 operand - uint16_t operand1 = GetOperandValueWide(mir->ssa_rep->uses[0]); - res = LookupValue(opcode, operand1, NO_VALUE, NO_VALUE); + uint16_t operand1 = GetOperandValue(mir->ssa_rep->uses[0]); + res = LookupValue(opcode, operand1, kNoValue, kNoValue); SetOperandValueWide(mir->ssa_rep->defs[0], res); } break; @@ -356,7 +654,7 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { // res = op + 2 wide operands uint16_t operand1 = GetOperandValueWide(mir->ssa_rep->uses[0]); uint16_t operand2 = GetOperandValueWide(mir->ssa_rep->uses[2]); - res = LookupValue(opcode, operand1, operand2, NO_VALUE); + res = LookupValue(opcode, operand1, operand2, kNoValue); SetOperandValue(mir->ssa_rep->defs[0], res); } break; @@ -388,7 +686,7 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { // res = op + 2 operands uint16_t operand1 = GetOperandValue(mir->ssa_rep->uses[0]); uint16_t operand2 = GetOperandValue(mir->ssa_rep->uses[1]); - res = LookupValue(opcode, operand1, operand2, NO_VALUE); + res = LookupValue(opcode, operand1, operand2, kNoValue); SetOperandValue(mir->ssa_rep->defs[0], res); } break; @@ -422,7 +720,7 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { // wide res = op + 2 wide operands uint16_t operand1 = GetOperandValueWide(mir->ssa_rep->uses[0]); uint16_t operand2 = GetOperandValueWide(mir->ssa_rep->uses[2]); - res = LookupValue(opcode, operand1, operand2, NO_VALUE); + res = LookupValue(opcode, operand1, operand2, kNoValue); SetOperandValueWide(mir->ssa_rep->defs[0], res); } break; @@ -435,8 +733,8 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::USHR_LONG_2ADDR: { // wide res = op + 1 wide operand + 1 operand uint16_t operand1 = GetOperandValueWide(mir->ssa_rep->uses[0]); - uint16_t operand2 = 
GetOperandValueWide(mir->ssa_rep->uses[2]); - res = LookupValue(opcode, operand1, operand2, NO_VALUE); + uint16_t operand2 = GetOperandValue(mir->ssa_rep->uses[2]); + res = LookupValue(opcode, operand1, operand2, kNoValue); SetOperandValueWide(mir->ssa_rep->defs[0], res); } break; @@ -454,7 +752,7 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { // res = op + 2 operands uint16_t operand1 = GetOperandValue(mir->ssa_rep->uses[0]); uint16_t operand2 = GetOperandValue(mir->ssa_rep->uses[1]); - res = LookupValue(opcode, operand1, operand2, NO_VALUE); + res = LookupValue(opcode, operand1, operand2, kNoValue); SetOperandValue(mir->ssa_rep->defs[0], res); } break; @@ -481,7 +779,7 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { // Same as res = op + 2 operands, except use vC as operand 2 uint16_t operand1 = GetOperandValue(mir->ssa_rep->uses[0]); uint16_t operand2 = LookupValue(Instruction::CONST, mir->dalvikInsn.vC, 0, 0); - res = LookupValue(opcode, operand1, operand2, NO_VALUE); + res = LookupValue(opcode, operand1, operand2, kNoValue); SetOperandValue(mir->ssa_rep->defs[0], res); } break; @@ -492,21 +790,8 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::AGET_BOOLEAN: case Instruction::AGET_BYTE: case Instruction::AGET_CHAR: - case Instruction::AGET_SHORT: { - uint16_t type = opcode - Instruction::AGET; - uint16_t array = GetOperandValue(mir->ssa_rep->uses[0]); - HandleNullCheck(mir, array); - uint16_t index = GetOperandValue(mir->ssa_rep->uses[1]); - HandleRangeCheck(mir, array, index); - // Establish value number for loaded register. Note use of memory version. - uint16_t memory_version = GetMemoryVersion(array, NO_VALUE, type); - uint16_t res = LookupValue(ARRAY_REF, array, index, memory_version); - if (opcode == Instruction::AGET_WIDE) { - SetOperandValueWide(mir->ssa_rep->defs[0], res); - } else { - SetOperandValue(mir->ssa_rep->defs[0], res); - } - } + case Instruction::AGET_SHORT: + res = HandleAGet(mir, opcode); break; case Instruction::APUT_OBJECT: @@ -517,17 +802,8 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::APUT_BYTE: case Instruction::APUT_BOOLEAN: case Instruction::APUT_SHORT: - case Instruction::APUT_CHAR: { - uint16_t type = opcode - Instruction::APUT; - int array_idx = (opcode == Instruction::APUT_WIDE) ? 2 : 1; - int index_idx = array_idx + 1; - uint16_t array = GetOperandValue(mir->ssa_rep->uses[array_idx]); - HandleNullCheck(mir, array); - uint16_t index = GetOperandValue(mir->ssa_rep->uses[index_idx]); - HandleRangeCheck(mir, array, index); - // Rev the memory version - AdvanceMemoryVersion(array, NO_VALUE, type); - } + case Instruction::APUT_CHAR: + HandleAPut(mir, opcode); break; case Instruction::IGET_OBJECT: @@ -536,33 +812,8 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::IGET_BOOLEAN: case Instruction::IGET_BYTE: case Instruction::IGET_CHAR: - case Instruction::IGET_SHORT: { - uint16_t type = opcode - Instruction::IGET; - uint16_t base = GetOperandValue(mir->ssa_rep->uses[0]); - HandleNullCheck(mir, base); - const MirFieldInfo& field_info = cu_->mir_graph->GetIFieldLoweringInfo(mir); - uint16_t memory_version; - uint16_t field_id; - if (!field_info.IsResolved() || field_info.IsVolatile()) { - // Volatile fields always get a new memory version; field id is irrelevant. - // Unresolved fields may be volatile, so handle them as such to be safe. 
- field_id = 0u; - memory_version = next_memory_version_; - ++next_memory_version_; - } else { - DCHECK(field_info.IsResolved()); - field_id = GetFieldId(field_info.DeclaringDexFile(), field_info.DeclaringFieldIndex()); - memory_version = std::max(unresolved_ifield_version_[type], - GetMemoryVersion(base, field_id, type)); - } - if (opcode == Instruction::IGET_WIDE) { - res = LookupValue(Instruction::IGET_WIDE, base, field_id, memory_version); - SetOperandValueWide(mir->ssa_rep->defs[0], res); - } else { - res = LookupValue(Instruction::IGET, base, field_id, memory_version); - SetOperandValue(mir->ssa_rep->defs[0], res); - } - } + case Instruction::IGET_SHORT: + res = HandleIGet(mir, opcode); break; case Instruction::IPUT_OBJECT: @@ -573,24 +824,8 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::IPUT_BOOLEAN: case Instruction::IPUT_BYTE: case Instruction::IPUT_CHAR: - case Instruction::IPUT_SHORT: { - uint16_t type = opcode - Instruction::IPUT; - int base_reg = (opcode == Instruction::IPUT_WIDE) ? 2 : 1; - uint16_t base = GetOperandValue(mir->ssa_rep->uses[base_reg]); - HandleNullCheck(mir, base); - const MirFieldInfo& field_info = cu_->mir_graph->GetIFieldLoweringInfo(mir); - if (!field_info.IsResolved()) { - // Unresolved fields always alias with everything of the same type. - unresolved_ifield_version_[type] = next_memory_version_; - ++next_memory_version_; - } else if (field_info.IsVolatile()) { - // Nothing to do, resolved volatile fields always get a new memory version anyway and - // can't alias with resolved non-volatile fields. - } else { - AdvanceMemoryVersion(base, GetFieldId(field_info.DeclaringDexFile(), - field_info.DeclaringFieldIndex()), type); - } - } + case Instruction::IPUT_SHORT: + HandleIPut(mir, opcode); break; case Instruction::SGET_OBJECT: @@ -599,31 +834,8 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::SGET_BOOLEAN: case Instruction::SGET_BYTE: case Instruction::SGET_CHAR: - case Instruction::SGET_SHORT: { - uint16_t type = opcode - Instruction::SGET; - const MirFieldInfo& field_info = cu_->mir_graph->GetSFieldLoweringInfo(mir); - uint16_t memory_version; - uint16_t field_id; - if (!field_info.IsResolved() || field_info.IsVolatile()) { - // Volatile fields always get a new memory version; field id is irrelevant. - // Unresolved fields may be volatile, so handle them as such to be safe. 
- field_id = 0u; - memory_version = next_memory_version_; - ++next_memory_version_; - } else { - DCHECK(field_info.IsResolved()); - field_id = GetFieldId(field_info.DeclaringDexFile(), field_info.DeclaringFieldIndex()); - memory_version = std::max(unresolved_sfield_version_[type], - GetMemoryVersion(NO_VALUE, field_id, type)); - } - if (opcode == Instruction::SGET_WIDE) { - res = LookupValue(Instruction::SGET_WIDE, NO_VALUE, field_id, memory_version); - SetOperandValueWide(mir->ssa_rep->defs[0], res); - } else { - res = LookupValue(Instruction::SGET, NO_VALUE, field_id, memory_version); - SetOperandValue(mir->ssa_rep->defs[0], res); - } - } + case Instruction::SGET_SHORT: + res = HandleSGet(mir, opcode); break; case Instruction::SPUT_OBJECT: @@ -634,21 +846,8 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::SPUT_BOOLEAN: case Instruction::SPUT_BYTE: case Instruction::SPUT_CHAR: - case Instruction::SPUT_SHORT: { - uint16_t type = opcode - Instruction::SPUT; - const MirFieldInfo& field_info = cu_->mir_graph->GetSFieldLoweringInfo(mir); - if (!field_info.IsResolved()) { - // Unresolved fields always alias with everything of the same type. - unresolved_sfield_version_[type] = next_memory_version_; - ++next_memory_version_; - } else if (field_info.IsVolatile()) { - // Nothing to do, resolved volatile fields always get a new memory version anyway and - // can't alias with resolved non-volatile fields. - } else { - AdvanceMemoryVersion(NO_VALUE, GetFieldId(field_info.DeclaringDexFile(), - field_info.DeclaringFieldIndex()), type); - } - } + case Instruction::SPUT_SHORT: + HandleSPut(mir, opcode); break; } return res; diff --git a/compiler/dex/local_value_numbering.h b/compiler/dex/local_value_numbering.h index 0c2b6a7e01..2a815be1cc 100644 --- a/compiler/dex/local_value_numbering.h +++ b/compiler/dex/local_value_numbering.h @@ -23,15 +23,33 @@ #include "utils/scoped_arena_allocator.h" #include "utils/scoped_arena_containers.h" -#define NO_VALUE 0xffff -#define ARRAY_REF 0xfffe - namespace art { class DexFile; +class MirFieldInfo; class LocalValueNumbering { + public: + LocalValueNumbering(CompilationUnit* cu, ScopedArenaAllocator* allocator); + + uint16_t GetValueNumber(MIR* mir); + + // LocalValueNumbering should be allocated on the ArenaStack (or the native stack). + static void* operator new(size_t size, ScopedArenaAllocator* allocator) { + return allocator->Alloc(sizeof(LocalValueNumbering), kArenaAllocMIR); + } + + // Allow delete-expression to destroy a LocalValueNumbering object without deallocation. + static void operator delete(void* ptr) { UNUSED(ptr); } + + // Checks that the value names didn't overflow. + bool Good() const { + return last_value_ < kNoValue; + } + private: + static constexpr uint16_t kNoValue = 0xffffu; + // Field types correspond to the ordering of GET/PUT instructions; this order is the same // for IGET, IPUT, SGET, SPUT, AGET and APUT: // op 0 @@ -43,7 +61,7 @@ class LocalValueNumbering { // op_SHORT 6 static constexpr size_t kFieldTypeCount = 7; - // FieldReference represents either a unique resolved field or all unresolved fields together. + // FieldReference represents a unique resolved field. struct FieldReference { const DexFile* dex_file; uint16_t field_idx; @@ -58,48 +76,107 @@ class LocalValueNumbering { } }; - struct MemoryVersionKey { + // Maps field key to field id for resolved fields. 
+ typedef ScopedArenaSafeMap<FieldReference, uint32_t, FieldReferenceComparator> FieldIndexMap; + + struct RangeCheckKey { + uint16_t array; + uint16_t index; + }; + + struct RangeCheckKeyComparator { + bool operator()(const RangeCheckKey& lhs, const RangeCheckKey& rhs) const { + if (lhs.array != rhs.array) { + return lhs.array < rhs.array; + } + return lhs.index < rhs.index; + } + }; + + typedef ScopedArenaSet<RangeCheckKey, RangeCheckKeyComparator> RangeCheckSet; + + typedef ScopedArenaSafeMap<uint16_t, uint16_t> AliasingIFieldVersionMap; + typedef ScopedArenaSafeMap<uint16_t, uint16_t> NonAliasingArrayVersionMap; + + struct NonAliasingIFieldKey { uint16_t base; uint16_t field_id; uint16_t type; }; - struct MemoryVersionKeyComparator { - bool operator()(const MemoryVersionKey& lhs, const MemoryVersionKey& rhs) const { - if (lhs.base != rhs.base) { - return lhs.base < rhs.base; + struct NonAliasingIFieldKeyComparator { + bool operator()(const NonAliasingIFieldKey& lhs, const NonAliasingIFieldKey& rhs) const { + // Compare the type first. This allows iterating across all the entries for a certain type + // as needed when we need to purge them for an unresolved field IPUT. + if (lhs.type != rhs.type) { + return lhs.type < rhs.type; } + // Compare the field second. This allows iterating across all the entries for a certain + // field as needed when we need to purge them for an aliasing field IPUT. if (lhs.field_id != rhs.field_id) { return lhs.field_id < rhs.field_id; } - return lhs.type < rhs.type; + // Compare the base last. + return lhs.base < rhs.base; } }; + // Set of instance fields still holding non-aliased values after the base has been stored. + typedef ScopedArenaSet<NonAliasingIFieldKey, NonAliasingIFieldKeyComparator> NonAliasingFieldSet; + + struct EscapedArrayKey { + uint16_t base; + uint16_t type; + }; + + struct EscapedArrayKeyComparator { + bool operator()(const EscapedArrayKey& lhs, const EscapedArrayKey& rhs) const { + // Compare the type first. This allows iterating across all the entries for a certain type + // as needed when we need to purge them for an unresolved field APUT. + if (lhs.type != rhs.type) { + return lhs.type < rhs.type; + } + // Compare the base last. + return lhs.base < rhs.base; + } + }; + + // Set of previously non-aliasing array refs that escaped. + typedef ScopedArenaSet<EscapedArrayKey, EscapedArrayKeyComparator> EscapedArraySet; + // Key is s_reg, value is value name. typedef ScopedArenaSafeMap<uint16_t, uint16_t> SregValueMap; // Key is concatenation of opcode, operand1, operand2 and modifier, value is value name. typedef ScopedArenaSafeMap<uint64_t, uint16_t> ValueMap; // Key represents a memory address, value is generation. - typedef ScopedArenaSafeMap<MemoryVersionKey, uint16_t, MemoryVersionKeyComparator - > MemoryVersionMap; - // Maps field key to field id for resolved fields. - typedef ScopedArenaSafeMap<FieldReference, uint32_t, FieldReferenceComparator> FieldIndexMap; // A set of value names. 
typedef ScopedArenaSet<uint16_t> ValueNameSet; - public: - static LocalValueNumbering* Create(CompilationUnit* cu) { - std::unique_ptr<ScopedArenaAllocator> allocator(ScopedArenaAllocator::Create(&cu->arena_stack)); - void* addr = allocator->Alloc(sizeof(LocalValueNumbering), kArenaAllocMisc); - return new(addr) LocalValueNumbering(cu, allocator.release()); - } - static uint64_t BuildKey(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) { return (static_cast<uint64_t>(op) << 48 | static_cast<uint64_t>(operand1) << 32 | static_cast<uint64_t>(operand2) << 16 | static_cast<uint64_t>(modifier)); }; + static uint16_t ExtractOp(uint64_t key) { + return static_cast<uint16_t>(key >> 48); + } + + static uint16_t ExtractOperand1(uint64_t key) { + return static_cast<uint16_t>(key >> 32); + } + + static uint16_t ExtractOperand2(uint64_t key) { + return static_cast<uint16_t>(key >> 16); + } + + static uint16_t ExtractModifier(uint64_t key) { + return static_cast<uint16_t>(key); + } + + static bool EqualOpAndOperand1(uint64_t key1, uint64_t key2) { + return static_cast<uint32_t>(key1 >> 32) == static_cast<uint32_t>(key2 >> 32); + } + uint16_t LookupValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) { uint16_t res; uint64_t key = BuildKey(op, operand1, operand2, modifier); @@ -107,12 +184,26 @@ class LocalValueNumbering { if (it != value_map_.end()) { res = it->second; } else { - res = value_map_.size() + 1; + ++last_value_; + res = last_value_; value_map_.Put(key, res); } return res; }; + void StoreValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier, + uint16_t value) { + uint64_t key = BuildKey(op, operand1, operand2, modifier); + value_map_.Overwrite(key, value); + } + + bool HasValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier, + uint16_t value) const { + uint64_t key = BuildKey(op, operand1, operand2, modifier); + ValueMap::const_iterator it = value_map_.find(key); + return (it != value_map_.end() && it->second == value); + }; + bool ValueExists(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) const { uint64_t key = BuildKey(op, operand1, operand2, modifier); ValueMap::const_iterator it = value_map_.find(key); @@ -129,13 +220,13 @@ class LocalValueNumbering { }; uint16_t GetOperandValue(int s_reg) { - uint16_t res = NO_VALUE; + uint16_t res = kNoValue; SregValueMap::iterator it = sreg_value_map_.find(s_reg); if (it != sreg_value_map_.end()) { res = it->second; } else { // First use - res = LookupValue(NO_VALUE, s_reg, NO_VALUE, NO_VALUE); + res = LookupValue(kNoValue, s_reg, kNoValue, kNoValue); sreg_value_map_.Put(s_reg, res); } return res; @@ -151,63 +242,61 @@ class LocalValueNumbering { }; uint16_t GetOperandValueWide(int s_reg) { - uint16_t res = NO_VALUE; + uint16_t res = kNoValue; SregValueMap::iterator it = sreg_wide_value_map_.find(s_reg); if (it != sreg_wide_value_map_.end()) { res = it->second; } else { // First use - res = LookupValue(NO_VALUE, s_reg, NO_VALUE, NO_VALUE); + res = LookupValue(kNoValue, s_reg, kNoValue, kNoValue); sreg_wide_value_map_.Put(s_reg, res); } return res; }; - uint16_t GetValueNumber(MIR* mir); - - // Allow delete-expression to destroy a LocalValueNumbering object without deallocation. 
- static void operator delete(void* ptr) { UNUSED(ptr); } - - private: - LocalValueNumbering(CompilationUnit* cu, ScopedArenaAllocator* allocator) - : cu_(cu), - allocator_(allocator), - sreg_value_map_(std::less<uint16_t>(), allocator->Adapter()), - sreg_wide_value_map_(std::less<uint16_t>(), allocator->Adapter()), - value_map_(std::less<uint64_t>(), allocator->Adapter()), - next_memory_version_(1u), - global_memory_version_(0u), - memory_version_map_(MemoryVersionKeyComparator(), allocator->Adapter()), - field_index_map_(FieldReferenceComparator(), allocator->Adapter()), - non_aliasing_refs_(std::less<uint16_t>(), allocator->Adapter()), - null_checked_(std::less<uint16_t>(), allocator->Adapter()) { - std::fill_n(unresolved_sfield_version_, kFieldTypeCount, 0u); - std::fill_n(unresolved_ifield_version_, kFieldTypeCount, 0u); - } - - uint16_t GetFieldId(const DexFile* dex_file, uint16_t field_idx); - void AdvanceGlobalMemory(); - uint16_t GetMemoryVersion(uint16_t base, uint16_t field, uint16_t type); - uint16_t AdvanceMemoryVersion(uint16_t base, uint16_t field, uint16_t type); + uint16_t GetFieldId(const MirFieldInfo& field_info); uint16_t MarkNonAliasingNonNull(MIR* mir); - void MakeArgsAliasing(MIR* mir); + bool IsNonAliasing(uint16_t reg); + bool IsNonAliasingIField(uint16_t reg, uint16_t field_id, uint16_t type); + bool IsNonAliasingArray(uint16_t reg, uint16_t type); void HandleNullCheck(MIR* mir, uint16_t reg); void HandleRangeCheck(MIR* mir, uint16_t array, uint16_t index); void HandlePutObject(MIR* mir); + void HandleEscapingRef(uint16_t base); + uint16_t HandleAGet(MIR* mir, uint16_t opcode); + void HandleAPut(MIR* mir, uint16_t opcode); + uint16_t HandleIGet(MIR* mir, uint16_t opcode); + void HandleIPut(MIR* mir, uint16_t opcode); + uint16_t HandleSGet(MIR* mir, uint16_t opcode); + void HandleSPut(MIR* mir, uint16_t opcode); CompilationUnit* const cu_; - std::unique_ptr<ScopedArenaAllocator> allocator_; + + // We have 32-bit last_value_ so that we can detect when we run out of value names, see Good(). + // We usually don't check Good() until the end of LVN unless we're about to modify code. + uint32_t last_value_; + SregValueMap sreg_value_map_; SregValueMap sreg_wide_value_map_; ValueMap value_map_; - uint16_t next_memory_version_; + + // Data for dealing with memory clobbering and store/load aliasing. uint16_t global_memory_version_; uint16_t unresolved_sfield_version_[kFieldTypeCount]; uint16_t unresolved_ifield_version_[kFieldTypeCount]; - MemoryVersionMap memory_version_map_; + uint16_t aliasing_array_version_[kFieldTypeCount]; + AliasingIFieldVersionMap aliasing_ifield_version_map_; + NonAliasingArrayVersionMap non_aliasing_array_version_map_; FieldIndexMap field_index_map_; // Value names of references to objects that cannot be reached through a different value name. ValueNameSet non_aliasing_refs_; + // Instance fields still holding non-aliased values after the base has escaped. + NonAliasingFieldSet non_aliasing_ifields_; + // Previously non-aliasing array refs that escaped but can still be used for non-aliasing AGET. + EscapedArraySet escaped_array_refs_; + + // Range check and null check elimination. 
+ RangeCheckSet range_checked_; ValueNameSet null_checked_; DISALLOW_COPY_AND_ASSIGN(LocalValueNumbering); diff --git a/compiler/dex/local_value_numbering_test.cc b/compiler/dex/local_value_numbering_test.cc index e56e0160ca..efc4fc8a34 100644 --- a/compiler/dex/local_value_numbering_test.cc +++ b/compiler/dex/local_value_numbering_test.cc @@ -40,7 +40,7 @@ class LocalValueNumberingTest : public testing::Test { struct MIRDef { static constexpr size_t kMaxSsaDefs = 2; - static constexpr size_t kMaxSsaUses = 3; + static constexpr size_t kMaxSsaUses = 4; Instruction::Code opcode; int64_t value; @@ -55,6 +55,8 @@ class LocalValueNumberingTest : public testing::Test { { opcode, value, 0u, 0, { }, 1, { reg } } #define DEF_CONST_WIDE(opcode, reg, value) \ { opcode, value, 0u, 0, { }, 2, { reg, reg + 1 } } +#define DEF_CONST_STRING(opcode, reg, index) \ + { opcode, index, 0u, 0, { }, 1, { reg } } #define DEF_IGET(opcode, reg, obj, field_info) \ { opcode, 0u, field_info, 1, { obj }, 1, { reg } } #define DEF_IGET_WIDE(opcode, reg, obj, field_info) \ @@ -71,6 +73,14 @@ class LocalValueNumberingTest : public testing::Test { { opcode, 0u, field_info, 1, { reg }, 0, { } } #define DEF_SPUT_WIDE(opcode, reg, field_info) \ { opcode, 0u, field_info, 2, { reg, reg + 1 }, 0, { } } +#define DEF_AGET(opcode, reg, obj, idx) \ + { opcode, 0u, 0u, 2, { obj, idx }, 1, { reg } } +#define DEF_AGET_WIDE(opcode, reg, obj, idx) \ + { opcode, 0u, 0u, 2, { obj, idx }, 2, { reg, reg + 1 } } +#define DEF_APUT(opcode, reg, obj, idx) \ + { opcode, 0u, 0u, 3, { reg, obj, idx }, 0, { } } +#define DEF_APUT_WIDE(opcode, reg, obj, idx) \ + { opcode, 0u, 0u, 4, { reg, reg + 1, obj, idx }, 0, { } } #define DEF_INVOKE1(opcode, reg) \ { opcode, 0u, 0u, 1, { reg }, 0, { } } #define DEF_UNIQUE_REF(opcode, reg) \ @@ -163,6 +173,7 @@ class LocalValueNumberingTest : public testing::Test { for (size_t i = 0; i != mir_count_; ++i) { value_names_[i] = lvn_->GetValueNumber(&mirs_[i]); } + EXPECT_TRUE(lvn_->Good()); } LocalValueNumberingTest() @@ -170,8 +181,11 @@ class LocalValueNumberingTest : public testing::Test { cu_(&pool_), mir_count_(0u), mirs_(nullptr), - lvn_(LocalValueNumbering::Create(&cu_)) { + allocator_(), + lvn_() { cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena)); + allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack)); + lvn_.reset(new (allocator_.get()) LocalValueNumbering(&cu_, allocator_.get())); } ArenaPool pool_; @@ -180,12 +194,13 @@ class LocalValueNumberingTest : public testing::Test { MIR* mirs_; std::vector<SSARepresentation> ssa_reps_; std::vector<uint16_t> value_names_; + std::unique_ptr<ScopedArenaAllocator> allocator_; std::unique_ptr<LocalValueNumbering> lvn_; }; -TEST_F(LocalValueNumberingTest, TestIGetIGetInvokeIGet) { +TEST_F(LocalValueNumberingTest, IGetIGetInvokeIGet) { static const IFieldDef ifields[] = { - { 1u, 1u, 1u, false } + { 1u, 1u, 1u, false }, }; static const MIRDef mirs[] = { DEF_IGET(Instruction::IGET, 0u, 10u, 0u), @@ -206,15 +221,15 @@ TEST_F(LocalValueNumberingTest, TestIGetIGetInvokeIGet) { EXPECT_EQ(mirs_[3].optimization_flags, MIR_IGNORE_NULL_CHECK); } -TEST_F(LocalValueNumberingTest, TestIGetIPutIGetIGetIGet) { +TEST_F(LocalValueNumberingTest, IGetIPutIGetIGetIGet) { static const IFieldDef ifields[] = { { 1u, 1u, 1u, false }, { 2u, 1u, 2u, false }, }; static const MIRDef mirs[] = { - DEF_IGET(Instruction::IGET, 0u, 10u, 0u), - DEF_IPUT(Instruction::IPUT, 1u, 11u, 0u), // May alias. 
- DEF_IGET(Instruction::IGET, 2u, 10u, 0u), + DEF_IGET(Instruction::IGET_OBJECT, 0u, 10u, 0u), + DEF_IPUT(Instruction::IPUT_OBJECT, 1u, 11u, 0u), // May alias. + DEF_IGET(Instruction::IGET_OBJECT, 2u, 10u, 0u), DEF_IGET(Instruction::IGET, 3u, 0u, 1u), DEF_IGET(Instruction::IGET, 4u, 2u, 1u), }; @@ -232,7 +247,7 @@ TEST_F(LocalValueNumberingTest, TestIGetIPutIGetIGetIGet) { EXPECT_EQ(mirs_[4].optimization_flags, 0u); } -TEST_F(LocalValueNumberingTest, TestUniquePreserve1) { +TEST_F(LocalValueNumberingTest, UniquePreserve1) { static const IFieldDef ifields[] = { { 1u, 1u, 1u, false }, }; @@ -253,7 +268,7 @@ TEST_F(LocalValueNumberingTest, TestUniquePreserve1) { EXPECT_EQ(mirs_[3].optimization_flags, MIR_IGNORE_NULL_CHECK); } -TEST_F(LocalValueNumberingTest, TestUniquePreserve2) { +TEST_F(LocalValueNumberingTest, UniquePreserve2) { static const IFieldDef ifields[] = { { 1u, 1u, 1u, false }, }; @@ -274,7 +289,7 @@ TEST_F(LocalValueNumberingTest, TestUniquePreserve2) { EXPECT_EQ(mirs_[3].optimization_flags, MIR_IGNORE_NULL_CHECK); } -TEST_F(LocalValueNumberingTest, TestUniquePreserveAndEscape) { +TEST_F(LocalValueNumberingTest, UniquePreserveAndEscape) { static const IFieldDef ifields[] = { { 1u, 1u, 1u, false }, }; @@ -298,7 +313,7 @@ TEST_F(LocalValueNumberingTest, TestUniquePreserveAndEscape) { EXPECT_EQ(mirs_[5].optimization_flags, MIR_IGNORE_NULL_CHECK); } -TEST_F(LocalValueNumberingTest, TestVolatile) { +TEST_F(LocalValueNumberingTest, Volatile) { static const IFieldDef ifields[] = { { 1u, 1u, 1u, false }, { 2u, 1u, 2u, true }, @@ -322,4 +337,264 @@ TEST_F(LocalValueNumberingTest, TestVolatile) { EXPECT_EQ(mirs_[3].optimization_flags, 0u); } +TEST_F(LocalValueNumberingTest, UnresolvedIField) { + static const IFieldDef ifields[] = { + { 1u, 1u, 1u, false }, // Resolved field #1. + { 2u, 1u, 2u, false }, // Resolved field #2. + { 3u, 0u, 0u, false }, // Unresolved field. + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 20u), + DEF_IGET(Instruction::IGET, 1u, 20u, 0u), // Resolved field #1, unique object. + DEF_IGET(Instruction::IGET, 2u, 21u, 0u), // Resolved field #1. + DEF_IGET_WIDE(Instruction::IGET_WIDE, 3u, 21u, 1u), // Resolved field #2. + DEF_IGET(Instruction::IGET, 4u, 22u, 2u), // IGET doesn't clobber anything. + DEF_IGET(Instruction::IGET, 5u, 20u, 0u), // Resolved field #1, unique object. + DEF_IGET(Instruction::IGET, 6u, 21u, 0u), // Resolved field #1. + DEF_IGET_WIDE(Instruction::IGET_WIDE, 7u, 21u, 1u), // Resolved field #2. + DEF_IPUT(Instruction::IPUT, 8u, 22u, 2u), // IPUT clobbers field #1 (#2 if wide). + DEF_IGET(Instruction::IGET, 9u, 20u, 0u), // Resolved field #1, unique object. + DEF_IGET(Instruction::IGET, 10u, 21u, 0u), // Resolved field #1, new value name. + DEF_IGET_WIDE(Instruction::IGET_WIDE, 11u, 21u, 1u), // Resolved field #2. + }; + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformLVN(); + ASSERT_EQ(value_names_.size(), 12u); + EXPECT_EQ(value_names_[1], value_names_[5]); + EXPECT_EQ(value_names_[2], value_names_[6]); + EXPECT_EQ(value_names_[3], value_names_[7]); + EXPECT_EQ(value_names_[1], value_names_[9]); + EXPECT_NE(value_names_[2], value_names_[10]); // This aliased with unresolved IPUT. 
+ EXPECT_EQ(value_names_[3], value_names_[11]); + EXPECT_EQ(mirs_[0].optimization_flags, 0u); + EXPECT_EQ(mirs_[1].optimization_flags, MIR_IGNORE_NULL_CHECK); + EXPECT_EQ(mirs_[2].optimization_flags, 0u); + EXPECT_EQ(mirs_[3].optimization_flags, MIR_IGNORE_NULL_CHECK); + EXPECT_EQ(mirs_[4].optimization_flags, 0u); + for (size_t i = 5u; i != mir_count_; ++i) { + EXPECT_EQ(mirs_[i].optimization_flags, MIR_IGNORE_NULL_CHECK); + } +} + +TEST_F(LocalValueNumberingTest, UnresolvedSField) { + static const SFieldDef sfields[] = { + { 1u, 1u, 1u, false }, // Resolved field #1. + { 2u, 1u, 2u, false }, // Resolved field #2. + { 3u, 0u, 0u, false }, // Unresolved field. + }; + static const MIRDef mirs[] = { + DEF_SGET(Instruction::SGET, 0u, 0u), // Resolved field #1. + DEF_SGET_WIDE(Instruction::SGET_WIDE, 1u, 1u), // Resolved field #2. + DEF_SGET(Instruction::SGET, 2u, 2u), // SGET doesn't clobber anything. + DEF_SGET(Instruction::SGET, 3u, 0u), // Resolved field #1. + DEF_SGET_WIDE(Instruction::SGET_WIDE, 4u, 1u), // Resolved field #2. + DEF_SPUT(Instruction::SPUT, 5u, 2u), // SPUT clobbers field #1 (#2 is wide). + DEF_SGET(Instruction::SGET, 6u, 0u), // Resolved field #1. + DEF_SGET_WIDE(Instruction::SGET_WIDE, 7u, 1u), // Resolved field #2. + }; + + PrepareSFields(sfields); + PrepareMIRs(mirs); + PerformLVN(); + ASSERT_EQ(value_names_.size(), 8u); + EXPECT_EQ(value_names_[0], value_names_[3]); + EXPECT_EQ(value_names_[1], value_names_[4]); + EXPECT_NE(value_names_[0], value_names_[6]); // This aliased with unresolved IPUT. + EXPECT_EQ(value_names_[1], value_names_[7]); + for (size_t i = 0u; i != mir_count_; ++i) { + EXPECT_EQ(mirs_[i].optimization_flags, 0u) << i; + } +} + +TEST_F(LocalValueNumberingTest, ConstString) { + static const MIRDef mirs[] = { + DEF_CONST_STRING(Instruction::CONST_STRING, 0u, 0u), + DEF_CONST_STRING(Instruction::CONST_STRING, 1u, 0u), + DEF_CONST_STRING(Instruction::CONST_STRING, 2u, 2u), + DEF_CONST_STRING(Instruction::CONST_STRING, 3u, 0u), + DEF_INVOKE1(Instruction::INVOKE_DIRECT, 2u), + DEF_CONST_STRING(Instruction::CONST_STRING, 4u, 2u), + }; + + PrepareMIRs(mirs); + PerformLVN(); + ASSERT_EQ(value_names_.size(), 6u); + EXPECT_EQ(value_names_[1], value_names_[0]); + EXPECT_NE(value_names_[2], value_names_[0]); + EXPECT_EQ(value_names_[3], value_names_[0]); + EXPECT_EQ(value_names_[5], value_names_[2]); +} + +TEST_F(LocalValueNumberingTest, SameValueInDifferentMemoryLocations) { + static const IFieldDef ifields[] = { + { 1u, 1u, 1u, false }, + { 2u, 1u, 2u, false }, + }; + static const SFieldDef sfields[] = { + { 3u, 1u, 3u, false }, + }; + static const MIRDef mirs[] = { + DEF_IGET(Instruction::IGET, 0u, 10u, 0u), + DEF_IPUT(Instruction::IPUT, 0u, 10u, 1u), + DEF_SPUT(Instruction::SPUT, 0u, 0u), + DEF_APUT(Instruction::APUT, 0u, 11u, 12u), + DEF_IGET(Instruction::IGET, 1u, 10u, 0u), + DEF_IGET(Instruction::IGET, 2u, 10u, 1u), + DEF_AGET(Instruction::AGET, 3u, 11u, 12u), + DEF_SGET(Instruction::SGET, 4u, 0u), + }; + + PrepareIFields(ifields); + PrepareSFields(sfields); + PrepareMIRs(mirs); + PerformLVN(); + ASSERT_EQ(value_names_.size(), 8u); + EXPECT_EQ(value_names_[4], value_names_[0]); + EXPECT_EQ(value_names_[5], value_names_[0]); + EXPECT_EQ(value_names_[6], value_names_[0]); + EXPECT_EQ(value_names_[7], value_names_[0]); + EXPECT_EQ(mirs_[0].optimization_flags, 0u); + EXPECT_EQ(mirs_[1].optimization_flags, MIR_IGNORE_NULL_CHECK); + EXPECT_EQ(mirs_[2].optimization_flags, 0u); + EXPECT_EQ(mirs_[3].optimization_flags, 0u); + EXPECT_EQ(mirs_[4].optimization_flags, 
MIR_IGNORE_NULL_CHECK); + EXPECT_EQ(mirs_[5].optimization_flags, MIR_IGNORE_NULL_CHECK); + EXPECT_EQ(mirs_[6].optimization_flags, MIR_IGNORE_NULL_CHECK | MIR_IGNORE_RANGE_CHECK); + EXPECT_EQ(mirs_[7].optimization_flags, 0u); +} + +TEST_F(LocalValueNumberingTest, UniqueArrayAliasing) { + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(Instruction::NEW_ARRAY, 20u), + DEF_AGET(Instruction::AGET, 1u, 20u, 40u), + DEF_APUT(Instruction::APUT, 2u, 20u, 41u), // May alias with index for sreg 40u. + DEF_AGET(Instruction::AGET, 3u, 20u, 40u), + }; + + PrepareMIRs(mirs); + PerformLVN(); + ASSERT_EQ(value_names_.size(), 4u); + EXPECT_NE(value_names_[1], value_names_[3]); + EXPECT_EQ(mirs_[0].optimization_flags, 0u); + EXPECT_EQ(mirs_[1].optimization_flags, MIR_IGNORE_NULL_CHECK); + EXPECT_EQ(mirs_[2].optimization_flags, MIR_IGNORE_NULL_CHECK); + EXPECT_EQ(mirs_[3].optimization_flags, MIR_IGNORE_NULL_CHECK | MIR_IGNORE_RANGE_CHECK); +} + +TEST_F(LocalValueNumberingTest, EscapingRefs) { + static const IFieldDef ifields[] = { + { 1u, 1u, 1u, false }, // Field #1. + { 2u, 1u, 2u, false }, // Field #2. + { 3u, 1u, 3u, false }, // Reference field for storing escaping refs. + { 4u, 1u, 4u, false }, // Wide. + { 5u, 0u, 0u, false }, // Unresolved field, int. + { 6u, 0u, 0u, false }, // Unresolved field, wide. + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 20u), + DEF_IGET(Instruction::IGET, 1u, 20u, 0u), + DEF_IGET(Instruction::IGET, 2u, 20u, 1u), + DEF_IPUT(Instruction::IPUT_OBJECT, 20u, 30u, 2u), // Ref escapes. + DEF_IGET(Instruction::IGET, 4u, 20u, 0u), + DEF_IGET(Instruction::IGET, 5u, 20u, 1u), + DEF_IPUT(Instruction::IPUT, 6u, 31u, 0u), // May alias with field #1. + DEF_IGET(Instruction::IGET, 7u, 20u, 0u), // New value. + DEF_IGET(Instruction::IGET, 8u, 20u, 1u), // Still the same. + DEF_IPUT_WIDE(Instruction::IPUT_WIDE, 9u, 31u, 3u), // No aliasing, different type. + DEF_IGET(Instruction::IGET, 10u, 20u, 0u), + DEF_IGET(Instruction::IGET, 11u, 20u, 1u), + DEF_IPUT_WIDE(Instruction::IPUT_WIDE, 12u, 31u, 5u), // No aliasing, different type. + DEF_IGET(Instruction::IGET, 13u, 20u, 0u), + DEF_IGET(Instruction::IGET, 14u, 20u, 1u), + DEF_IPUT(Instruction::IPUT, 15u, 31u, 4u), // Aliasing, same type. + DEF_IGET(Instruction::IGET, 16u, 20u, 0u), + DEF_IGET(Instruction::IGET, 17u, 20u, 1u), + }; + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformLVN(); + ASSERT_EQ(value_names_.size(), 18u); + EXPECT_EQ(value_names_[1], value_names_[4]); + EXPECT_EQ(value_names_[2], value_names_[5]); + EXPECT_NE(value_names_[4], value_names_[7]); // New value. + EXPECT_EQ(value_names_[5], value_names_[8]); + EXPECT_EQ(value_names_[7], value_names_[10]); + EXPECT_EQ(value_names_[8], value_names_[11]); + EXPECT_EQ(value_names_[10], value_names_[13]); + EXPECT_EQ(value_names_[11], value_names_[14]); + EXPECT_NE(value_names_[13], value_names_[16]); // New value. + EXPECT_NE(value_names_[14], value_names_[17]); // New value. + for (size_t i = 0u; i != mir_count_; ++i) { + int expected = (i != 0u && i != 3u && i != 6u) ? MIR_IGNORE_NULL_CHECK : 0u; + EXPECT_EQ(expected, mirs_[i].optimization_flags) << i; + } +} + +TEST_F(LocalValueNumberingTest, EscapingArrayRefs) { + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(Instruction::NEW_ARRAY, 20u), + DEF_AGET(Instruction::AGET, 1u, 20u, 40u), + DEF_AGET(Instruction::AGET, 2u, 20u, 41u), + DEF_APUT(Instruction::APUT_OBJECT, 20u, 30u, 42u), // Array ref escapes. 
+ DEF_AGET(Instruction::AGET, 4u, 20u, 40u), + DEF_AGET(Instruction::AGET, 5u, 20u, 41u), + DEF_APUT_WIDE(Instruction::APUT_WIDE, 6u, 31u, 43u), // No aliasing, different type. + DEF_AGET(Instruction::AGET, 7u, 20u, 40u), + DEF_AGET(Instruction::AGET, 8u, 20u, 41u), + DEF_APUT(Instruction::APUT, 9u, 32u, 40u), // May alias with all elements. + DEF_AGET(Instruction::AGET, 10u, 20u, 40u), // New value (same index name). + DEF_AGET(Instruction::AGET, 11u, 20u, 41u), // New value (different index name). + }; + + PrepareMIRs(mirs); + PerformLVN(); + ASSERT_EQ(value_names_.size(), 12u); + EXPECT_EQ(value_names_[1], value_names_[4]); + EXPECT_EQ(value_names_[2], value_names_[5]); + EXPECT_EQ(value_names_[4], value_names_[7]); + EXPECT_EQ(value_names_[5], value_names_[8]); + EXPECT_NE(value_names_[7], value_names_[10]); // New value. + EXPECT_NE(value_names_[8], value_names_[11]); // New value. + for (size_t i = 0u; i != mir_count_; ++i) { + int expected = + ((i != 0u && i != 3u && i != 6u && i != 9u) ? MIR_IGNORE_NULL_CHECK : 0u) | + ((i >= 4 && i != 6u && i != 9u) ? MIR_IGNORE_RANGE_CHECK : 0u); + EXPECT_EQ(expected, mirs_[i].optimization_flags) << i; + } +} + +TEST_F(LocalValueNumberingTest, StoringSameValueKeepsMemoryVersion) { + static const IFieldDef ifields[] = { + { 1u, 1u, 1u, false }, + }; + static const MIRDef mirs[] = { + DEF_IGET(Instruction::IGET, 0u, 10u, 0u), + DEF_IGET(Instruction::IGET, 1u, 11u, 0u), + DEF_IPUT(Instruction::IPUT, 1u, 11u, 0u), // Store the same value. + DEF_IGET(Instruction::IGET, 3u, 10u, 0u), + DEF_AGET(Instruction::AGET, 4u, 12u, 40u), + DEF_AGET(Instruction::AGET, 5u, 13u, 40u), + DEF_APUT(Instruction::APUT, 5u, 13u, 40u), // Store the same value. + DEF_AGET(Instruction::AGET, 7u, 12u, 40u), + }; + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformLVN(); + ASSERT_EQ(value_names_.size(), 8u); + EXPECT_NE(value_names_[0], value_names_[1]); + EXPECT_EQ(value_names_[0], value_names_[3]); + EXPECT_NE(value_names_[4], value_names_[5]); + EXPECT_EQ(value_names_[4], value_names_[7]); + for (size_t i = 0u; i != mir_count_; ++i) { + int expected = + ((i == 2u || i == 3u || i == 6u || i == 7u) ? MIR_IGNORE_NULL_CHECK : 0u) | + ((i == 6u || i == 7u) ? MIR_IGNORE_RANGE_CHECK : 0u); + EXPECT_EQ(expected, mirs_[i].optimization_flags) << i; + } +} + } // namespace art diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc index 508f1c70bd..7129f8a501 100644 --- a/compiler/dex/mir_analysis.cc +++ b/compiler/dex/mir_analysis.cc @@ -902,7 +902,7 @@ void MIRGraph::AnalyzeBlock(BasicBlock* bb, MethodStats* stats) { while (!done) { tbb->visited = true; for (MIR* mir = tbb->first_mir_insn; mir != NULL; mir = mir->next) { - if (static_cast<uint32_t>(mir->dalvikInsn.opcode) >= kMirOpFirst) { + if (IsPseudoMirOp(mir->dalvikInsn.opcode)) { // Skip any MIR pseudo-op. 
continue; } diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc index 47b233b463..9fea709568 100644 --- a/compiler/dex/mir_dataflow.cc +++ b/compiler/dex/mir_dataflow.cc @@ -1015,8 +1015,7 @@ bool MIRGraph::DoSSAConversion(BasicBlock* bb) { uint64_t df_attributes = GetDataFlowAttributes(mir); // If not a pseudo-op, note non-leaf or can throw - if (static_cast<int>(mir->dalvikInsn.opcode) < - static_cast<int>(kNumPackedOpcodes)) { + if (!IsPseudoMirOp(mir->dalvikInsn.opcode)) { int flags = Instruction::FlagsOf(mir->dalvikInsn.opcode); if ((flags & Instruction::kInvoke) != 0 && (mir->optimization_flags & MIR_INLINED) == 0) { @@ -1282,7 +1281,7 @@ bool MIRGraph::VerifyPredInfo(BasicBlock* bb) { GrowableArray<BasicBlockId>::Iterator iter(bb->predecessors); while (true) { - BasicBlock *pred_bb = GetBasicBlock(iter.Next()); + BasicBlock* pred_bb = GetBasicBlock(iter.Next()); if (!pred_bb) break; bool found = false; if (pred_bb->taken == bb->id) { diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index 0fffa01350..3ef1dbfac3 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -26,6 +26,7 @@ #include "dex/quick/dex_file_to_method_inliner_map.h" #include "dex/quick/dex_file_method_inliner.h" #include "leb128.h" +#include "pass_driver_me_post_opt.h" namespace art { @@ -353,7 +354,7 @@ BasicBlock* MIRGraph::ProcessCanBranch(BasicBlock* cur_block, MIR* insn, DexOffs /* Always terminate the current block for conditional branches */ if (flags & Instruction::kContinue) { - BasicBlock *fallthrough_block = FindBlock(cur_offset + width, + BasicBlock* fallthrough_block = FindBlock(cur_offset + width, /* * If the method is processed * in sequential order from the @@ -541,15 +542,14 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse * Note also that the dex_pc_to_block_map_ entry for the potentially * throwing instruction will refer to the original basic block. */ - BasicBlock *new_block = NewMemBB(kDalvikByteCode, num_blocks_++); + BasicBlock* new_block = NewMemBB(kDalvikByteCode, num_blocks_++); block_list_.Insert(new_block); new_block->start_offset = insn->offset; cur_block->fall_through = new_block->id; new_block->predecessors->Insert(cur_block->id); MIR* new_insn = NewMIR(); *new_insn = *insn; - insn->dalvikInsn.opcode = - static_cast<Instruction::Code>(kMirOpCheck); + insn->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpCheck); // Associate the two halves. insn->meta.throw_insn = new_insn; new_block->AppendMIR(new_insn); @@ -724,7 +724,7 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ } } current_offset_ += width; - BasicBlock *next_block = FindBlock(current_offset_, /* split */ false, /* create */ + BasicBlock* next_block = FindBlock(current_offset_, /* split */ false, /* create */ false, /* immed_pred_block_p */ NULL); if (next_block) { /* @@ -836,8 +836,7 @@ void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks, const char *suff } else { fprintf(file, " {%04x %s %s %s\\l}%s\\\n", mir->offset, mir->ssa_rep ? GetDalvikDisassembly(mir) : - (opcode < kMirOpFirst) ? - Instruction::Name(mir->dalvikInsn.opcode) : + !IsPseudoMirOp(opcode) ? Instruction::Name(mir->dalvikInsn.opcode) : extended_mir_op_names_[opcode - kMirOpFirst], (mir->optimization_flags & MIR_IGNORE_RANGE_CHECK) != 0 ? " no_rangecheck" : " ", (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) != 0 ? 
" no_nullcheck" : " ", @@ -1073,19 +1072,21 @@ bool BasicBlock::RemoveMIRList(MIR* first_list_mir, MIR* last_list_mir) { } } - // Remove the BB information and also find the after_list + // Remove the BB information and also find the after_list. for (MIR* mir = first_list_mir; mir != last_list_mir; mir = mir->next) { mir->bb = NullBasicBlockId; } after_list = last_list_mir->next; - // If there is nothing before the list, after_list is the first_mir + // If there is nothing before the list, after_list is the first_mir. if (before_list == nullptr) { first_mir_insn = after_list; + } else { + before_list->next = after_list; } - // If there is nothing after the list, before_list is last_mir + // If there is nothing after the list, before_list is last_mir. if (after_list == nullptr) { last_mir_insn = before_list; } @@ -1140,7 +1141,7 @@ char* MIRGraph::GetDalvikDisassembly(const MIR* mir) { nop = true; } - if (opcode >= kMirOpFirst) { + if (IsPseudoMirOp(opcode)) { str.append(extended_mir_op_names_[opcode - kMirOpFirst]); } else { dalvik_format = Instruction::FormatOf(insn.opcode); @@ -1418,25 +1419,6 @@ void MIRGraph::SSATransformationStart() { temp_bit_vector_ = new (temp_scoped_alloc_.get()) ArenaBitVector( temp_scoped_alloc_.get(), temp_bit_vector_size_, false, kBitMapRegisterV); - /* Compute the DFS order */ - ComputeDFSOrders(); - - /* Compute the dominator info */ - ComputeDominators(); - - /* Allocate data structures in preparation for SSA conversion */ - CompilerInitializeSSAConversion(); - - /* Find out the "Dalvik reg def x block" relation */ - ComputeDefBlockMatrix(); - - /* Insert phi nodes to dominance frontiers for all variables */ - InsertPhiNodes(); - - /* Rename register names by local defs and phi nodes */ - ClearAllVisitedFlags(); - DoDFSPreOrderSSARename(GetEntryBlock()); - // Update the maximum number of reachable blocks. max_num_reachable_blocks_ = num_reachable_blocks_; } @@ -1454,7 +1436,7 @@ void MIRGraph::SSATransformationEnd() { } void MIRGraph::ComputeTopologicalSortOrder() { - std::queue<BasicBlock *> q; + std::queue<BasicBlock*> q; std::map<int, int> visited_cnt_values; // Clear the nodes. @@ -1510,7 +1492,7 @@ void MIRGraph::ComputeTopologicalSortOrder() { while (q.size() > 0) { // Get top. - BasicBlock *bb = q.front(); + BasicBlock* bb = q.front(); q.pop(); DCHECK_EQ(bb->hidden, false); @@ -1528,7 +1510,7 @@ void MIRGraph::ComputeTopologicalSortOrder() { // Reduce visitedCnt for all the successors and add into the queue ones with visitedCnt equals to zero. ChildBlockIterator succIter(bb, this); - BasicBlock *successor = succIter.Next(); + BasicBlock* successor = succIter.Next(); while (successor != nullptr) { // one more predecessor was visited. 
visited_cnt_values[successor->id]--; @@ -1914,4 +1896,13 @@ BasicBlock* MIRGraph::CreateNewBB(BBType block_type) { return res; } +void MIRGraph::CalculateBasicBlockInformation() { + PassDriverMEPostOpt driver(cu_); + driver.Launch(); +} + +void MIRGraph::InitializeBasicBlockData() { + num_blocks_ = block_list_.Size(); +} + } // namespace art diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index 3655125182..38cd5ee449 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -905,11 +905,11 @@ class MIRGraph { return backward_branches_ + forward_branches_; } - bool IsPseudoMirOp(Instruction::Code opcode) { + static bool IsPseudoMirOp(Instruction::Code opcode) { return static_cast<int>(opcode) >= static_cast<int>(kMirOpFirst); } - bool IsPseudoMirOp(int opcode) { + static bool IsPseudoMirOp(int opcode) { return opcode >= static_cast<int>(kMirOpFirst); } @@ -924,7 +924,7 @@ class MIRGraph { void VerifyDataflow(); void CheckForDominanceFrontier(BasicBlock* dom_bb, const BasicBlock* succ_bb); void EliminateNullChecksAndInferTypesStart(); - bool EliminateNullChecksAndInferTypes(BasicBlock *bb); + bool EliminateNullChecksAndInferTypes(BasicBlock* bb); void EliminateNullChecksAndInferTypesEnd(); bool EliminateClassInitChecksGate(); bool EliminateClassInitChecks(BasicBlock* bb); @@ -1030,6 +1030,14 @@ class MIRGraph { void AllocateSSAUseData(MIR *mir, int num_uses); void AllocateSSADefData(MIR *mir, int num_defs); + void CalculateBasicBlockInformation(); + void InitializeBasicBlockData(); + void ComputeDFSOrders(); + void ComputeDefBlockMatrix(); + void ComputeDominators(); + void CompilerInitializeSSAConversion(); + void InsertPhiNodes(); + void DoDFSPreOrderSSARename(BasicBlock* block); /* * IsDebugBuild sanity check: keep track of the Dex PCs for catch entries so that later on @@ -1046,7 +1054,6 @@ class MIRGraph { void HandleSSADef(int* defs, int dalvik_reg, int reg_index); bool InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed); - void ComputeDFSOrders(); protected: int FindCommonParent(int block1, int block2); @@ -1055,7 +1062,6 @@ class MIRGraph { void HandleLiveInUse(ArenaBitVector* use_v, ArenaBitVector* def_v, ArenaBitVector* live_in_v, int dalvik_reg_id); void HandleDef(ArenaBitVector* def_v, int dalvik_reg_id); - void CompilerInitializeSSAConversion(); bool DoSSAConversion(BasicBlock* bb); bool InvokeUsesMethodStar(MIR* mir); int ParseInsn(const uint16_t* code_ptr, MIR::DecodedInstruction* decoded_instruction); @@ -1082,11 +1088,7 @@ class MIRGraph { BasicBlock* NextUnvisitedSuccessor(BasicBlock* bb); void MarkPreOrder(BasicBlock* bb); void RecordDFSOrders(BasicBlock* bb); - void ComputeDefBlockMatrix(); void ComputeDomPostOrderTraversal(BasicBlock* bb); - void ComputeDominators(); - void InsertPhiNodes(); - void DoDFSPreOrderSSARename(BasicBlock* block); void SetConstant(int32_t ssa_reg, int value); void SetConstantWide(int ssa_reg, int64_t value); int GetSSAUseCount(int s_reg); diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc index 1d4aef2183..1460ce631b 100644 --- a/compiler/dex/mir_optimization.cc +++ b/compiler/dex/mir_optimization.cc @@ -129,17 +129,16 @@ MIR* MIRGraph::FindMoveResult(BasicBlock* bb, MIR* mir) { BasicBlock* tbb = bb; mir = AdvanceMIR(&tbb, mir); while (mir != NULL) { - int opcode = mir->dalvikInsn.opcode; if ((mir->dalvikInsn.opcode == Instruction::MOVE_RESULT) || (mir->dalvikInsn.opcode == Instruction::MOVE_RESULT_OBJECT) || (mir->dalvikInsn.opcode == Instruction::MOVE_RESULT_WIDE)) { break; } 
// Keep going if pseudo op, otherwise terminate - if (opcode < kNumPackedOpcodes) { - mir = NULL; - } else { + if (IsPseudoMirOp(mir->dalvikInsn.opcode)) { mir = AdvanceMIR(&tbb, mir); + } else { + mir = NULL; } } return mir; @@ -320,9 +319,11 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) { return true; } bool use_lvn = bb->use_lvn; + std::unique_ptr<ScopedArenaAllocator> allocator; std::unique_ptr<LocalValueNumbering> local_valnum; if (use_lvn) { - local_valnum.reset(LocalValueNumbering::Create(cu_)); + allocator.reset(ScopedArenaAllocator::Create(&cu_->arena_stack)); + local_valnum.reset(new (allocator.get()) LocalValueNumbering(cu_, allocator.get())); } while (bb != NULL) { for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { @@ -415,7 +416,8 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) { // TODO: flesh out support for Mips. NOTE: llvm's select op doesn't quite work here. // TUNING: expand to support IF_xx compare & branches if (!cu_->compiler->IsPortable() && - (cu_->instruction_set == kThumb2 || cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) && + (cu_->instruction_set == kArm64 || cu_->instruction_set == kThumb2 || + cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) && IsInstructionIfCcZ(mir->dalvikInsn.opcode)) { BasicBlock* ft = GetBasicBlock(bb->fall_through); DCHECK(ft != NULL); @@ -441,6 +443,8 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) { if (SelectKind(tk->last_mir_insn) == kSelectGoto) { tk->last_mir_insn->optimization_flags |= (MIR_IGNORE_SUSPEND_CHECK); } + + // TODO: Add logic for LONG. // Are the block bodies something we can handle? if ((ft->first_mir_insn == ft->last_mir_insn) && (tk->first_mir_insn != tk->last_mir_insn) && @@ -550,6 +554,9 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) { } bb = ((cu_->disable_opt & (1 << kSuppressExceptionEdges)) != 0) ? 
NextDominatedBlock(bb) : NULL; } + if (use_lvn && UNLIKELY(!local_valnum->Good())) { + LOG(WARNING) << "LVN overflow in " << PrettyMethod(cu_->method_idx, *cu_->dex_file); + } return true; } @@ -861,7 +868,7 @@ bool MIRGraph::EliminateNullChecksAndInferTypes(BasicBlock* bb) { struct BasicBlock* next_bb = GetBasicBlock(bb->fall_through); for (MIR* tmir = next_bb->first_mir_insn; tmir != NULL; tmir =tmir->next) { - if (static_cast<int>(tmir->dalvikInsn.opcode) >= static_cast<int>(kMirOpFirst)) { + if (IsPseudoMirOp(tmir->dalvikInsn.opcode)) { continue; } // First non-pseudo should be MOVE_RESULT_OBJECT @@ -1178,6 +1185,9 @@ void MIRGraph::InlineCalls(BasicBlock* bb) { return; } for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { + if (IsPseudoMirOp(mir->dalvikInsn.opcode)) { + continue; + } if (!(Instruction::FlagsOf(mir->dalvikInsn.opcode) & Instruction::kInvoke)) { continue; } diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc index 86092b6e3d..69c394f168 100644 --- a/compiler/dex/mir_optimization_test.cc +++ b/compiler/dex/mir_optimization_test.cc @@ -193,7 +193,7 @@ class ClassInitCheckEliminationTest : public testing::Test { ASSERT_TRUE(gate_result); RepeatingPreOrderDfsIterator iterator(cu_.mir_graph.get()); bool change = false; - for (BasicBlock *bb = iterator.Next(change); bb != 0; bb = iterator.Next(change)) { + for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) { change = cu_.mir_graph->EliminateClassInitChecks(bb); } cu_.mir_graph->EliminateClassInitChecksEnd(); diff --git a/compiler/dex/pass.h b/compiler/dex/pass.h index 4ce040e9ab..b4906d67df 100644 --- a/compiler/dex/pass.h +++ b/compiler/dex/pass.h @@ -89,6 +89,21 @@ class Pass { return false; } + static void BasePrintMessage(CompilationUnit* c_unit, const char* pass_name, const char* message, ...) { + // Check if we want to log something or not. + if (c_unit->print_pass) { + // Stringify the message. + va_list args; + va_start(args, message); + std::string stringified_message; + StringAppendV(&stringified_message, message, args); + va_end(args); + + // Log the message and ensure to include pass name. + LOG(INFO) << pass_name << ": " << stringified_message; + } + } + protected: /** @brief The pass name: used for searching for a pass when running a particular pass or debugging. */ const char* const pass_name_; diff --git a/compiler/dex/pass_driver.h b/compiler/dex/pass_driver.h index aa0d1ae462..bd8f53cd5a 100644 --- a/compiler/dex/pass_driver.h +++ b/compiler/dex/pass_driver.h @@ -141,7 +141,6 @@ class PassDriver { } } - protected: /** * @brief Gets the list of passes currently schedule to execute. * @return pass_list_ @@ -150,14 +149,27 @@ class PassDriver { return pass_list_; } - virtual void InitializePasses() { - SetDefaultPasses(); + static void SetPrintAllPasses() { + default_print_passes_ = true; + } + + static void SetDumpPassList(const std::string& list) { + dump_pass_list_ = list; + } + + static void SetPrintPassList(const std::string& list) { + print_pass_list_ = list; } void SetDefaultPasses() { pass_list_ = PassDriver<PassDriverType>::g_default_pass_list; } + protected: + virtual void InitializePasses() { + SetDefaultPasses(); + } + /** * @brief Apply a patch: perform start/work/end functions. */ @@ -185,6 +197,15 @@ class PassDriver { /** @brief The default pass list is used to initialize pass_list_. 
*/ static std::vector<const Pass*> g_default_pass_list; + + /** @brief Do we, by default, want to be printing the log messages? */ + static bool default_print_passes_; + + /** @brief What are the passes we want to be printing the log messages? */ + static std::string print_pass_list_; + + /** @brief What are the passes we want to be dumping the CFG? */ + static std::string dump_pass_list_; }; } // namespace art diff --git a/compiler/dex/pass_driver_me.cc b/compiler/dex/pass_driver_me.cc deleted file mode 100644 index d0545004f7..0000000000 --- a/compiler/dex/pass_driver_me.cc +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "base/macros.h" -#include "bb_optimizations.h" -#include "compiler_internals.h" -#include "dataflow_iterator.h" -#include "dataflow_iterator-inl.h" -#include "pass_driver_me.h" - -namespace art { - -namespace { // anonymous namespace - -void DoWalkBasicBlocks(PassMEDataHolder* data, const PassME* pass, DataflowIterator* iterator) { - // Paranoid: Check the iterator before walking the BasicBlocks. - DCHECK(iterator != nullptr); - bool change = false; - for (BasicBlock *bb = iterator->Next(change); bb != 0; bb = iterator->Next(change)) { - data->bb = bb; - change = pass->Worker(data); - } -} - -template <typename Iterator> -inline void DoWalkBasicBlocks(PassMEDataHolder* data, const PassME* pass) { - DCHECK(data != nullptr); - CompilationUnit* c_unit = data->c_unit; - DCHECK(c_unit != nullptr); - Iterator iterator(c_unit->mir_graph.get()); - DoWalkBasicBlocks(data, pass, &iterator); -} -} // anonymous namespace - -/* - * Create the pass list. These passes are immutable and are shared across the threads. - * - * Advantage is that there will be no race conditions here. - * Disadvantage is the passes can't change their internal states depending on CompilationUnit: - * - This is not yet an issue: no current pass would require it. - */ -// The initial list of passes to be used by the PassDriveME. -template<> -const Pass* const PassDriver<PassDriverME>::g_passes[] = { - GetPassInstance<CacheFieldLoweringInfo>(), - GetPassInstance<CacheMethodLoweringInfo>(), - GetPassInstance<CallInlining>(), - GetPassInstance<CodeLayout>(), - GetPassInstance<SSATransformation>(), - GetPassInstance<ConstantPropagation>(), - GetPassInstance<InitRegLocations>(), - GetPassInstance<MethodUseCount>(), - GetPassInstance<NullCheckEliminationAndTypeInference>(), - GetPassInstance<ClassInitCheckElimination>(), - GetPassInstance<BBCombine>(), - GetPassInstance<BBOptimizations>(), -}; - -// The number of the passes in the initial list of Passes (g_passes). -template<> -uint16_t const PassDriver<PassDriverME>::g_passes_size = arraysize(PassDriver<PassDriverME>::g_passes); - -// The default pass list is used by the PassDriverME instance of PassDriver to initialize pass_list_. 
-template<> -std::vector<const Pass*> PassDriver<PassDriverME>::g_default_pass_list(PassDriver<PassDriverME>::g_passes, PassDriver<PassDriverME>::g_passes + PassDriver<PassDriverME>::g_passes_size); - -PassDriverME::PassDriverME(CompilationUnit* cu) - : PassDriver(), pass_me_data_holder_(), dump_cfg_folder_("/sdcard/") { - pass_me_data_holder_.bb = nullptr; - pass_me_data_holder_.c_unit = cu; -} - -PassDriverME::~PassDriverME() { -} - -void PassDriverME::DispatchPass(const Pass* pass) { - VLOG(compiler) << "Dispatching " << pass->GetName(); - const PassME* me_pass = down_cast<const PassME*>(pass); - - DataFlowAnalysisMode mode = me_pass->GetTraversal(); - - switch (mode) { - case kPreOrderDFSTraversal: - DoWalkBasicBlocks<PreOrderDfsIterator>(&pass_me_data_holder_, me_pass); - break; - case kRepeatingPreOrderDFSTraversal: - DoWalkBasicBlocks<RepeatingPreOrderDfsIterator>(&pass_me_data_holder_, me_pass); - break; - case kRepeatingPostOrderDFSTraversal: - DoWalkBasicBlocks<RepeatingPostOrderDfsIterator>(&pass_me_data_holder_, me_pass); - break; - case kReversePostOrderDFSTraversal: - DoWalkBasicBlocks<ReversePostOrderDfsIterator>(&pass_me_data_holder_, me_pass); - break; - case kRepeatingReversePostOrderDFSTraversal: - DoWalkBasicBlocks<RepeatingReversePostOrderDfsIterator>(&pass_me_data_holder_, me_pass); - break; - case kPostOrderDOMTraversal: - DoWalkBasicBlocks<PostOrderDOMIterator>(&pass_me_data_holder_, me_pass); - break; - case kAllNodes: - DoWalkBasicBlocks<AllNodesIterator>(&pass_me_data_holder_, me_pass); - break; - case kNoNodes: - break; - default: - LOG(FATAL) << "Iterator mode not handled in dispatcher: " << mode; - break; - } -} - -bool PassDriverME::RunPass(const Pass* pass, bool time_split) { - // Paranoid: c_unit and pass cannot be nullptr, and the pass should have a name - DCHECK(pass != nullptr); - DCHECK(pass->GetName() != nullptr && pass->GetName()[0] != 0); - CompilationUnit* c_unit = pass_me_data_holder_.c_unit; - DCHECK(c_unit != nullptr); - - // Do we perform a time split - if (time_split) { - c_unit->NewTimingSplit(pass->GetName()); - } - - // Check the pass gate first. - bool should_apply_pass = pass->Gate(&pass_me_data_holder_); - if (should_apply_pass) { - // Applying the pass: first start, doWork, and end calls. - ApplyPass(&pass_me_data_holder_, pass); - - // Do we want to log it? - if ((c_unit->enable_debug& (1 << kDebugDumpCFG)) != 0) { - // Do we have a pass folder? - const PassME* me_pass = (down_cast<const PassME*>(pass)); - const char* passFolder = me_pass->GetDumpCFGFolder(); - DCHECK(passFolder != nullptr); - - if (passFolder[0] != 0) { - // Create directory prefix. - std::string prefix = GetDumpCFGFolder(); - prefix += passFolder; - prefix += "/"; - - c_unit->mir_graph->DumpCFG(prefix.c_str(), false); - } - } - } - - // If the pass gate passed, we can declare success. 
- return should_apply_pass; -} - -const char* PassDriverME::GetDumpCFGFolder() const { - return dump_cfg_folder_; -} - - -} // namespace art diff --git a/compiler/dex/pass_driver_me.h b/compiler/dex/pass_driver_me.h index 0142934be2..7d76fb83d4 100644 --- a/compiler/dex/pass_driver_me.h +++ b/compiler/dex/pass_driver_me.h @@ -18,28 +18,155 @@ #define ART_COMPILER_DEX_PASS_DRIVER_ME_H_ #include "bb_optimizations.h" +#include "dataflow_iterator.h" +#include "dataflow_iterator-inl.h" #include "pass_driver.h" #include "pass_me.h" namespace art { -class PassDriverME: public PassDriver<PassDriverME> { +template <typename PassDriverType> +class PassDriverME: public PassDriver<PassDriverType> { public: - explicit PassDriverME(CompilationUnit* cu); - ~PassDriverME(); - /** - * @brief Dispatch a patch: walk the BasicBlocks depending on the traversal mode - */ - void DispatchPass(const Pass* pass); - bool RunPass(const Pass* pass, bool time_split = false); - const char* GetDumpCFGFolder() const; + explicit PassDriverME(CompilationUnit* cu) + : pass_me_data_holder_(), dump_cfg_folder_("/sdcard/") { + pass_me_data_holder_.bb = nullptr; + pass_me_data_holder_.c_unit = cu; + } + + ~PassDriverME() { + } + + void DispatchPass(const Pass* pass) { + VLOG(compiler) << "Dispatching " << pass->GetName(); + const PassME* me_pass = down_cast<const PassME*>(pass); + + DataFlowAnalysisMode mode = me_pass->GetTraversal(); + + switch (mode) { + case kPreOrderDFSTraversal: + DoWalkBasicBlocks<PreOrderDfsIterator>(&pass_me_data_holder_, me_pass); + break; + case kRepeatingPreOrderDFSTraversal: + DoWalkBasicBlocks<RepeatingPreOrderDfsIterator>(&pass_me_data_holder_, me_pass); + break; + case kRepeatingPostOrderDFSTraversal: + DoWalkBasicBlocks<RepeatingPostOrderDfsIterator>(&pass_me_data_holder_, me_pass); + break; + case kReversePostOrderDFSTraversal: + DoWalkBasicBlocks<ReversePostOrderDfsIterator>(&pass_me_data_holder_, me_pass); + break; + case kRepeatingReversePostOrderDFSTraversal: + DoWalkBasicBlocks<RepeatingReversePostOrderDfsIterator>(&pass_me_data_holder_, me_pass); + break; + case kPostOrderDOMTraversal: + DoWalkBasicBlocks<PostOrderDOMIterator>(&pass_me_data_holder_, me_pass); + break; + case kAllNodes: + DoWalkBasicBlocks<AllNodesIterator>(&pass_me_data_holder_, me_pass); + break; + case kNoNodes: + break; + default: + LOG(FATAL) << "Iterator mode not handled in dispatcher: " << mode; + break; + } + } + + bool RunPass(const Pass* pass, bool time_split) { + // Paranoid: c_unit and pass cannot be nullptr, and the pass should have a name + DCHECK(pass != nullptr); + DCHECK(pass->GetName() != nullptr && pass->GetName()[0] != 0); + CompilationUnit* c_unit = pass_me_data_holder_.c_unit; + DCHECK(c_unit != nullptr); + + // Do we perform a time split + if (time_split) { + c_unit->NewTimingSplit(pass->GetName()); + } + + // Check the pass gate first. + bool should_apply_pass = pass->Gate(&pass_me_data_holder_); + if (should_apply_pass) { + bool old_print_pass = c_unit->print_pass; + + c_unit->print_pass = PassDriver<PassDriverType>::default_print_passes_; + + const char* print_pass_list = PassDriver<PassDriverType>::print_pass_list_.c_str(); + + if (print_pass_list != nullptr && strstr(print_pass_list, pass->GetName()) != nullptr) { + c_unit->print_pass = true; + } + + // Applying the pass: first start, doWork, and end calls. 
+ this->ApplyPass(&pass_me_data_holder_, pass); + + bool should_dump = ((c_unit->enable_debug & (1 << kDebugDumpCFG)) != 0); + + const char* dump_pass_list = PassDriver<PassDriverType>::dump_pass_list_.c_str(); + + if (dump_pass_list != nullptr) { + bool found = strstr(dump_pass_list, pass->GetName()); + should_dump = (should_dump || found); + } + + if (should_dump) { + // Do we want to log it? + if ((c_unit->enable_debug& (1 << kDebugDumpCFG)) != 0) { + // Do we have a pass folder? + const PassME* me_pass = (down_cast<const PassME*>(pass)); + const char* passFolder = me_pass->GetDumpCFGFolder(); + DCHECK(passFolder != nullptr); + + if (passFolder[0] != 0) { + // Create directory prefix. + std::string prefix = GetDumpCFGFolder(); + prefix += passFolder; + prefix += "/"; + + c_unit->mir_graph->DumpCFG(prefix.c_str(), false); + } + } + } + + c_unit->print_pass = old_print_pass; + } + + // If the pass gate passed, we can declare success. + return should_apply_pass; + } + + const char* GetDumpCFGFolder() const { + return dump_cfg_folder_; + } + protected: /** @brief The data holder that contains data needed for the PassDriverME. */ PassMEDataHolder pass_me_data_holder_; /** @brief Dump CFG base folder: where is the base folder for dumping CFGs. */ const char* dump_cfg_folder_; -}; + static void DoWalkBasicBlocks(PassMEDataHolder* data, const PassME* pass, + DataflowIterator* iterator) { + // Paranoid: Check the iterator before walking the BasicBlocks. + DCHECK(iterator != nullptr); + bool change = false; + for (BasicBlock* bb = iterator->Next(change); bb != nullptr; bb = iterator->Next(change)) { + data->bb = bb; + change = pass->Worker(data); + } + } + + template <typename Iterator> + inline static void DoWalkBasicBlocks(PassMEDataHolder* data, const PassME* pass) { + DCHECK(data != nullptr); + CompilationUnit* c_unit = data->c_unit; + DCHECK(c_unit != nullptr); + Iterator iterator(c_unit->mir_graph.get()); + DoWalkBasicBlocks(data, pass, &iterator); + } +}; } // namespace art #endif // ART_COMPILER_DEX_PASS_DRIVER_ME_H_ + diff --git a/compiler/dex/pass_driver_me_opts.cc b/compiler/dex/pass_driver_me_opts.cc new file mode 100644 index 0000000000..52a2273c40 --- /dev/null +++ b/compiler/dex/pass_driver_me_opts.cc @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "base/macros.h" +#include "bb_optimizations.h" +#include "compiler_internals.h" +#include "dataflow_iterator.h" +#include "dataflow_iterator-inl.h" +#include "pass_driver_me_opts.h" + +namespace art { + +/* + * Create the pass list. These passes are immutable and are shared across the threads. + * + * Advantage is that there will be no race conditions here. + * Disadvantage is the passes can't change their internal states depending on CompilationUnit: + * - This is not yet an issue: no current pass would require it. + */ +// The initial list of passes to be used by the PassDriveMEOpts. 
+template<> +const Pass* const PassDriver<PassDriverMEOpts>::g_passes[] = { + GetPassInstance<CacheFieldLoweringInfo>(), + GetPassInstance<CacheMethodLoweringInfo>(), + GetPassInstance<CallInlining>(), + GetPassInstance<CodeLayout>(), + GetPassInstance<NullCheckEliminationAndTypeInference>(), + GetPassInstance<ClassInitCheckElimination>(), + GetPassInstance<BBCombine>(), + GetPassInstance<BBOptimizations>(), +}; + +// The number of the passes in the initial list of Passes (g_passes). +template<> +uint16_t const PassDriver<PassDriverMEOpts>::g_passes_size = + arraysize(PassDriver<PassDriverMEOpts>::g_passes); + +// The default pass list is used by the PassDriverME instance of PassDriver +// to initialize pass_list_. +template<> +std::vector<const Pass*> PassDriver<PassDriverMEOpts>::g_default_pass_list( + PassDriver<PassDriverMEOpts>::g_passes, + PassDriver<PassDriverMEOpts>::g_passes + + PassDriver<PassDriverMEOpts>::g_passes_size); + +// By default, do not have a dump pass list. +template<> +std::string PassDriver<PassDriverMEOpts>::dump_pass_list_ = std::string(); + +// By default, do not have a print pass list. +template<> +std::string PassDriver<PassDriverMEOpts>::print_pass_list_ = std::string(); + +// By default, we do not print the pass' information. +template<> +bool PassDriver<PassDriverMEOpts>::default_print_passes_ = false; + +void PassDriverMEOpts::ApplyPass(PassDataHolder* data, const Pass* pass) { + // First call the base class' version. + PassDriver::ApplyPass(data, pass); + + const PassME* pass_me = down_cast<const PassME*> (pass); + DCHECK(pass_me != nullptr); + + PassMEDataHolder* pass_me_data_holder = down_cast<PassMEDataHolder*>(data); + + // Now we care about flags. + if ((pass_me->GetFlag(kOptimizationBasicBlockChange) == true) || + (pass_me->GetFlag(kOptimizationDefUsesChange) == true)) { + CompilationUnit* c_unit = pass_me_data_holder->c_unit; + c_unit->mir_graph.get()->CalculateBasicBlockInformation(); + } +} + +} // namespace art diff --git a/compiler/dex/pass_driver_me_opts.h b/compiler/dex/pass_driver_me_opts.h new file mode 100644 index 0000000000..0a5b5aec99 --- /dev/null +++ b/compiler/dex/pass_driver_me_opts.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DEX_PASS_DRIVER_ME_OPTS_H_ +#define ART_COMPILER_DEX_PASS_DRIVER_ME_OPTS_H_ + +#include "pass_driver_me.h" + +namespace art { + +// Forward Declarations. +struct CompilationUnit; +class Pass; +class PassDataHolder; + +class PassDriverMEOpts : public PassDriverME<PassDriverMEOpts> { + public: + explicit PassDriverMEOpts(CompilationUnit* cu):PassDriverME<PassDriverMEOpts>(cu) { + } + + ~PassDriverMEOpts() { + } + + /** + * @brief Apply a patch: perform start/work/end functions. 
+ */ + virtual void ApplyPass(PassDataHolder* data, const Pass* pass); +}; + +} // namespace art +#endif // ART_COMPILER_DEX_PASS_DRIVER_ME_OPTS_H_ diff --git a/compiler/dex/pass_driver_me_post_opt.cc b/compiler/dex/pass_driver_me_post_opt.cc new file mode 100644 index 0000000000..cb63f4184f --- /dev/null +++ b/compiler/dex/pass_driver_me_post_opt.cc @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "base/macros.h" +#include "post_opt_passes.h" +#include "compiler_internals.h" +#include "pass_driver_me_post_opt.h" + +namespace art { + +/* + * Create the pass list. These passes are immutable and are shared across the threads. + * + * Advantage is that there will be no race conditions here. + * Disadvantage is the passes can't change their internal states depending on CompilationUnit: + * - This is not yet an issue: no current pass would require it. + */ +// The initial list of passes to be used by the PassDriveMEPostOpt. +template<> +const Pass* const PassDriver<PassDriverMEPostOpt>::g_passes[] = { + GetPassInstance<InitializeData>(), + GetPassInstance<ClearPhiInstructions>(), + GetPassInstance<CalculatePredecessors>(), + GetPassInstance<DFSOrders>(), + GetPassInstance<BuildDomination>(), + GetPassInstance<DefBlockMatrix>(), + GetPassInstance<CreatePhiNodes>(), + GetPassInstance<ClearVisitedFlag>(), + GetPassInstance<SSAConversion>(), + GetPassInstance<PhiNodeOperands>(), + GetPassInstance<ConstantPropagation>(), + GetPassInstance<PerformInitRegLocations>(), + GetPassInstance<MethodUseCount>(), + GetPassInstance<FreeData>(), +}; + +// The number of the passes in the initial list of Passes (g_passes). +template<> +uint16_t const PassDriver<PassDriverMEPostOpt>::g_passes_size = + arraysize(PassDriver<PassDriverMEPostOpt>::g_passes); + +// The default pass list is used by the PassDriverME instance of PassDriver +// to initialize pass_list_. +template<> +std::vector<const Pass*> PassDriver<PassDriverMEPostOpt>::g_default_pass_list( + PassDriver<PassDriverMEPostOpt>::g_passes, + PassDriver<PassDriverMEPostOpt>::g_passes + + PassDriver<PassDriverMEPostOpt>::g_passes_size); + +// By default, do not have a dump pass list. +template<> +std::string PassDriver<PassDriverMEPostOpt>::dump_pass_list_ = std::string(); + +// By default, do not have a print pass list. +template<> +std::string PassDriver<PassDriverMEPostOpt>::print_pass_list_ = std::string(); + +// By default, we do not print the pass' information. 
+template<> +bool PassDriver<PassDriverMEPostOpt>::default_print_passes_ = false; + +} // namespace art diff --git a/compiler/dex/pass_driver_me_post_opt.h b/compiler/dex/pass_driver_me_post_opt.h new file mode 100644 index 0000000000..574a6ba04d --- /dev/null +++ b/compiler/dex/pass_driver_me_post_opt.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DEX_PASS_DRIVER_ME_POST_OPT_H_ +#define ART_COMPILER_DEX_PASS_DRIVER_ME_POST_OPT_H_ + +#include "pass_driver_me.h" + +namespace art { + +// Forward Declarations. +struct CompilationUnit; +class Pass; +class PassDataHolder; + +class PassDriverMEPostOpt : public PassDriverME<PassDriverMEPostOpt> { + public: + explicit PassDriverMEPostOpt(CompilationUnit* cu) : PassDriverME<PassDriverMEPostOpt>(cu) { + } + + ~PassDriverMEPostOpt() { + } +}; + +} // namespace art +#endif // ART_COMPILER_DEX_PASS_DRIVER_ME_POST_OPT_H_ diff --git a/compiler/dex/pass_me.h b/compiler/dex/pass_me.h index 069fb45dc4..9efd5aeb40 100644 --- a/compiler/dex/pass_me.h +++ b/compiler/dex/pass_me.h @@ -32,6 +32,9 @@ class Pass; * @details Each enum should be a power of 2 to be correctly used. */ enum OptimizationFlag { + kOptimizationBasicBlockChange = 1, /**< @brief Has there been a change to a BasicBlock? */ + kOptimizationDefUsesChange = 2, /**< @brief Has there been a change to a def-use? */ + kLoopStructureChange = 4, /**< @brief Has there been a loop structural change? */ }; // Data holder class. @@ -93,7 +96,7 @@ class PassME: public Pass { /** @brief Type of traversal: determines the order to execute the pass on the BasicBlocks. */ const DataFlowAnalysisMode traversal_type_; - /** @brief Flags for additional directives: used to determine if a particular clean-up is necessary post pass. */ + /** @brief Flags for additional directives: used to determine if a particular post-optimization pass is necessary. */ const unsigned int flags_; /** @brief CFG Dump Folder: what sub-folder to use for dumping the CFGs post pass. */ diff --git a/compiler/dex/portable/mir_to_gbc.cc b/compiler/dex/portable/mir_to_gbc.cc index 576e2424fa..fd67608e73 100644 --- a/compiler/dex/portable/mir_to_gbc.cc +++ b/compiler/dex/portable/mir_to_gbc.cc @@ -712,7 +712,7 @@ bool MirConverter::ConvertMIRNode(MIR* mir, BasicBlock* bb, int opt_flags = mir->optimization_flags; if (cu_->verbose) { - if (op_val < kMirOpFirst) { + if (!IsPseudoMirOp(op_val)) { LOG(INFO) << ".. " << Instruction::Name(opcode) << " 0x" << std::hex << op_val; } else { LOG(INFO) << mir_graph_->extended_mir_op_names_[op_val - kMirOpFirst] << " 0x" << std::hex << op_val; @@ -1550,7 +1550,7 @@ void MirConverter::HandlePhiNodes(BasicBlock* bb, ::llvm::BasicBlock* llvm_bb) { SetDexOffset(bb->start_offset); for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { int opcode = mir->dalvikInsn.opcode; - if (opcode < kMirOpFirst) { + if (!IsPseudoMirOp(opcode)) { // Stop after first non-pseudo MIR op. 
continue; } @@ -1759,7 +1759,7 @@ bool MirConverter::BlockBitcodeConversion(BasicBlock* bb) { } } - if (opcode >= kMirOpFirst) { + if (IsPseudoMirOp(opcode)) { ConvertExtendedMIR(bb, mir, llvm_bb); continue; } diff --git a/compiler/dex/post_opt_passes.cc b/compiler/dex/post_opt_passes.cc new file mode 100644 index 0000000000..58700a4bd3 --- /dev/null +++ b/compiler/dex/post_opt_passes.cc @@ -0,0 +1,108 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "post_opt_passes.h" +#include "dataflow_iterator.h" +#include "dataflow_iterator-inl.h" + +namespace art { + +/* + * MethodUseCount pass implementation start. + */ +bool MethodUseCount::Gate(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + // First initialize the data. + c_unit->mir_graph->InitializeMethodUses(); + + // Now check if the pass is to be ignored. + bool res = ((c_unit->disable_opt & (1 << kPromoteRegs)) == 0); + + return res; +} + +bool MethodUseCount::Worker(const PassDataHolder* data) const { + DCHECK(data != nullptr); + const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); + CompilationUnit* c_unit = pass_me_data_holder->c_unit; + DCHECK(c_unit != nullptr); + BasicBlock* bb = pass_me_data_holder->bb; + DCHECK(bb != nullptr); + c_unit->mir_graph->CountUses(bb); + // No need of repeating, so just return false. + return false; +} + + +bool ClearPhiInstructions::Worker(const PassDataHolder* data) const { + DCHECK(data != nullptr); + const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); + CompilationUnit* c_unit = pass_me_data_holder->c_unit; + DCHECK(c_unit != nullptr); + BasicBlock* bb = pass_me_data_holder->bb; + DCHECK(bb != nullptr); + MIR* mir = bb->first_mir_insn; + + while (mir != nullptr) { + MIR* next = mir->next; + + Instruction::Code opcode = mir->dalvikInsn.opcode; + + if (opcode == static_cast<Instruction::Code> (kMirOpPhi)) { + bb->RemoveMIR(mir); + } + + mir = next; + } + + // We do not care in reporting a change or not in the MIR. + return false; +} + +void CalculatePredecessors::Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + // First get the MIRGraph here to factorize a bit the code. + MIRGraph *mir_graph = c_unit->mir_graph.get(); + + // First clear all predecessors. + AllNodesIterator first(mir_graph); + for (BasicBlock* bb = first.Next(); bb != nullptr; bb = first.Next()) { + bb->predecessors->Reset(); + } + + // Now calculate all predecessors. + AllNodesIterator second(mir_graph); + for (BasicBlock* bb = second.Next(); bb != nullptr; bb = second.Next()) { + // We only care about non hidden blocks. + if (bb->hidden == true) { + continue; + } + + // Create iterator for visiting children. 
+ ChildBlockIterator child_iter(bb, mir_graph); + + // Now iterate through the children to set the predecessor bits. + for (BasicBlock* child = child_iter.Next(); child != nullptr; child = child_iter.Next()) { + child->predecessors->Insert(bb->id); + } + } +} + +} // namespace art diff --git a/compiler/dex/post_opt_passes.h b/compiler/dex/post_opt_passes.h new file mode 100644 index 0000000000..f2035052c9 --- /dev/null +++ b/compiler/dex/post_opt_passes.h @@ -0,0 +1,284 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DEX_POST_OPT_PASSES_H_ +#define ART_COMPILER_DEX_POST_OPT_PASSES_H_ + +#include "compiler_internals.h" +#include "pass_me.h" + +namespace art { + +/** + * @class InitializeData + * @brief There is some data that needs to be initialized before performing + * the post optimization passes. + */ +class InitializeData : public PassME { + public: + InitializeData() : PassME("InitializeData") { + } + + void Start(const PassDataHolder* data) const { + // New blocks may have been inserted so the first thing we do is ensure that + // the c_unit's number of blocks matches the actual count of basic blocks. + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + c_unit->mir_graph.get()->InitializeBasicBlockData(); + c_unit->mir_graph.get()->SSATransformationStart(); + } +}; + +/** + * @class MethodUseCount + * @brief Count the register uses of the method + */ +class MethodUseCount : public PassME { + public: + MethodUseCount() : PassME("UseCount") { + } + + bool Worker(const PassDataHolder* data) const; + + bool Gate(const PassDataHolder* data) const; +}; + +/** + * @class ClearPhiInformation + * @brief Clear the PHI nodes from the CFG. + */ +class ClearPhiInstructions : public PassME { + public: + ClearPhiInstructions() : PassME("ClearPhiInstructions") { + } + + bool Worker(const PassDataHolder* data) const; +}; + +/** + * @class CalculatePredecessors + * @brief Calculate the predecessor BitVector of each Basicblock. 
+ */ +class CalculatePredecessors : public PassME { + public: + CalculatePredecessors() : PassME("CalculatePredecessors") { + } + + void Start(const PassDataHolder* data) const; +}; + +/** + * @class DFSOrders + * @brief Compute the DFS order of the MIR graph + */ +class DFSOrders : public PassME { + public: + DFSOrders() : PassME("DFSOrders") { + } + + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + c_unit->mir_graph.get()->ComputeDFSOrders(); + } +}; + +/** + * @class BuildDomination + * @brief Build the domination information of the MIR Graph + */ +class BuildDomination : public PassME { + public: + BuildDomination() : PassME("BuildDomination") { + } + + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + c_unit->mir_graph.get()->ComputeDominators(); + c_unit->mir_graph.get()->CompilerInitializeSSAConversion(); + } + + void End(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + // Verify the dataflow information after the pass. + if (c_unit->enable_debug & (1 << kDebugVerifyDataflow)) { + c_unit->mir_graph->VerifyDataflow(); + } + } +}; + +/** + * @class DefBlockMatrix + * @brief Calculate the matrix of definition per basic block + */ +class DefBlockMatrix : public PassME { + public: + DefBlockMatrix() : PassME("DefBlockMatrix") { + } + + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + c_unit->mir_graph.get()->ComputeDefBlockMatrix(); + } +}; + +/** + * @class CreatePhiNodes + * @brief Pass to create the phi nodes after SSA calculation + */ +class CreatePhiNodes : public PassME { + public: + CreatePhiNodes() : PassME("CreatePhiNodes") { + } + + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + c_unit->mir_graph.get()->InsertPhiNodes(); + } +}; + +/** + * @class ClearVisitedFlag + * @brief Pass to clear the visited flag for all basic blocks. 
+ */ + +class ClearVisitedFlag : public PassME { + public: + ClearVisitedFlag() : PassME("ClearVisitedFlag") { + } + + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + c_unit->mir_graph.get()->ClearAllVisitedFlags(); + } +}; + +/** + * @class SSAConversion + * @brief Pass for SSA conversion of MIRs + */ +class SSAConversion : public PassME { + public: + SSAConversion() : PassME("SSAConversion") { + } + + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + MIRGraph *mir_graph = c_unit->mir_graph.get(); + mir_graph->DoDFSPreOrderSSARename(mir_graph->GetEntryBlock()); + } +}; + +/** + * @class PhiNodeOperands + * @brief Pass to insert the Phi node operands to basic blocks + */ +class PhiNodeOperands : public PassME { + public: + PhiNodeOperands() : PassME("PhiNodeOperands", kPreOrderDFSTraversal) { + } + + bool Worker(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + BasicBlock* bb = down_cast<const PassMEDataHolder*>(data)->bb; + DCHECK(bb != nullptr); + c_unit->mir_graph->InsertPhiNodeOperands(bb); + // No need of repeating, so just return false. + return false; + } +}; + +/** + * @class InitRegLocations + * @brief Initialize Register Locations. + */ +class PerformInitRegLocations : public PassME { + public: + PerformInitRegLocations() : PassME("PerformInitRegLocation") { + } + + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + c_unit->mir_graph->InitRegLocations(); + } +}; + +/** + * @class ConstantPropagation + * @brief Perform a constant propagation pass. + */ +class ConstantPropagation : public PassME { + public: + ConstantPropagation() : PassME("ConstantPropagation") { + } + + bool Worker(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + BasicBlock* bb = down_cast<const PassMEDataHolder*>(data)->bb; + DCHECK(bb != nullptr); + c_unit->mir_graph->DoConstantPropagation(bb); + // No need of repeating, so just return false. + return false; + } + + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + c_unit->mir_graph->InitializeConstantPropagation(); + } +}; + +/** + * @class FreeData + * @brief There is some data that needs to be freed after performing the post optimization passes. 
+ */ +class FreeData : public PassME { + public: + FreeData() : PassME("FreeData") { + } + + void End(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + c_unit->mir_graph.get()->SSATransformationEnd(); + } +}; + +} // namespace art + +#endif // ART_COMPILER_DEX_POST_OPT_PASSES_H_ diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc index 5d74b8dee3..9f9e61845c 100644 --- a/compiler/dex/quick/arm/call_arm.cc +++ b/compiler/dex/quick/arm/call_arm.cc @@ -313,11 +313,11 @@ void ArmMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) { void ArmMir2Lir::GenMoveException(RegLocation rl_dest) { int ex_offset = Thread::ExceptionOffset<4>().Int32Value(); - RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); - RegStorage reset_reg = AllocTemp(); - Load32Disp(rs_rARM_SELF, ex_offset, rl_result.reg); + RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true); + RegStorage reset_reg = AllocTempRef(); + LoadRefDisp(rs_rARM_SELF, ex_offset, rl_result.reg); LoadConstant(reset_reg, 0); - Store32Disp(rs_rARM_SELF, ex_offset, reset_reg); + StoreRefDisp(rs_rARM_SELF, ex_offset, reset_reg); FreeTemp(reset_reg); StoreValue(rl_dest, rl_result); } diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h index f0a9ca4e82..9c801a520b 100644 --- a/compiler/dex/quick/arm/codegen_arm.h +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -59,6 +59,7 @@ class ArmMir2Lir FINAL : public Mir2Lir { RegLocation GetReturnAlt(); RegLocation GetReturnWideAlt(); RegLocation LocCReturn(); + RegLocation LocCReturnRef(); RegLocation LocCReturnDouble(); RegLocation LocCReturnFloat(); RegLocation LocCReturnWide(); diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc index bb02f74751..e06d814e16 100644 --- a/compiler/dex/quick/arm/fp_arm.cc +++ b/compiler/dex/quick/arm/fp_arm.cc @@ -51,7 +51,7 @@ void ArmMir2Lir::GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, FlushAllRegs(); // Send everything to home location CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2, false); - rl_result = GetReturn(true); + rl_result = GetReturn(kFPReg); StoreValue(rl_dest, rl_result); return; case Instruction::NEG_FLOAT: @@ -94,7 +94,7 @@ void ArmMir2Lir::GenArithOpDouble(Instruction::Code opcode, FlushAllRegs(); // Send everything to home location CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2, false); - rl_result = GetReturnWide(true); + rl_result = GetReturnWide(kFPReg); StoreValueWide(rl_dest, rl_result); return; case Instruction::NEG_DOUBLE: @@ -141,8 +141,11 @@ void ArmMir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, Re break; case Instruction::LONG_TO_DOUBLE: { rl_src = LoadValueWide(rl_src, kFPReg); - RegStorage src_low = rl_src.reg.DoubleToLowSingle(); - RegStorage src_high = rl_src.reg.DoubleToHighSingle(); + RegisterInfo* info = GetRegInfo(rl_src.reg); + RegStorage src_low = info->FindMatchingView(RegisterInfo::kLowSingleStorageMask)->GetReg(); + DCHECK(src_low.Valid()); + RegStorage src_high = info->FindMatchingView(RegisterInfo::kHighSingleStorageMask)->GetReg(); + DCHECK(src_high.Valid()); rl_result = EvalLoc(rl_dest, kFPReg, true); RegStorage tmp1 = AllocTempDouble(); RegStorage tmp2 = AllocTempDouble(); @@ -161,8 +164,11 @@ void ArmMir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, Re 
return; case Instruction::LONG_TO_FLOAT: { rl_src = LoadValueWide(rl_src, kFPReg); - RegStorage src_low = rl_src.reg.DoubleToLowSingle(); - RegStorage src_high = rl_src.reg.DoubleToHighSingle(); + RegisterInfo* info = GetRegInfo(rl_src.reg); + RegStorage src_low = info->FindMatchingView(RegisterInfo::kLowSingleStorageMask)->GetReg(); + DCHECK(src_low.Valid()); + RegStorage src_high = info->FindMatchingView(RegisterInfo::kHighSingleStorageMask)->GetReg(); + DCHECK(src_high.Valid()); rl_result = EvalLoc(rl_dest, kFPReg, true); // Allocate temp registers. RegStorage high_val = AllocTempDouble(); @@ -334,22 +340,11 @@ void ArmMir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) { bool ArmMir2Lir::GenInlinedSqrt(CallInfo* info) { DCHECK_EQ(cu_->instruction_set, kThumb2); - LIR *branch; RegLocation rl_src = info->args[0]; RegLocation rl_dest = InlineTargetWide(info); // double place for result rl_src = LoadValueWide(rl_src, kFPReg); RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true); NewLIR2(kThumb2Vsqrtd, rl_result.reg.GetReg(), rl_src.reg.GetReg()); - NewLIR2(kThumb2Vcmpd, rl_result.reg.GetReg(), rl_result.reg.GetReg()); - NewLIR0(kThumb2Fmstat); - branch = NewLIR2(kThumbBCond, 0, kArmCondEq); - ClobberCallerSave(); - LockCallTemps(); // Using fixed registers - RegStorage r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pSqrt)); - NewLIR3(kThumb2Fmrrd, rs_r0.GetReg(), rs_r1.GetReg(), rl_src.reg.GetReg()); - NewLIR1(kThumbBlxR, r_tgt.GetReg()); - NewLIR3(kThumb2Fmdrr, rl_result.reg.GetReg(), rs_r0.GetReg(), rs_r1.GetReg()); - branch->target = NewLIR0(kPseudoTargetLabel); StoreValueWide(rl_dest, rl_result); return true; } diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc index 2556788bed..769122d8ec 100644 --- a/compiler/dex/quick/arm/int_arm.cc +++ b/compiler/dex/quick/arm/int_arm.cc @@ -206,13 +206,16 @@ void ArmMir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { RegLocation rl_result; RegLocation rl_src = mir_graph_->GetSrc(mir, 0); RegLocation rl_dest = mir_graph_->GetDest(mir); - rl_src = LoadValue(rl_src, kCoreReg); + // Avoid using float regs here. + RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg; + RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg; + rl_src = LoadValue(rl_src, src_reg_class); ConditionCode ccode = mir->meta.ccode; if (mir->ssa_rep->num_uses == 1) { // CONST case int true_val = mir->dalvikInsn.vB; int false_val = mir->dalvikInsn.vC; - rl_result = EvalLoc(rl_dest, kCoreReg, true); + rl_result = EvalLoc(rl_dest, result_reg_class, true); // Change kCondNe to kCondEq for the special cases below. if (ccode == kCondNe) { ccode = kCondEq; @@ -239,8 +242,8 @@ void ArmMir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { OpEndIT(it); // Add a scheduling barrier to keep the IT shadow intact } else { // Unlikely case - could be tuned. 
- RegStorage t_reg1 = AllocTemp(); - RegStorage t_reg2 = AllocTemp(); + RegStorage t_reg1 = AllocTypedTemp(false, result_reg_class); + RegStorage t_reg2 = AllocTypedTemp(false, result_reg_class); LoadConstant(t_reg1, true_val); LoadConstant(t_reg2, false_val); OpRegImm(kOpCmp, rl_src.reg, 0); @@ -253,9 +256,9 @@ void ArmMir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { // MOVE case RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]]; RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]]; - rl_true = LoadValue(rl_true, kCoreReg); - rl_false = LoadValue(rl_false, kCoreReg); - rl_result = EvalLoc(rl_dest, kCoreReg, true); + rl_true = LoadValue(rl_true, result_reg_class); + rl_false = LoadValue(rl_false, result_reg_class); + rl_result = EvalLoc(rl_dest, result_reg_class, true); OpRegImm(kOpCmp, rl_src.reg, 0); LIR* it = nullptr; if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) { // Is the "true" case already in place? @@ -814,10 +817,10 @@ bool ArmMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { // Release store semantics, get the barrier out of the way. TODO: revisit GenMemBarrier(kStoreLoad); - RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg); + RegLocation rl_object = LoadValue(rl_src_obj, kRefReg); RegLocation rl_new_value; if (!is_long) { - rl_new_value = LoadValue(rl_src_new_value, kCoreReg); + rl_new_value = LoadValue(rl_src_new_value); } else if (load_early) { rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg); } @@ -840,7 +843,7 @@ bool ArmMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { RegLocation rl_expected; if (!is_long) { - rl_expected = LoadValue(rl_src_expected, kCoreReg); + rl_expected = LoadValue(rl_src_expected); } else if (load_early) { rl_expected = LoadValueWide(rl_src_expected, kCoreReg); } else { @@ -1047,7 +1050,7 @@ void ArmMir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest, ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pLmul); FlushAllRegs(); CallRuntimeHelperRegLocationRegLocation(func_offset, rl_src1, rl_src2, false); - rl_result = GetReturnWide(false); + rl_result = GetReturnWide(kCoreReg); StoreValueWide(rl_dest, rl_result); return; } @@ -1126,7 +1129,7 @@ void ArmMir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest, if (reg_status != 0) { // We had manually allocated registers for rl_result. // Now construct a RegLocation. - rl_result = GetReturnWide(false); // Just using as a template. + rl_result = GetReturnWide(kCoreReg); // Just using as a template. rl_result.reg = RegStorage::MakeRegPair(res_lo, res_hi); } @@ -1168,7 +1171,7 @@ void ArmMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, int data_offset; RegLocation rl_result; bool constant_index = rl_index.is_const; - rl_array = LoadValue(rl_array, kCoreReg); + rl_array = LoadValue(rl_array, kRefReg); if (!constant_index) { rl_index = LoadValue(rl_index, kCoreReg); } @@ -1203,7 +1206,7 @@ void ArmMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, reg_ptr = rl_array.reg; // NOTE: must not alter reg_ptr in constant case. 
} else { // No special indexed operation, lea + load w/ displacement - reg_ptr = AllocTemp(); + reg_ptr = AllocTempRef(); OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kArmLsl, scale)); FreeTemp(rl_index.reg); } @@ -1229,7 +1232,7 @@ void ArmMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, } } else { // Offset base, then use indexed load - RegStorage reg_ptr = AllocTemp(); + RegStorage reg_ptr = AllocTempRef(); OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset); FreeTemp(rl_array.reg); rl_result = EvalLoc(rl_dest, reg_class, true); @@ -1267,7 +1270,7 @@ void ArmMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, data_offset += mir_graph_->ConstantValue(rl_index) << scale; } - rl_array = LoadValue(rl_array, kCoreReg); + rl_array = LoadValue(rl_array, kRefReg); if (!constant_index) { rl_index = LoadValue(rl_index, kCoreReg); } @@ -1281,7 +1284,7 @@ void ArmMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, reg_ptr = rl_array.reg; } else { allocated_reg_ptr_temp = true; - reg_ptr = AllocTemp(); + reg_ptr = AllocTempRef(); } /* null object? */ diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc index 1520c52a7a..5340d8370a 100644 --- a/compiler/dex/quick/arm/target_arm.cc +++ b/compiler/dex/quick/arm/target_arm.cc @@ -25,47 +25,43 @@ namespace art { -// TODO: rework this when c++11 support allows. -static const RegStorage core_regs_arr[] = +static constexpr RegStorage core_regs_arr[] = {rs_r0, rs_r1, rs_r2, rs_r3, rs_rARM_SUSPEND, rs_r5, rs_r6, rs_r7, rs_r8, rs_rARM_SELF, rs_r10, rs_r11, rs_r12, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC}; -static const RegStorage sp_regs_arr[] = +static constexpr RegStorage sp_regs_arr[] = {rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15, rs_fr16, rs_fr17, rs_fr18, rs_fr19, rs_fr20, rs_fr21, rs_fr22, rs_fr23, rs_fr24, rs_fr25, rs_fr26, rs_fr27, rs_fr28, rs_fr29, rs_fr30, rs_fr31}; -static const RegStorage dp_regs_arr[] = +static constexpr RegStorage dp_regs_arr[] = {rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15}; -static const RegStorage reserved_regs_arr[] = +static constexpr RegStorage reserved_regs_arr[] = {rs_rARM_SUSPEND, rs_rARM_SELF, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC}; -static const RegStorage core_temps_arr[] = {rs_r0, rs_r1, rs_r2, rs_r3, rs_r12}; -static const RegStorage sp_temps_arr[] = +static constexpr RegStorage core_temps_arr[] = {rs_r0, rs_r1, rs_r2, rs_r3, rs_r12}; +static constexpr RegStorage sp_temps_arr[] = {rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15}; -static const RegStorage dp_temps_arr[] = +static constexpr RegStorage dp_temps_arr[] = {rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7}; -static const std::vector<RegStorage> empty_pool; -static const std::vector<RegStorage> core_regs(core_regs_arr, - core_regs_arr + sizeof(core_regs_arr) / sizeof(core_regs_arr[0])); -static const std::vector<RegStorage> sp_regs(sp_regs_arr, - sp_regs_arr + sizeof(sp_regs_arr) / sizeof(sp_regs_arr[0])); -static const std::vector<RegStorage> dp_regs(dp_regs_arr, - dp_regs_arr + sizeof(dp_regs_arr) / sizeof(dp_regs_arr[0])); -static const std::vector<RegStorage> reserved_regs(reserved_regs_arr, - reserved_regs_arr + sizeof(reserved_regs_arr) / 
sizeof(reserved_regs_arr[0])); -static const std::vector<RegStorage> core_temps(core_temps_arr, - core_temps_arr + sizeof(core_temps_arr) / sizeof(core_temps_arr[0])); -static const std::vector<RegStorage> sp_temps(sp_temps_arr, - sp_temps_arr + sizeof(sp_temps_arr) / sizeof(sp_temps_arr[0])); -static const std::vector<RegStorage> dp_temps(dp_temps_arr, - dp_temps_arr + sizeof(dp_temps_arr) / sizeof(dp_temps_arr[0])); +static constexpr ArrayRef<const RegStorage> empty_pool; +static constexpr ArrayRef<const RegStorage> core_regs(core_regs_arr); +static constexpr ArrayRef<const RegStorage> sp_regs(sp_regs_arr); +static constexpr ArrayRef<const RegStorage> dp_regs(dp_regs_arr); +static constexpr ArrayRef<const RegStorage> reserved_regs(reserved_regs_arr); +static constexpr ArrayRef<const RegStorage> core_temps(core_temps_arr); +static constexpr ArrayRef<const RegStorage> sp_temps(sp_temps_arr); +static constexpr ArrayRef<const RegStorage> dp_temps(dp_temps_arr); RegLocation ArmMir2Lir::LocCReturn() { return arm_loc_c_return; } +RegLocation ArmMir2Lir::LocCReturnRef() { + return arm_loc_c_return; +} + RegLocation ArmMir2Lir::LocCReturnWide() { return arm_loc_c_return_wide; } @@ -575,10 +571,10 @@ void ArmMir2Lir::CompilerInitializeRegAlloc() { // Redirect single precision's master storage to master. info->SetMaster(dp_reg_info); // Singles should show a single 32-bit mask bit, at first referring to the low half. - DCHECK_EQ(info->StorageMask(), 0x1U); + DCHECK_EQ(info->StorageMask(), RegisterInfo::kLowSingleStorageMask); if (sp_reg_num & 1) { - // For odd singles, change to user the high word of the backing double. - info->SetStorageMask(0x2); + // For odd singles, change to use the high word of the backing double. + info->SetStorageMask(RegisterInfo::kHighSingleStorageMask); } } @@ -786,10 +782,13 @@ RegStorage ArmMir2Lir::AllocPreservedDouble(int s_reg) { } } if (res.Valid()) { + RegisterInfo* info = GetRegInfo(res); promotion_map_[p_map_idx].fp_location = kLocPhysReg; - promotion_map_[p_map_idx].FpReg = res.DoubleToLowSingle().GetReg(); + promotion_map_[p_map_idx].FpReg = + info->FindMatchingView(RegisterInfo::kLowSingleStorageMask)->GetReg().GetReg(); promotion_map_[p_map_idx+1].fp_location = kLocPhysReg; - promotion_map_[p_map_idx+1].FpReg = res.DoubleToHighSingle().GetReg(); + promotion_map_[p_map_idx+1].FpReg = + info->FindMatchingView(RegisterInfo::kHighSingleStorageMask)->GetReg().GetReg(); } return res; } diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc index 2e3ef86b9c..d0f8e741b6 100644 --- a/compiler/dex/quick/arm64/call_arm64.cc +++ b/compiler/dex/quick/arm64/call_arm64.cc @@ -287,9 +287,9 @@ void Arm64Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) { void Arm64Mir2Lir::GenMoveException(RegLocation rl_dest) { int ex_offset = Thread::ExceptionOffset<8>().Int32Value(); - RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); - Load32Disp(rs_rA64_SELF, ex_offset, rl_result.reg); - Store32Disp(rs_rA64_SELF, ex_offset, rs_xzr); + RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true); + LoadRefDisp(rs_rA64_SELF, ex_offset, rl_result.reg); + StoreRefDisp(rs_rA64_SELF, ex_offset, rs_xzr); StoreValue(rl_dest, rl_result); } diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h index fddbfd79ac..6251f4f578 100644 --- a/compiler/dex/quick/arm64/codegen_arm64.h +++ b/compiler/dex/quick/arm64/codegen_arm64.h @@ -59,6 +59,7 @@ class Arm64Mir2Lir : public Mir2Lir { RegLocation GetReturnAlt(); 
RegLocation GetReturnWideAlt(); RegLocation LocCReturn(); + RegLocation LocCReturnRef(); RegLocation LocCReturnDouble(); RegLocation LocCReturnFloat(); RegLocation LocCReturnWide(); @@ -222,8 +223,6 @@ class Arm64Mir2Lir : public Mir2Lir { bool skip_this); private: - void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val, - ConditionCode ccode); LIR* LoadFPConstantValue(int r_dest, int32_t value); LIR* LoadFPConstantValueWide(int r_dest, int64_t value); void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir); diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc index 882ee6660d..acc7d17b56 100644 --- a/compiler/dex/quick/arm64/fp_arm64.cc +++ b/compiler/dex/quick/arm64/fp_arm64.cc @@ -47,7 +47,7 @@ void Arm64Mir2Lir::GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest FlushAllRegs(); // Send everything to home location CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmodf), rl_src1, rl_src2, false); - rl_result = GetReturn(true); + rl_result = GetReturn(kFPReg); StoreValue(rl_dest, rl_result); return; case Instruction::NEG_FLOAT: @@ -90,7 +90,7 @@ void Arm64Mir2Lir::GenArithOpDouble(Instruction::Code opcode, FlushAllRegs(); // Send everything to home location CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmod), rl_src1, rl_src2, false); - rl_result = GetReturnWide(true); + rl_result = GetReturnWide(kFPReg); StoreValueWide(rl_dest, rl_result); return; case Instruction::NEG_DOUBLE: diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc index 8dad90aba6..0a76b9b295 100644 --- a/compiler/dex/quick/arm64/int_arm64.cc +++ b/compiler/dex/quick/arm64/int_arm64.cc @@ -29,7 +29,6 @@ LIR* Arm64Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage s return OpCondBranch(cond, target); } -// TODO(Arm64): remove this. LIR* Arm64Mir2Lir::OpIT(ConditionCode ccode, const char* guide) { LOG(FATAL) << "Unexpected use of OpIT for Arm64"; return NULL; @@ -42,8 +41,8 @@ void Arm64Mir2Lir::OpEndIT(LIR* it) { /* * 64-bit 3way compare function. * cmp xA, xB - * csinc wC, wzr, wzr, eq - * csneg wC, wC, wC, le + * csinc wC, wzr, wzr, eq // wC = (xA == xB) ? 0 : 1 + * csneg wC, wC, wC, ge // wC = (xA >= xB) ? 
wC : -wC */ void Arm64Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { @@ -53,10 +52,10 @@ void Arm64Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, rl_result = EvalLoc(rl_dest, kCoreReg, true); OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); - NewLIR4(WIDE(kA64Csinc4rrrc), rl_result.reg.GetReg(), rxzr, rxzr, kArmCondEq); - NewLIR4(WIDE(kA64Csneg4rrrc), rl_result.reg.GetReg(), rl_result.reg.GetReg(), - rl_result.reg.GetReg(), kArmCondLe); - StoreValueWide(rl_dest, rl_result); + NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondEq); + NewLIR4(kA64Csneg4rrrc, rl_result.reg.GetReg(), rl_result.reg.GetReg(), + rl_result.reg.GetReg(), kArmCondGe); + StoreValue(rl_dest, rl_result); } void Arm64Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, @@ -85,154 +84,60 @@ void Arm64Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, StoreValueWide(rl_dest, rl_result); } -void Arm64Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, - int64_t val, ConditionCode ccode) { - LIR* taken = &block_label_list_[bb->taken]; - rl_src1 = LoadValueWide(rl_src1, kCoreReg); - - if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) { - ArmOpcode opcode = (ccode == kCondEq) ? kA64Cbz2rt : kA64Cbnz2rt; - LIR* branch = NewLIR2(WIDE(opcode), rl_src1.reg.GetLowReg(), 0); - branch->target = taken; - } else { - OpRegImm64(kOpCmp, rl_src1.reg, val); - OpCondBranch(ccode, taken); - } -} - void Arm64Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { - // TODO(Arm64): implement this. - UNIMPLEMENTED(FATAL); - RegLocation rl_result; RegLocation rl_src = mir_graph_->GetSrc(mir, 0); RegLocation rl_dest = mir_graph_->GetDest(mir); - rl_src = LoadValue(rl_src, kCoreReg); - ConditionCode ccode = mir->meta.ccode; - if (mir->ssa_rep->num_uses == 1) { - // CONST case - int true_val = mir->dalvikInsn.vB; - int false_val = mir->dalvikInsn.vC; - rl_result = EvalLoc(rl_dest, kCoreReg, true); - // Change kCondNe to kCondEq for the special cases below. - if (ccode == kCondNe) { - ccode = kCondEq; - std::swap(true_val, false_val); - } - bool cheap_false_val = InexpensiveConstantInt(false_val); - if (cheap_false_val && ccode == kCondEq && (true_val == 0 || true_val == -1)) { - OpRegRegImm(kOpSub, rl_result.reg, rl_src.reg, -true_val); - DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); - OpIT(true_val == 0 ? kCondNe : kCondUge, ""); - LoadConstant(rl_result.reg, false_val); - GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact - } else if (cheap_false_val && ccode == kCondEq && true_val == 1) { - OpRegRegImm(kOpRsub, rl_result.reg, rl_src.reg, 1); - DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); - OpIT(kCondLs, ""); - LoadConstant(rl_result.reg, false_val); - GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact - } else if (cheap_false_val && InexpensiveConstantInt(true_val)) { - OpRegImm(kOpCmp, rl_src.reg, 0); - OpIT(ccode, "E"); - LoadConstant(rl_result.reg, true_val); - LoadConstant(rl_result.reg, false_val); - GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact - } else { - // Unlikely case - could be tuned. 
- RegStorage t_reg1 = AllocTemp(); - RegStorage t_reg2 = AllocTemp(); - LoadConstant(t_reg1, true_val); - LoadConstant(t_reg2, false_val); - OpRegImm(kOpCmp, rl_src.reg, 0); - OpIT(ccode, "E"); - OpRegCopy(rl_result.reg, t_reg1); - OpRegCopy(rl_result.reg, t_reg2); - GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact - } - } else { - // MOVE case - RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]]; - RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]]; - rl_true = LoadValue(rl_true, kCoreReg); - rl_false = LoadValue(rl_false, kCoreReg); - rl_result = EvalLoc(rl_dest, kCoreReg, true); - OpRegImm(kOpCmp, rl_src.reg, 0); - if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) { // Is the "true" case already in place? - OpIT(NegateComparison(ccode), ""); - OpRegCopy(rl_result.reg, rl_false.reg); - } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) { // False case in place? - OpIT(ccode, ""); - OpRegCopy(rl_result.reg, rl_true.reg); - } else { // Normal - select between the two. - OpIT(ccode, "E"); - OpRegCopy(rl_result.reg, rl_true.reg); - OpRegCopy(rl_result.reg, rl_false.reg); - } - GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact - } + RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg; + RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg; + rl_src = LoadValue(rl_src, src_reg_class); + ArmConditionCode code = ArmConditionEncoding(mir->meta.ccode); + + RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]]; + RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]]; + rl_true = LoadValue(rl_true, result_reg_class); + rl_false = LoadValue(rl_false, result_reg_class); + rl_result = EvalLoc(rl_dest, result_reg_class, true); + OpRegImm(kOpCmp, rl_src.reg, 0); + NewLIR4(kA64Csel4rrrc, rl_result.reg.GetReg(), rl_true.reg.GetReg(), + rl_false.reg.GetReg(), code); StoreValue(rl_dest, rl_result); } void Arm64Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) { - // TODO(Arm64): implement this. - UNIMPLEMENTED(FATAL); - RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0); RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2); + LIR* taken = &block_label_list_[bb->taken]; + LIR* not_taken = &block_label_list_[bb->fall_through]; + rl_src1 = LoadValueWide(rl_src1, kCoreReg); // Normalize such that if either operand is constant, src2 will be constant. ConditionCode ccode = mir->meta.ccode; if (rl_src1.is_const) { std::swap(rl_src1, rl_src2); ccode = FlipComparisonOrder(ccode); } + if (rl_src2.is_const) { - RegLocation rl_temp = UpdateLocWide(rl_src2); - // Do special compare/branch against simple const operand if not already in registers. + rl_src2 = UpdateLocWide(rl_src2); int64_t val = mir_graph_->ConstantValueWide(rl_src2); - if ((rl_temp.location != kLocPhysReg) - /*&& ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))*/) { - GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode); + // Special handling using cbz & cbnz. + if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) { + OpCmpImmBranch(ccode, rl_src1.reg, 0, taken); + OpCmpImmBranch(NegateComparison(ccode), rl_src1.reg, 0, not_taken); + return; + // Only handle Imm if src2 is not already in a register. 
+ } else if (rl_src2.location != kLocPhysReg) { + OpRegImm64(kOpCmp, rl_src1.reg, val); + OpCondBranch(ccode, taken); + OpCondBranch(NegateComparison(ccode), not_taken); return; } } - LIR* taken = &block_label_list_[bb->taken]; - LIR* not_taken = &block_label_list_[bb->fall_through]; - rl_src1 = LoadValueWide(rl_src1, kCoreReg); + rl_src2 = LoadValueWide(rl_src2, kCoreReg); - OpRegReg(kOpCmp, rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh()); - switch (ccode) { - case kCondEq: - OpCondBranch(kCondNe, not_taken); - break; - case kCondNe: - OpCondBranch(kCondNe, taken); - break; - case kCondLt: - OpCondBranch(kCondLt, taken); - OpCondBranch(kCondGt, not_taken); - ccode = kCondUlt; - break; - case kCondLe: - OpCondBranch(kCondLt, taken); - OpCondBranch(kCondGt, not_taken); - ccode = kCondLs; - break; - case kCondGt: - OpCondBranch(kCondGt, taken); - OpCondBranch(kCondLt, not_taken); - ccode = kCondHi; - break; - case kCondGe: - OpCondBranch(kCondGt, taken); - OpCondBranch(kCondLt, not_taken); - ccode = kCondUge; - break; - default: - LOG(FATAL) << "Unexpected ccode: " << ccode; - } - OpRegReg(kOpCmp, rl_src1.reg.GetLow(), rl_src2.reg.GetLow()); + OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); OpCondBranch(ccode, taken); + OpCondBranch(NegateComparison(ccode), not_taken); } /* @@ -468,7 +373,7 @@ bool Arm64Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) { RegLocation rl_dest = InlineTarget(info); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); - OpIT((is_min) ? kCondGt : kCondLt, "E"); + // OpIT((is_min) ? kCondGt : kCondLt, "E"); OpRegReg(kOpMov, rl_result.reg, rl_src2.reg); OpRegReg(kOpMov, rl_result.reg, rl_src1.reg); GenBarrier(); @@ -598,10 +503,10 @@ bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { // Release store semantics, get the barrier out of the way. TODO: revisit GenMemBarrier(kStoreLoad); - RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg); + RegLocation rl_object = LoadValue(rl_src_obj, kRefReg); RegLocation rl_new_value; if (!is_long) { - rl_new_value = LoadValue(rl_src_new_value, kCoreReg); + rl_new_value = LoadValue(rl_src_new_value); } else if (load_early) { rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg); } @@ -624,7 +529,7 @@ bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { RegLocation rl_expected; if (!is_long) { - rl_expected = LoadValue(rl_src_expected, kCoreReg); + rl_expected = LoadValue(rl_src_expected); } else if (load_early) { rl_expected = LoadValueWide(rl_src_expected, kCoreReg); } else { @@ -668,7 +573,7 @@ bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { NewLIR3(kA64Ldxr2rX, r_tmp.GetReg(), r_ptr.GetReg(), 0); OpRegReg(kOpSub, r_tmp, rl_expected.reg); DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); - OpIT(kCondEq, "T"); + // OpIT(kCondEq, "T"); NewLIR4(kA64Stxr3wrX /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg(), 0); } @@ -684,7 +589,7 @@ bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); OpRegRegImm(kOpRsub, rl_result.reg, r_tmp, 1); DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); - OpIT(kCondUlt, ""); + // OpIT(kCondUlt, ""); LoadConstant(rl_result.reg, 0); /* cc */ FreeTemp(r_tmp); // Now unneeded. 
@@ -866,7 +771,7 @@ void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, int data_offset; RegLocation rl_result; bool constant_index = rl_index.is_const; - rl_array = LoadValue(rl_array, kCoreReg); + rl_array = LoadValue(rl_array, kRefReg); if (!constant_index) { rl_index = LoadValue(rl_index, kCoreReg); } @@ -901,7 +806,7 @@ void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, reg_ptr = rl_array.reg; // NOTE: must not alter reg_ptr in constant case. } else { // No special indexed operation, lea + load w/ displacement - reg_ptr = AllocTemp(); + reg_ptr = AllocTempRef(); OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kA64Lsl, scale)); FreeTemp(rl_index.reg); } @@ -927,7 +832,7 @@ void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, } } else { // Offset base, then use indexed load - RegStorage reg_ptr = AllocTemp(); + RegStorage reg_ptr = AllocTempRef(); OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset); FreeTemp(rl_array.reg); rl_result = EvalLoc(rl_dest, reg_class, true); @@ -968,7 +873,7 @@ void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, data_offset += mir_graph_->ConstantValue(rl_index) << scale; } - rl_array = LoadValue(rl_array, kCoreReg); + rl_array = LoadValue(rl_array, kRefReg); if (!constant_index) { rl_index = LoadValue(rl_index, kCoreReg); } @@ -982,7 +887,7 @@ void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, reg_ptr = rl_array.reg; } else { allocated_reg_ptr_temp = true; - reg_ptr = AllocTemp(); + reg_ptr = AllocTempRef(); } /* null object? */ diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc index 02224476ff..b287399900 100644 --- a/compiler/dex/quick/arm64/target_arm64.cc +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -25,77 +25,72 @@ namespace art { -// TODO: rework this when c++11 support allows. 
-static const RegStorage core_regs_arr[] = +static constexpr RegStorage core_regs_arr[] = {rs_w0, rs_w1, rs_w2, rs_w3, rs_w4, rs_w5, rs_w6, rs_w7, rs_w8, rs_w9, rs_w10, rs_w11, rs_w12, rs_w13, rs_w14, rs_w15, rs_w16, rs_w17, rs_w18, rs_w19, rs_w20, rs_w21, rs_w22, rs_w23, rs_w24, rs_w25, rs_w26, rs_w27, rs_w28, rs_w29, rs_w30, rs_w31, rs_wzr}; -static const RegStorage core64_regs_arr[] = +static constexpr RegStorage core64_regs_arr[] = {rs_x0, rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7, rs_x8, rs_x9, rs_x10, rs_x11, rs_x12, rs_x13, rs_x14, rs_x15, rs_x16, rs_x17, rs_x18, rs_x19, rs_x20, rs_x21, rs_x22, rs_x23, rs_x24, rs_x25, rs_x26, rs_x27, rs_x28, rs_x29, rs_x30, rs_x31, rs_xzr}; -static const RegStorage sp_regs_arr[] = +static constexpr RegStorage sp_regs_arr[] = {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7, rs_f8, rs_f9, rs_f10, rs_f11, rs_f12, rs_f13, rs_f14, rs_f15, rs_f16, rs_f17, rs_f18, rs_f19, rs_f20, rs_f21, rs_f22, rs_f23, rs_f24, rs_f25, rs_f26, rs_f27, rs_f28, rs_f29, rs_f30, rs_f31}; -static const RegStorage dp_regs_arr[] = +static constexpr RegStorage dp_regs_arr[] = {rs_d0, rs_d1, rs_d2, rs_d3, rs_d4, rs_d5, rs_d6, rs_d7, rs_d8, rs_d9, rs_d10, rs_d11, rs_d12, rs_d13, rs_d14, rs_d15, rs_d16, rs_d17, rs_d18, rs_d19, rs_d20, rs_d21, rs_d22, rs_d23, rs_d24, rs_d25, rs_d26, rs_d27, rs_d28, rs_d29, rs_d30, rs_d31}; -static const RegStorage reserved_regs_arr[] = +static constexpr RegStorage reserved_regs_arr[] = {rs_rA32_SUSPEND, rs_rA32_SELF, rs_rA32_SP, rs_rA32_LR, rs_wzr}; -static const RegStorage reserved64_regs_arr[] = +static constexpr RegStorage reserved64_regs_arr[] = {rs_rA64_SUSPEND, rs_rA64_SELF, rs_rA64_SP, rs_rA64_LR, rs_xzr}; // TUNING: Are there too many temp registers and too less promote target? // This definition need to be matched with runtime.cc, quick entry assembly and JNI compiler // Note: we are not able to call to C function directly if it un-match C ABI. // Currently, rs_rA64_SELF is not a callee save register which does not match C ABI. 
-static const RegStorage core_temps_arr[] = +static constexpr RegStorage core_temps_arr[] = {rs_w0, rs_w1, rs_w2, rs_w3, rs_w4, rs_w5, rs_w6, rs_w7, rs_w8, rs_w9, rs_w10, rs_w11, rs_w12, rs_w13, rs_w14, rs_w15, rs_w16, rs_w17}; -static const RegStorage core64_temps_arr[] = +static constexpr RegStorage core64_temps_arr[] = {rs_x0, rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7, rs_x8, rs_x9, rs_x10, rs_x11, rs_x12, rs_x13, rs_x14, rs_x15, rs_x16, rs_x17}; -static const RegStorage sp_temps_arr[] = +static constexpr RegStorage sp_temps_arr[] = {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7, rs_f16, rs_f17, rs_f18, rs_f19, rs_f20, rs_f21, rs_f22, rs_f23, rs_f24, rs_f25, rs_f26, rs_f27, rs_f28, rs_f29, rs_f30, rs_f31}; -static const RegStorage dp_temps_arr[] = +static constexpr RegStorage dp_temps_arr[] = {rs_d0, rs_d1, rs_d2, rs_d3, rs_d4, rs_d5, rs_d6, rs_d7, rs_d16, rs_d17, rs_d18, rs_d19, rs_d20, rs_d21, rs_d22, rs_d23, rs_d24, rs_d25, rs_d26, rs_d27, rs_d28, rs_d29, rs_d30, rs_d31}; -static const std::vector<RegStorage> core_regs(core_regs_arr, - core_regs_arr + arraysize(core_regs_arr)); -static const std::vector<RegStorage> core64_regs(core64_regs_arr, - core64_regs_arr + arraysize(core64_regs_arr)); -static const std::vector<RegStorage> sp_regs(sp_regs_arr, - sp_regs_arr + arraysize(sp_regs_arr)); -static const std::vector<RegStorage> dp_regs(dp_regs_arr, - dp_regs_arr + arraysize(dp_regs_arr)); -static const std::vector<RegStorage> reserved_regs(reserved_regs_arr, - reserved_regs_arr + arraysize(reserved_regs_arr)); -static const std::vector<RegStorage> reserved64_regs(reserved64_regs_arr, - reserved64_regs_arr + arraysize(reserved64_regs_arr)); -static const std::vector<RegStorage> core_temps(core_temps_arr, - core_temps_arr + arraysize(core_temps_arr)); -static const std::vector<RegStorage> core64_temps(core64_temps_arr, - core64_temps_arr + arraysize(core64_temps_arr)); -static const std::vector<RegStorage> sp_temps(sp_temps_arr, sp_temps_arr + arraysize(sp_temps_arr)); -static const std::vector<RegStorage> dp_temps(dp_temps_arr, dp_temps_arr + arraysize(dp_temps_arr)); +static constexpr ArrayRef<const RegStorage> core_regs(core_regs_arr); +static constexpr ArrayRef<const RegStorage> core64_regs(core64_regs_arr); +static constexpr ArrayRef<const RegStorage> sp_regs(sp_regs_arr); +static constexpr ArrayRef<const RegStorage> dp_regs(dp_regs_arr); +static constexpr ArrayRef<const RegStorage> reserved_regs(reserved_regs_arr); +static constexpr ArrayRef<const RegStorage> reserved64_regs(reserved64_regs_arr); +static constexpr ArrayRef<const RegStorage> core_temps(core_temps_arr); +static constexpr ArrayRef<const RegStorage> core64_temps(core64_temps_arr); +static constexpr ArrayRef<const RegStorage> sp_temps(sp_temps_arr); +static constexpr ArrayRef<const RegStorage> dp_temps(dp_temps_arr); RegLocation Arm64Mir2Lir::LocCReturn() { return arm_loc_c_return; } +RegLocation Arm64Mir2Lir::LocCReturnRef() { + return arm_loc_c_return; +} + RegLocation Arm64Mir2Lir::LocCReturnWide() { return arm_loc_c_return_wide; } @@ -258,7 +253,6 @@ static uint64_t RepeatBitsAcrossReg(bool is_wide, uint64_t value, unsigned width unsigned i; unsigned reg_size = (is_wide) ? 
64 : 32; uint64_t result = value & BIT_MASK(width); - DCHECK_NE(width, reg_size); for (i = width; i < reg_size; i *= 2) { result |= (result << i); } @@ -573,7 +567,7 @@ RegisterClass Arm64Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volat if (UNLIKELY(is_volatile)) { // On arm64, fp register load/store is atomic only for single bytes. if (size != kSignedByte && size != kUnsignedByte) { - return kCoreReg; + return (size == kReference) ? kRefReg : kCoreReg; } } return RegClassBySize(size); @@ -836,7 +830,7 @@ void Arm64Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { int num_fpr_used = 0; /* - * Dummy up a RegLocation for the incoming Method* + * Dummy up a RegLocation for the incoming StackReference<mirror::ArtMethod> * It will attempt to keep kArg0 live (or copy it to home location * if promoted). */ @@ -845,14 +839,10 @@ void Arm64Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { rl_src.reg = TargetReg(kArg0); rl_src.home = false; MarkLive(rl_src); - - // rl_method might be 32-bit, but ArtMethod* on stack is 64-bit, so always flush it. - StoreWordDisp(TargetReg(kSp), 0, TargetReg(kArg0)); - - // If Method* has been promoted, load it, - // otherwise, rl_method is the 32-bit value on [sp], and has already been loaded. + StoreValue(rl_method, rl_src); + // If Method* has been promoted, explicitly flush if (rl_method.location == kLocPhysReg) { - StoreValue(rl_method, rl_src); + StoreRefDisp(TargetReg(kSp), 0, TargetReg(kArg0)); } if (cu_->num_ins == 0) { @@ -909,9 +899,7 @@ int Arm64Mir2Lir::LoadArgRegs(CallInfo* info, int call_state, RegLocation rl_arg = info->args[next_arg++]; rl_arg = UpdateRawLoc(rl_arg); if (rl_arg.wide && (next_reg <= TargetReg(kArg2).GetReg())) { - RegStorage r_tmp(RegStorage::k64BitPair, next_reg, next_reg + 1); - LoadValueDirectWideFixed(rl_arg, r_tmp); - next_reg++; + LoadValueDirectWideFixed(rl_arg, RegStorage::Solo64(next_reg)); next_arg++; } else { if (rl_arg.wide) { diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc index eca0d2fa82..d0ab4f6844 100644 --- a/compiler/dex/quick/arm64/utility_arm64.cc +++ b/compiler/dex/quick/arm64/utility_arm64.cc @@ -504,7 +504,7 @@ LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_s CHECK_EQ(r_dest.Is64Bit(), r_src1.Is64Bit()); CHECK_EQ(r_dest.Is64Bit(), r_src2.Is64Bit()); if (EncodingMap[opcode].flags & IS_QUAD_OP) { - DCHECK_EQ(shift, ENCODE_NO_SHIFT); + DCHECK(!IsExtendEncoding(shift)); return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), shift); } else { DCHECK(EncodingMap[opcode].flags & IS_TERTIARY_OP); @@ -706,40 +706,46 @@ bool Arm64Mir2Lir::IsExtendEncoding(int encoded_value) { LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale, OpSize size) { LIR* load; + int expected_scale = 0; ArmOpcode opcode = kA64Brk1d; - ArmOpcode wide = kA64NotWide; - - DCHECK(scale == 0 || scale == 1); if (r_dest.IsFloat()) { - bool is_double = r_dest.IsDouble(); - bool is_single = !is_double; - DCHECK_EQ(is_single, r_dest.IsSingle()); - - // If r_dest is a single, then size must be either k32 or kSingle. - // If r_dest is a double, then size must be either k64 or kDouble. - DCHECK(!is_single || size == k32 || size == kSingle); - DCHECK(!is_double || size == k64 || size == kDouble); - return NewLIR4((is_double) ? 
FWIDE(kA64Ldr4fXxG) : kA64Ldr4fXxG, - r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(), scale); + if (r_dest.IsDouble()) { + DCHECK(size == k64 || size == kDouble); + expected_scale = 3; + opcode = FWIDE(kA64Ldr4fXxG); + } else { + DCHECK(r_dest.IsSingle()); + DCHECK(size == k32 || size == kSingle); + expected_scale = 2; + opcode = kA64Ldr4fXxG; + } + + DCHECK(scale == 0 || scale == expected_scale); + return NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(), + (scale != 0) ? 1 : 0); } switch (size) { case kDouble: case kWord: case k64: - wide = kA64Wide; - // Intentional fall-trough. + opcode = WIDE(kA64Ldr4rXxG); + expected_scale = 3; + break; case kSingle: case k32: case kReference: opcode = kA64Ldr4rXxG; + expected_scale = 2; break; case kUnsignedHalf: opcode = kA64Ldrh4wXxd; + expected_scale = 1; break; case kSignedHalf: opcode = kA64Ldrsh4rXxd; + expected_scale = 1; break; case kUnsignedByte: opcode = kA64Ldrb3wXx; @@ -751,13 +757,14 @@ LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegSto LOG(FATAL) << "Bad size: " << size; } - if (UNLIKELY((EncodingMap[opcode].flags & IS_TERTIARY_OP) != 0)) { - // Tertiary ops (e.g. ldrb, ldrsb) do not support scale. + if (UNLIKELY(expected_scale == 0)) { + // This is a tertiary op (e.g. ldrb, ldrsb), it does not not support scale. + DCHECK_NE(EncodingMap[UNWIDE(opcode)].flags & IS_TERTIARY_OP, 0U); DCHECK_EQ(scale, 0); - load = NewLIR3(opcode | wide, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg()); + load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg()); } else { - DCHECK(scale == 0 || scale == ((wide == kA64Wide) ? 3 : 2)); - load = NewLIR4(opcode | wide, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(), + DCHECK(scale == 0 || scale == expected_scale); + load = NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(), (scale != 0) ? 1 : 0); } @@ -767,39 +774,43 @@ LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegSto LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale, OpSize size) { LIR* store; + int expected_scale = 0; ArmOpcode opcode = kA64Brk1d; - ArmOpcode wide = kA64NotWide; - - DCHECK(scale == 0 || scale == 1); if (r_src.IsFloat()) { - bool is_double = r_src.IsDouble(); - bool is_single = !is_double; - DCHECK_EQ(is_single, r_src.IsSingle()); - - // If r_src is a single, then size must be either k32 or kSingle. - // If r_src is a double, then size must be either k64 or kDouble. - DCHECK(!is_single || size == k32 || size == kSingle); - DCHECK(!is_double || size == k64 || size == kDouble); - return NewLIR4((is_double) ? FWIDE(kA64Str4fXxG) : kA64Str4fXxG, - r_src.GetReg(), r_base.GetReg(), r_index.GetReg(), scale); + if (r_src.IsDouble()) { + DCHECK(size == k64 || size == kDouble); + expected_scale = 3; + opcode = FWIDE(kA64Str4fXxG); + } else { + DCHECK(r_src.IsSingle()); + DCHECK(size == k32 || size == kSingle); + expected_scale = 2; + opcode = kA64Str4fXxG; + } + + DCHECK(scale == 0 || scale == expected_scale); + return NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(), + (scale != 0) ? 1 : 0); } switch (size) { case kDouble: // Intentional fall-trough. case kWord: // Intentional fall-trough. case k64: - opcode = kA64Str4rXxG; - wide = kA64Wide; + opcode = WIDE(kA64Str4rXxG); + expected_scale = 3; break; case kSingle: // Intentional fall-trough. case k32: // Intentional fall-trough. 
case kReference: opcode = kA64Str4rXxG; + expected_scale = 2; break; case kUnsignedHalf: case kSignedHalf: opcode = kA64Strh4wXxd; + expected_scale = 1; break; case kUnsignedByte: case kSignedByte: @@ -809,12 +820,14 @@ LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegSt LOG(FATAL) << "Bad size: " << size; } - if (UNLIKELY((EncodingMap[opcode].flags & IS_TERTIARY_OP) != 0)) { - // Tertiary ops (e.g. strb) do not support scale. + if (UNLIKELY(expected_scale == 0)) { + // This is a tertiary op (e.g. strb), it does not not support scale. + DCHECK_NE(EncodingMap[UNWIDE(opcode)].flags & IS_TERTIARY_OP, 0U); DCHECK_EQ(scale, 0); - store = NewLIR3(opcode | wide, r_src.GetReg(), r_base.GetReg(), r_index.GetReg()); + store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg()); } else { - store = NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(), scale); + store = NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(), + (scale != 0) ? 1 : 0); } return store; @@ -842,8 +855,8 @@ LIR* Arm64Mir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStor opcode = FWIDE(kA64Ldr3fXD); alt_opcode = FWIDE(kA64Ldur3fXd); } else { - opcode = FWIDE(kA64Ldr3rXD); - alt_opcode = FWIDE(kA64Ldur3rXd); + opcode = WIDE(kA64Ldr3rXD); + alt_opcode = WIDE(kA64Ldur3rXd); } break; case kSingle: // Intentional fall-through. diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index 256135df71..3fbbc4eba7 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -1201,21 +1201,27 @@ std::vector<uint8_t>* Mir2Lir::ReturnCallFrameInformation() { } RegLocation Mir2Lir::NarrowRegLoc(RegLocation loc) { - loc.wide = false; if (loc.location == kLocPhysReg) { + DCHECK(!loc.reg.Is32Bit()); if (loc.reg.IsPair()) { - loc.reg = loc.reg.GetLow(); + RegisterInfo* info_lo = GetRegInfo(loc.reg.GetLow()); + RegisterInfo* info_hi = GetRegInfo(loc.reg.GetHigh()); + info_lo->SetIsWide(false); + info_hi->SetIsWide(false); + loc.reg = info_lo->GetReg(); } else { - // FIXME: temp workaround. - // Issue here: how do we narrow to a 32-bit value in 64-bit container? - // Probably the wrong thing to narrow the RegStorage container here. That - // should be a target decision. At the RegLocation level, we're only - // modifying the view of the Dalvik value - this is orthogonal to the storage - // container size. Consider this a temp workaround. - DCHECK(loc.reg.IsDouble()); - loc.reg = loc.reg.DoubleToLowSingle(); + RegisterInfo* info = GetRegInfo(loc.reg); + RegisterInfo* info_new = info->FindMatchingView(RegisterInfo::k32SoloStorageMask); + DCHECK(info_new != nullptr); + if (info->IsLive() && (info->SReg() == loc.s_reg_low)) { + info->MarkDead(); + info_new->MarkLive(loc.s_reg_low); + } + loc.reg = info_new->GetReg(); } + DCHECK(loc.reg.Valid()); } + loc.wide = false; return loc; } diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc index 526c981ae9..6397208790 100644 --- a/compiler/dex/quick/dex_file_method_inliner.cc +++ b/compiler/dex/quick/dex_file_method_inliner.cc @@ -43,6 +43,7 @@ MIR* AllocReplacementMIR(MIRGraph* mir_graph, MIR* invoke, MIR* move_return) { uint32_t GetInvokeReg(MIR* invoke, uint32_t arg) { DCHECK_LT(arg, invoke->dalvikInsn.vA); + DCHECK(!MIRGraph::IsPseudoMirOp(invoke->dalvikInsn.opcode)); if (Instruction::FormatOf(invoke->dalvikInsn.opcode) == Instruction::k3rc) { return invoke->dalvikInsn.vC + arg; // Non-range invoke. 
} else { @@ -53,6 +54,7 @@ uint32_t GetInvokeReg(MIR* invoke, uint32_t arg) { bool WideArgIsInConsecutiveDalvikRegs(MIR* invoke, uint32_t arg) { DCHECK_LT(arg + 1, invoke->dalvikInsn.vA); + DCHECK(!MIRGraph::IsPseudoMirOp(invoke->dalvikInsn.opcode)); return Instruction::FormatOf(invoke->dalvikInsn.opcode) == Instruction::k3rc || invoke->dalvikInsn.arg[arg + 1u] == invoke->dalvikInsn.arg[arg] + 1u; } diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index 7e3c8ce7e7..62c81d05bb 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -220,6 +220,8 @@ void Mir2Lir::ForceImplicitNullCheck(RegStorage reg, int opt_flags) { void Mir2Lir::GenCompareAndBranch(Instruction::Code opcode, RegLocation rl_src1, RegLocation rl_src2, LIR* taken, LIR* fall_through) { + DCHECK(!rl_src1.fp); + DCHECK(!rl_src2.fp); ConditionCode cond; switch (opcode) { case Instruction::IF_EQ: @@ -253,7 +255,7 @@ void Mir2Lir::GenCompareAndBranch(Instruction::Code opcode, RegLocation rl_src1, cond = FlipComparisonOrder(cond); } - rl_src1 = LoadValue(rl_src1, kCoreReg); + rl_src1 = LoadValue(rl_src1); // Is this really an immediate comparison? if (rl_src2.is_const) { // If it's already live in a register or not easily materialized, just keep going @@ -265,14 +267,15 @@ void Mir2Lir::GenCompareAndBranch(Instruction::Code opcode, RegLocation rl_src1, return; } } - rl_src2 = LoadValue(rl_src2, kCoreReg); + rl_src2 = LoadValue(rl_src2); OpCmpBranch(cond, rl_src1.reg, rl_src2.reg, taken); } void Mir2Lir::GenCompareZeroAndBranch(Instruction::Code opcode, RegLocation rl_src, LIR* taken, LIR* fall_through) { ConditionCode cond; - rl_src = LoadValue(rl_src, kCoreReg); + DCHECK(!rl_src.fp); + rl_src = LoadValue(rl_src); switch (opcode) { case Instruction::IF_EQZ: cond = kCondEq; @@ -371,7 +374,7 @@ static void GenNewArrayImpl(Mir2Lir* mir_to_lir, CompilationUnit* cu, func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pAllocArrayWithAccessCheck); mir_to_lir->CallRuntimeHelperImmMethodRegLocation(func_offset, type_idx, rl_src, true); } - RegLocation rl_result = mir_to_lir->GetReturn(false); + RegLocation rl_result = mir_to_lir->GetReturn(kRefReg); mir_to_lir->StoreValue(rl_dest, rl_result); } @@ -503,7 +506,7 @@ void Mir2Lir::GenFilledNewArray(CallInfo* info) { } } if (info->result.location != kLocInvalid) { - StoreValue(info->result, GetReturn(false /* not fp */)); + StoreValue(info->result, GetReturn(kRefReg)); } } @@ -562,8 +565,8 @@ void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, bool is_long_or_double, RegStorage r_base; if (field_info.IsReferrersClass()) { // Fast path, static storage base is this method's class - RegLocation rl_method = LoadCurrMethod(); - r_base = AllocTemp(); + RegLocation rl_method = LoadCurrMethod(); + r_base = AllocTempRef(); LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base); if (IsTemp(rl_method.reg)) { FreeTemp(rl_method.reg); @@ -603,6 +606,8 @@ void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, bool is_long_or_double, field_info.StorageIndex(), r_base)); FreeTemp(r_tmp); + // Ensure load of status and load of value don't re-order. 
+ GenMemBarrier(kLoadLoad); } FreeTemp(r_method); } @@ -658,7 +663,7 @@ void Mir2Lir::GenSget(MIR* mir, RegLocation rl_dest, if (field_info.IsReferrersClass()) { // Fast path, static storage base is this method's class RegLocation rl_method = LoadCurrMethod(); - r_base = AllocTemp(); + r_base = AllocTempRef(); LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base); } else { // Medium path, static storage base in a different class which requires checks that the other @@ -694,6 +699,8 @@ void Mir2Lir::GenSget(MIR* mir, RegLocation rl_dest, field_info.StorageIndex(), r_base)); FreeTemp(r_tmp); + // Ensure load of status and load of value don't re-order. + GenMemBarrier(kLoadLoad); } FreeTemp(r_method); } @@ -726,10 +733,10 @@ void Mir2Lir::GenSget(MIR* mir, RegLocation rl_dest, GenSgetCall<4>(this, is_long_or_double, is_object, &field_info); } if (is_long_or_double) { - RegLocation rl_result = GetReturnWide(rl_dest.fp); + RegLocation rl_result = GetReturnWide(LocToRegClass(rl_dest)); StoreValueWide(rl_dest, rl_result); } else { - RegLocation rl_result = GetReturn(rl_dest.fp); + RegLocation rl_result = GetReturn(LocToRegClass(rl_dest)); StoreValue(rl_dest, rl_result); } } @@ -766,7 +773,7 @@ void Mir2Lir::GenIGet(MIR* mir, int opt_flags, OpSize size, (!field_info.IsVolatile() || SupportsVolatileLoadStore(load_size))) { RegisterClass reg_class = RegClassForFieldLoadStore(load_size, field_info.IsVolatile()); DCHECK_GE(field_info.FieldOffset().Int32Value(), 0); - rl_obj = LoadValue(rl_obj, kCoreReg); + rl_obj = LoadValue(rl_obj, kRefReg); GenNullCheck(rl_obj.reg, opt_flags); RegLocation rl_result = EvalLoc(rl_dest, reg_class, true); int field_offset = field_info.FieldOffset().Int32Value(); @@ -793,10 +800,10 @@ void Mir2Lir::GenIGet(MIR* mir, int opt_flags, OpSize size, GenIgetCall<4>(this, is_long_or_double, is_object, &field_info, rl_obj); } if (is_long_or_double) { - RegLocation rl_result = GetReturnWide(rl_dest.fp); + RegLocation rl_result = GetReturnWide(LocToRegClass(rl_dest)); StoreValueWide(rl_dest, rl_result); } else { - RegLocation rl_result = GetReturn(rl_dest.fp); + RegLocation rl_result = GetReturn(LocToRegClass(rl_dest)); StoreValue(rl_dest, rl_result); } } @@ -824,7 +831,7 @@ void Mir2Lir::GenIPut(MIR* mir, int opt_flags, OpSize size, (!field_info.IsVolatile() || SupportsVolatileLoadStore(store_size))) { RegisterClass reg_class = RegClassForFieldLoadStore(store_size, field_info.IsVolatile()); DCHECK_GE(field_info.FieldOffset().Int32Value(), 0); - rl_obj = LoadValue(rl_obj, kCoreReg); + rl_obj = LoadValue(rl_obj, kRefReg); if (is_long_or_double) { rl_src = LoadValueWide(rl_src, reg_class); } else { @@ -881,7 +888,7 @@ void Mir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl void Mir2Lir::GenConstClass(uint32_t type_idx, RegLocation rl_dest) { RegLocation rl_method = LoadCurrMethod(); RegStorage res_reg = AllocTemp(); - RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true); if (!cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, *cu_->dex_file, type_idx)) { @@ -894,15 +901,15 @@ void Mir2Lir::GenConstClass(uint32_t type_idx, RegLocation rl_dest) { CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess), type_idx, rl_method.reg, true); } - RegLocation rl_result = GetReturn(false); + RegLocation rl_result = GetReturn(kRefReg); StoreValue(rl_dest, rl_result); } else { // We're don't need access checks, load type from dex 
cache int32_t dex_cache_offset = mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(); - Load32Disp(rl_method.reg, dex_cache_offset, res_reg); + LoadRefDisp(rl_method.reg, dex_cache_offset, res_reg); int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value(); - Load32Disp(res_reg, offset_of_type, rl_result.reg); + LoadRefDisp(res_reg, offset_of_type, rl_result.reg); if (!cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file, type_idx) || SLOW_TYPE_PATH) { // Slow path, at runtime test if type is null and if so initialize @@ -976,7 +983,7 @@ void Mir2Lir::GenConstString(uint32_t string_idx, RegLocation rl_dest) { TargetReg(kArg0)); // Might call out to helper, which will return resolved string in kRet0 - Load32Disp(TargetReg(kArg0), offset_of_string, TargetReg(kRet0)); + LoadRefDisp(TargetReg(kArg0), offset_of_string, TargetReg(kRet0)); LIR* fromfast = OpCmpImmBranch(kCondEq, TargetReg(kRet0), 0, NULL); LIR* cont = NewLIR0(kPseudoTargetLabel); @@ -1010,13 +1017,13 @@ void Mir2Lir::GenConstString(uint32_t string_idx, RegLocation rl_dest) { } GenBarrier(); - StoreValue(rl_dest, GetReturn(false)); + StoreValue(rl_dest, GetReturn(kRefReg)); } else { RegLocation rl_method = LoadCurrMethod(); - RegStorage res_reg = AllocTemp(); - RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + RegStorage res_reg = AllocTempRef(); + RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true); LoadRefDisp(rl_method.reg, mirror::ArtMethod::DexCacheStringsOffset().Int32Value(), res_reg); - Load32Disp(res_reg, offset_of_string, rl_result.reg); + LoadRefDisp(res_reg, offset_of_string, rl_result.reg); StoreValue(rl_dest, rl_result); } } @@ -1071,7 +1078,7 @@ static void GenNewInstanceImpl(Mir2Lir* mir_to_lir, CompilationUnit* cu, uint32_ func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pAllocObjectWithAccessCheck); mir_to_lir->CallRuntimeHelperImmMethod(func_offset, type_idx, true); } - RegLocation rl_result = mir_to_lir->GetReturn(false); + RegLocation rl_result = mir_to_lir->GetReturn(kRefReg); mir_to_lir->StoreValue(rl_dest, rl_result); } @@ -1103,7 +1110,7 @@ void Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, Re // X86 has its own implementation. DCHECK(cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64); - RegLocation object = LoadValue(rl_src, kCoreReg); + RegLocation object = LoadValue(rl_src, kRefReg); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); RegStorage result_reg = rl_result.reg; if (result_reg == object.reg) { @@ -1112,8 +1119,8 @@ void Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, Re LoadConstant(result_reg, 0); // assume false LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, NULL); - RegStorage check_class = AllocTypedTemp(false, kCoreReg); - RegStorage object_class = AllocTypedTemp(false, kCoreReg); + RegStorage check_class = AllocTypedTemp(false, kRefReg); + RegStorage object_class = AllocTypedTemp(false, kRefReg); LoadCurrMethodDirect(check_class); if (use_declaring_class) { @@ -1206,7 +1213,7 @@ void Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_know } } /* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result */ - RegLocation rl_result = GetReturn(false); + RegLocation rl_result = GetReturn(kRefReg); if (cu_->instruction_set == kMips) { // On MIPS rArg0 != rl_result, place false in result if branch is taken. 
LoadConstant(rl_result.reg, 0); @@ -1511,7 +1518,7 @@ void Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, } else { GenShiftOpLongCall<4>(this, opcode, rl_src1, rl_shift); } - RegLocation rl_result = GetReturnWide(false); + RegLocation rl_result = GetReturnWide(kCoreReg); StoreValueWide(rl_dest, rl_result); } @@ -1653,7 +1660,7 @@ void Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, CallHelper(r_tgt, QUICK_ENTRYPOINT_OFFSET(4, pIdivmod), false /* not a safepoint */); } if (op == kOpDiv) - rl_result = GetReturn(false); + rl_result = GetReturn(kCoreReg); else rl_result = GetReturnAlt(); } @@ -1918,7 +1925,7 @@ void Mir2Lir::GenArithOpIntLit(Instruction::Code opcode, RegLocation rl_dest, Re false); } if (is_div) - rl_result = GetReturn(false); + rl_result = GetReturn(kCoreReg); else rl_result = GetReturnAlt(); } @@ -2081,7 +2088,7 @@ static void GenArithOpLongImpl(Mir2Lir* mir_to_lir, CompilationUnit* cu, Instruc } // Adjust return regs in to handle case of rem returning kArg2/kArg3 if (ret_reg == mir_to_lir->TargetReg(kRet0).GetReg()) - rl_result = mir_to_lir->GetReturnWide(false); + rl_result = mir_to_lir->GetReturnWide(kCoreReg); else rl_result = mir_to_lir->GetReturnWideAlt(); mir_to_lir->StoreValueWide(rl_dest, rl_result); @@ -2119,11 +2126,11 @@ void Mir2Lir::GenConversionCall(ThreadOffset<pointer_size> func_offset, CallRuntimeHelperRegLocation(func_offset, rl_src, false); if (rl_dest.wide) { RegLocation rl_result; - rl_result = GetReturnWide(rl_dest.fp); + rl_result = GetReturnWide(LocToRegClass(rl_dest)); StoreValueWide(rl_dest, rl_result); } else { RegLocation rl_result; - rl_result = GetReturn(rl_dest.fp); + rl_result = GetReturn(LocToRegClass(rl_dest)); StoreValue(rl_dest, rl_result); } } diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index 5ec1ca9d63..842533b66b 100644 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -434,7 +434,7 @@ INSTANTIATE(void Mir2Lir::CallRuntimeHelperRegLocationRegLocationRegLocation, Re */ void Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { /* - * Dummy up a RegLocation for the incoming Method* + * Dummy up a RegLocation for the incoming StackReference<mirror::ArtMethod> * It will attempt to keep kArg0 live (or copy it to home location * if promoted). */ @@ -443,14 +443,10 @@ void Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { rl_src.reg = TargetReg(kArg0); rl_src.home = false; MarkLive(rl_src); - if (rl_method.wide) { - StoreValueWide(rl_method, rl_src); - } else { - StoreValue(rl_method, rl_src); - } + StoreValue(rl_method, rl_src); // If Method* has been promoted, explicitly flush if (rl_method.location == kLocPhysReg) { - StoreWordDisp(TargetReg(kSp), 0, TargetReg(kArg0)); + StoreRefDisp(TargetReg(kSp), 0, TargetReg(kArg0)); } if (cu_->num_ins == 0) { @@ -864,8 +860,17 @@ int Mir2Lir::GenDalvikArgsNoRange(CallInfo* info, // Wide spans, we need the 2nd half of uses[2]. rl_arg = UpdateLocWide(rl_use2); if (rl_arg.location == kLocPhysReg) { - // NOTE: not correct for 64-bit core regs, but this needs rewriting for hard-float. - reg = rl_arg.reg.IsPair() ? rl_arg.reg.GetHigh() : rl_arg.reg.DoubleToHighSingle(); + if (rl_arg.reg.IsPair()) { + reg = rl_arg.reg.GetHigh(); + } else { + RegisterInfo* info = GetRegInfo(rl_arg.reg); + info = info->FindMatchingView(RegisterInfo::kHighSingleStorageMask); + if (info == nullptr) { + // NOTE: For hard float convention we won't split arguments across reg/mem. 
+ UNIMPLEMENTED(FATAL) << "Needs hard float api."; + } + reg = info->GetReg(); + } } else { // kArg2 & rArg3 can safely be used here reg = TargetReg(kArg3); @@ -1151,7 +1156,7 @@ int Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, RegLocation Mir2Lir::InlineTarget(CallInfo* info) { RegLocation res; if (info->result.location == kLocInvalid) { - res = GetReturn(false); + res = GetReturn(LocToRegClass(info->result)); } else { res = info->result; } @@ -1161,7 +1166,7 @@ RegLocation Mir2Lir::InlineTarget(CallInfo* info) { RegLocation Mir2Lir::InlineTargetWide(CallInfo* info) { RegLocation res; if (info->result.location == kLocInvalid) { - res = GetReturnWide(false); + res = GetReturnWide(kCoreReg); } else { res = info->result; } @@ -1184,7 +1189,7 @@ bool Mir2Lir::GenInlinedCharAt(CallInfo* info) { RegLocation rl_obj = info->args[0]; RegLocation rl_idx = info->args[1]; - rl_obj = LoadValue(rl_obj, kCoreReg); + rl_obj = LoadValue(rl_obj, kRefReg); // X86 wants to avoid putting a constant index into a register. if (!((cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64)&& rl_idx.is_const)) { rl_idx = LoadValue(rl_idx, kCoreReg); @@ -1197,7 +1202,7 @@ bool Mir2Lir::GenInlinedCharAt(CallInfo* info) { RegStorage reg_ptr; if (cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64) { reg_off = AllocTemp(); - reg_ptr = AllocTemp(); + reg_ptr = AllocTempRef(); if (range_check) { reg_max = AllocTemp(); Load32Disp(rl_obj.reg, count_offset, reg_max); @@ -1227,9 +1232,9 @@ bool Mir2Lir::GenInlinedCharAt(CallInfo* info) { } } reg_off = AllocTemp(); - reg_ptr = AllocTemp(); + reg_ptr = AllocTempRef(); Load32Disp(rl_obj.reg, offset_offset, reg_off); - Load32Disp(rl_obj.reg, value_offset, reg_ptr); + LoadRefDisp(rl_obj.reg, value_offset, reg_ptr); } if (rl_idx.is_const) { OpRegImm(kOpAdd, reg_off, mir_graph_->ConstantValue(rl_idx.orig_sreg)); @@ -1266,7 +1271,7 @@ bool Mir2Lir::GenInlinedStringIsEmptyOrLength(CallInfo* info, bool is_empty) { } // dst = src.length(); RegLocation rl_obj = info->args[0]; - rl_obj = LoadValue(rl_obj, kCoreReg); + rl_obj = LoadValue(rl_obj, kRefReg); RegLocation rl_dest = InlineTarget(info); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); GenNullCheck(rl_obj.reg, info->opt_flags); @@ -1472,7 +1477,7 @@ bool Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { DCHECK_EQ(mir_graph_->ConstantValue(rl_char) & ~0xFFFF, 0); DCHECK(high_code_point_branch == nullptr); } - RegLocation rl_return = GetReturn(false); + RegLocation rl_return = GetReturn(kCoreReg); RegLocation rl_dest = InlineTarget(info); StoreValue(rl_dest, rl_return); return true; @@ -1518,7 +1523,7 @@ bool Mir2Lir::GenInlinedStringCompareTo(CallInfo* info) { OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(4, pStringCompareTo)); } } - RegLocation rl_return = GetReturn(false); + RegLocation rl_return = GetReturn(kCoreReg); RegLocation rl_dest = InlineTarget(info); StoreValue(rl_dest, rl_return); return true; @@ -1570,7 +1575,7 @@ bool Mir2Lir::GenInlinedUnsafeGet(CallInfo* info, rl_src_offset = NarrowRegLoc(rl_src_offset); // ignore high half in info->args[3] RegLocation rl_dest = is_long ? 
InlineTargetWide(info) : InlineTarget(info); // result reg - RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg); + RegLocation rl_object = LoadValue(rl_src_obj, kRefReg); RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); if (is_long) { @@ -1616,7 +1621,7 @@ bool Mir2Lir::GenInlinedUnsafePut(CallInfo* info, bool is_long, // There might have been a store before this volatile one so insert StoreStore barrier. GenMemBarrier(kStoreStore); } - RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg); + RegLocation rl_object = LoadValue(rl_src_obj, kRefReg); RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg); RegLocation rl_value; if (is_long) { @@ -1630,7 +1635,7 @@ bool Mir2Lir::GenInlinedUnsafePut(CallInfo* info, bool is_long, FreeTemp(rl_temp_offset); } } else { - rl_value = LoadValue(rl_src_value, kCoreReg); + rl_value = LoadValue(rl_src_value); StoreBaseIndexed(rl_object.reg, rl_offset.reg, rl_value.reg, 0, k32); } @@ -1653,7 +1658,7 @@ void Mir2Lir::GenInvoke(CallInfo* info) { if (info->type != kStatic && ((cu_->disable_opt & (1 << kNullCheckElimination)) != 0 || (info->opt_flags & MIR_IGNORE_NULL_CHECK) == 0)) { - RegLocation rl_obj = LoadValue(info->args[0], kCoreReg); + RegLocation rl_obj = LoadValue(info->args[0], kRefReg); GenNullCheck(rl_obj.reg); } return; @@ -1778,10 +1783,10 @@ void Mir2Lir::GenInvokeNoInline(CallInfo* info) { if (info->result.location != kLocInvalid) { // We have a following MOVE_RESULT - do it now. if (info->result.wide) { - RegLocation ret_loc = GetReturnWide(info->result.fp); + RegLocation ret_loc = GetReturnWide(LocToRegClass(info->result)); StoreValueWide(info->result, ret_loc); } else { - RegLocation ret_loc = GetReturn(info->result.fp); + RegLocation ret_loc = GetReturn(LocToRegClass(info->result)); StoreValue(info->result, ret_loc); } } diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc index f5e7e635de..2c8b9b9adf 100644 --- a/compiler/dex/quick/gen_loadstore.cc +++ b/compiler/dex/quick/gen_loadstore.cc @@ -139,6 +139,7 @@ void Mir2Lir::LoadValueDirectWideFixed(RegLocation rl_src, RegStorage r_dest) { } RegLocation Mir2Lir::LoadValue(RegLocation rl_src, RegisterClass op_kind) { + DCHECK(!rl_src.ref || op_kind == kRefReg); rl_src = UpdateLoc(rl_src); if (rl_src.location == kLocPhysReg) { if (!RegClassMatches(op_kind, rl_src.reg)) { @@ -162,6 +163,10 @@ RegLocation Mir2Lir::LoadValue(RegLocation rl_src, RegisterClass op_kind) { return rl_src; } +RegLocation Mir2Lir::LoadValue(RegLocation rl_src) { + return LoadValue(rl_src, LocToRegClass(rl_src)); +} + void Mir2Lir::StoreValue(RegLocation rl_dest, RegLocation rl_src) { /* * Sanity checking - should never try to store to the same @@ -366,7 +371,7 @@ void Mir2Lir::LoadCurrMethodDirect(RegStorage r_tgt) { } RegLocation Mir2Lir::LoadCurrMethod() { - return LoadValue(mir_graph_->GetMethodLoc(), kCoreReg); + return LoadValue(mir_graph_->GetMethodLoc(), kRefReg); } RegLocation Mir2Lir::ForceTemp(RegLocation loc) { diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc index 3af3715f47..e1bdb2e9b6 100644 --- a/compiler/dex/quick/mips/call_mips.cc +++ b/compiler/dex/quick/mips/call_mips.cc @@ -261,11 +261,11 @@ void MipsMir2Lir::GenFillArrayData(DexOffset table_offset, RegLocation rl_src) { void MipsMir2Lir::GenMoveException(RegLocation rl_dest) { int ex_offset = Thread::ExceptionOffset<4>().Int32Value(); - RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); - 
RegStorage reset_reg = AllocTemp(); - Load32Disp(rs_rMIPS_SELF, ex_offset, rl_result.reg); + RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true); + RegStorage reset_reg = AllocTempRef(); + LoadRefDisp(rs_rMIPS_SELF, ex_offset, rl_result.reg); LoadConstant(reset_reg, 0); - Store32Disp(rs_rMIPS_SELF, ex_offset, reset_reg); + StoreRefDisp(rs_rMIPS_SELF, ex_offset, reset_reg); FreeTemp(reset_reg); StoreValue(rl_dest, rl_result); } diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h index e46217337b..ea3c901fa6 100644 --- a/compiler/dex/quick/mips/codegen_mips.h +++ b/compiler/dex/quick/mips/codegen_mips.h @@ -59,6 +59,7 @@ class MipsMir2Lir FINAL : public Mir2Lir { RegLocation GetReturnAlt(); RegLocation GetReturnWideAlt(); RegLocation LocCReturn(); + RegLocation LocCReturnRef(); RegLocation LocCReturnDouble(); RegLocation LocCReturnFloat(); RegLocation LocCReturnWide(); diff --git a/compiler/dex/quick/mips/fp_mips.cc b/compiler/dex/quick/mips/fp_mips.cc index 9fffb2fd1d..4e31477189 100644 --- a/compiler/dex/quick/mips/fp_mips.cc +++ b/compiler/dex/quick/mips/fp_mips.cc @@ -52,7 +52,7 @@ void MipsMir2Lir::GenArithOpFloat(Instruction::Code opcode, FlushAllRegs(); // Send everything to home location CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2, false); - rl_result = GetReturn(true); + rl_result = GetReturn(kFPReg); StoreValue(rl_dest, rl_result); return; case Instruction::NEG_FLOAT: @@ -95,7 +95,7 @@ void MipsMir2Lir::GenArithOpDouble(Instruction::Code opcode, FlushAllRegs(); // Send everything to home location CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2, false); - rl_result = GetReturnWide(true); + rl_result = GetReturnWide(kFPReg); StoreValueWide(rl_dest, rl_result); return; case Instruction::NEG_DOUBLE: @@ -204,7 +204,7 @@ void MipsMir2Lir::GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegStorage r_tgt = LoadHelper(offset); // NOTE: not a safepoint OpReg(kOpBlx, r_tgt); - RegLocation rl_result = GetReturn(false); + RegLocation rl_result = GetReturn(kCoreReg); StoreValue(rl_dest, rl_result); } diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc index 55cf4344f1..c1a7c990f0 100644 --- a/compiler/dex/quick/mips/target_mips.cc +++ b/compiler/dex/quick/mips/target_mips.cc @@ -26,46 +26,43 @@ namespace art { -static const RegStorage core_regs_arr[] = +static constexpr RegStorage core_regs_arr[] = {rs_rZERO, rs_rAT, rs_rV0, rs_rV1, rs_rA0, rs_rA1, rs_rA2, rs_rA3, rs_rT0, rs_rT1, rs_rT2, rs_rT3, rs_rT4, rs_rT5, rs_rT6, rs_rT7, rs_rS0, rs_rS1, rs_rS2, rs_rS3, rs_rS4, rs_rS5, rs_rS6, rs_rS7, rs_rT8, rs_rT9, rs_rK0, rs_rK1, rs_rGP, rs_rSP, rs_rFP, rs_rRA}; -static RegStorage sp_regs_arr[] = +static constexpr RegStorage sp_regs_arr[] = {rs_rF0, rs_rF1, rs_rF2, rs_rF3, rs_rF4, rs_rF5, rs_rF6, rs_rF7, rs_rF8, rs_rF9, rs_rF10, rs_rF11, rs_rF12, rs_rF13, rs_rF14, rs_rF15}; -static RegStorage dp_regs_arr[] = +static constexpr RegStorage dp_regs_arr[] = {rs_rD0, rs_rD1, rs_rD2, rs_rD3, rs_rD4, rs_rD5, rs_rD6, rs_rD7}; -static const RegStorage reserved_regs_arr[] = +static constexpr RegStorage reserved_regs_arr[] = {rs_rZERO, rs_rAT, rs_rS0, rs_rS1, rs_rK0, rs_rK1, rs_rGP, rs_rSP, rs_rRA}; -static RegStorage core_temps_arr[] = +static constexpr RegStorage core_temps_arr[] = {rs_rV0, rs_rV1, rs_rA0, rs_rA1, rs_rA2, rs_rA3, rs_rT0, rs_rT1, rs_rT2, rs_rT3, rs_rT4, rs_rT5, rs_rT6, rs_rT7, rs_rT8}; -static RegStorage 
sp_temps_arr[] = +static constexpr RegStorage sp_temps_arr[] = {rs_rF0, rs_rF1, rs_rF2, rs_rF3, rs_rF4, rs_rF5, rs_rF6, rs_rF7, rs_rF8, rs_rF9, rs_rF10, rs_rF11, rs_rF12, rs_rF13, rs_rF14, rs_rF15}; -static RegStorage dp_temps_arr[] = +static constexpr RegStorage dp_temps_arr[] = {rs_rD0, rs_rD1, rs_rD2, rs_rD3, rs_rD4, rs_rD5, rs_rD6, rs_rD7}; -static const std::vector<RegStorage> empty_pool; -static const std::vector<RegStorage> core_regs(core_regs_arr, - core_regs_arr + sizeof(core_regs_arr) / sizeof(core_regs_arr[0])); -static const std::vector<RegStorage> sp_regs(sp_regs_arr, - sp_regs_arr + sizeof(sp_regs_arr) / sizeof(sp_regs_arr[0])); -static const std::vector<RegStorage> dp_regs(dp_regs_arr, - dp_regs_arr + sizeof(dp_regs_arr) / sizeof(dp_regs_arr[0])); -static const std::vector<RegStorage> reserved_regs(reserved_regs_arr, - reserved_regs_arr + sizeof(reserved_regs_arr) / sizeof(reserved_regs_arr[0])); -static const std::vector<RegStorage> core_temps(core_temps_arr, - core_temps_arr + sizeof(core_temps_arr) / sizeof(core_temps_arr[0])); -static const std::vector<RegStorage> sp_temps(sp_temps_arr, - sp_temps_arr + sizeof(sp_temps_arr) / sizeof(sp_temps_arr[0])); -static const std::vector<RegStorage> dp_temps(dp_temps_arr, - dp_temps_arr + sizeof(dp_temps_arr) / sizeof(dp_temps_arr[0])); +static constexpr ArrayRef<const RegStorage> empty_pool; +static constexpr ArrayRef<const RegStorage> core_regs(core_regs_arr); +static constexpr ArrayRef<const RegStorage> sp_regs(sp_regs_arr); +static constexpr ArrayRef<const RegStorage> dp_regs(dp_regs_arr); +static constexpr ArrayRef<const RegStorage> reserved_regs(reserved_regs_arr); +static constexpr ArrayRef<const RegStorage> core_temps(core_temps_arr); +static constexpr ArrayRef<const RegStorage> sp_temps(sp_temps_arr); +static constexpr ArrayRef<const RegStorage> dp_temps(dp_temps_arr); RegLocation MipsMir2Lir::LocCReturn() { return mips_loc_c_return; } +RegLocation MipsMir2Lir::LocCReturnRef() { + return mips_loc_c_return; +} + RegLocation MipsMir2Lir::LocCReturnWide() { return mips_loc_c_return_wide; } diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index 9fc93d0a1a..9621995b43 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -23,6 +23,36 @@ namespace art { +RegisterClass Mir2Lir::ShortyToRegClass(char shorty_type) { + RegisterClass res; + switch (shorty_type) { + case 'L': + res = kRefReg; + break; + case 'F': + // Expected fallthrough. + case 'D': + res = kFPReg; + break; + default: + res = kCoreReg; + } + return res; +} + +RegisterClass Mir2Lir::LocToRegClass(RegLocation loc) { + RegisterClass res; + if (loc.fp) { + DCHECK(!loc.ref) << "At most, one of ref/fp may be set"; + res = kFPReg; + } else if (loc.ref) { + res = kRefReg; + } else { + res = kCoreReg; + } + return res; +} + void Mir2Lir::LockArg(int in_position, bool wide) { RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position); RegStorage reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) : @@ -149,15 +179,13 @@ bool Mir2Lir::GenSpecialIGet(MIR* mir, const InlineMethod& special) { return false; } - // The inliner doesn't distinguish kDouble or kFloat, use shorty. - bool double_or_float = cu_->shorty[0] == 'F' || cu_->shorty[0] == 'D'; - // Point of no return - no aborts after this GenPrintLabel(mir); LockArg(data.object_arg); - RegStorage reg_obj = LoadArg(data.object_arg, kCoreReg); - RegLocation rl_dest = wide ? 
GetReturnWide(double_or_float) : GetReturn(double_or_float); + RegStorage reg_obj = LoadArg(data.object_arg, kRefReg); RegisterClass reg_class = RegClassForFieldLoadStore(size, data.is_volatile); + RegisterClass ret_reg_class = ShortyToRegClass(cu_->shorty[0]); + RegLocation rl_dest = wide ? GetReturnWide(ret_reg_class) : GetReturn(ret_reg_class); RegStorage r_result = rl_dest.reg; if (!RegClassMatches(reg_class, r_result)) { r_result = wide ? AllocTypedTempWide(rl_dest.fp, reg_class) @@ -205,7 +233,7 @@ bool Mir2Lir::GenSpecialIPut(MIR* mir, const InlineMethod& special) { GenPrintLabel(mir); LockArg(data.object_arg); LockArg(data.src_arg, wide); - RegStorage reg_obj = LoadArg(data.object_arg, kCoreReg); + RegStorage reg_obj = LoadArg(data.object_arg, kRefReg); RegisterClass reg_class = RegClassForFieldLoadStore(size, data.is_volatile); RegStorage reg_src = LoadArg(data.src_arg, reg_class, wide); if (data.is_volatile) { @@ -226,13 +254,12 @@ bool Mir2Lir::GenSpecialIPut(MIR* mir, const InlineMethod& special) { bool Mir2Lir::GenSpecialIdentity(MIR* mir, const InlineMethod& special) { const InlineReturnArgData& data = special.d.return_data; bool wide = (data.is_wide != 0u); - // The inliner doesn't distinguish kDouble or kFloat, use shorty. - bool double_or_float = cu_->shorty[0] == 'F' || cu_->shorty[0] == 'D'; // Point of no return - no aborts after this GenPrintLabel(mir); LockArg(data.arg, wide); - RegLocation rl_dest = wide ? GetReturnWide(double_or_float) : GetReturn(double_or_float); + RegisterClass reg_class = ShortyToRegClass(cu_->shorty[0]); + RegLocation rl_dest = wide ? GetReturnWide(reg_class) : GetReturn(reg_class); LoadArgDirect(data.arg, rl_dest); return true; } @@ -254,7 +281,7 @@ bool Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& speci break; case kInlineOpNonWideConst: { successful = true; - RegLocation rl_dest = GetReturn(cu_->shorty[0] == 'F'); + RegLocation rl_dest = GetReturn(ShortyToRegClass(cu_->shorty[0])); GenPrintLabel(mir); LoadConstant(rl_dest.reg, static_cast<int>(special.d.data)); return_mir = bb->GetNextUnconditionalMir(mir_graph_, mir); @@ -377,26 +404,30 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list } break; - case Instruction::RETURN: case Instruction::RETURN_OBJECT: + DCHECK(rl_src[0].ref); + // Intentional fallthrough. + case Instruction::RETURN: if (!mir_graph_->MethodIsLeaf()) { GenSuspendTest(opt_flags); } - StoreValue(GetReturn(cu_->shorty[0] == 'F'), rl_src[0]); + DCHECK_EQ(LocToRegClass(rl_src[0]), ShortyToRegClass(cu_->shorty[0])); + StoreValue(GetReturn(LocToRegClass(rl_src[0])), rl_src[0]); break; case Instruction::RETURN_WIDE: if (!mir_graph_->MethodIsLeaf()) { GenSuspendTest(opt_flags); } - StoreValueWide(GetReturnWide(cu_->shorty[0] == 'D'), rl_src[0]); + DCHECK_EQ(LocToRegClass(rl_src[0]), ShortyToRegClass(cu_->shorty[0])); + StoreValueWide(GetReturnWide(LocToRegClass(rl_src[0])), rl_src[0]); break; case Instruction::MOVE_RESULT_WIDE: if ((opt_flags & MIR_INLINED) != 0) { break; // Nop - combined w/ previous invoke. } - StoreValueWide(rl_dest, GetReturnWide(rl_dest.fp)); + StoreValueWide(rl_dest, GetReturnWide(LocToRegClass(rl_dest))); break; case Instruction::MOVE_RESULT: @@ -404,7 +435,7 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list if ((opt_flags & MIR_INLINED) != 0) { break; // Nop - combined w/ previous invoke. 
} - StoreValue(rl_dest, GetReturn(rl_dest.fp)); + StoreValue(rl_dest, GetReturn(LocToRegClass(rl_dest))); break; case Instruction::MOVE: @@ -474,7 +505,7 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list case Instruction::ARRAY_LENGTH: int len_offset; len_offset = mirror::Array::LengthOffset().Int32Value(); - rl_src[0] = LoadValue(rl_src[0], kCoreReg); + rl_src[0] = LoadValue(rl_src[0], kRefReg); GenNullCheck(rl_src[0].reg, opt_flags); rl_result = EvalLoc(rl_dest, kCoreReg, true); Load32Disp(rl_src[0].reg, len_offset, rl_result.reg); @@ -782,7 +813,7 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list case Instruction::LONG_TO_INT: rl_src[0] = UpdateLocWide(rl_src[0]); - rl_src[0] = WideToNarrow(rl_src[0]); + rl_src[0] = NarrowRegLoc(rl_src[0]); StoreValue(rl_dest, rl_src[0]); break; @@ -1069,7 +1100,7 @@ bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) { work_half->meta.throw_insn = mir; } - if (opcode >= kMirOpFirst) { + if (MIRGraph::IsPseudoMirOp(opcode)) { HandleExtendedMethodMIR(bb, mir); continue; } diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index f58f078711..ed94a8d844 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -26,6 +26,7 @@ #include "driver/compiler_driver.h" #include "leb128.h" #include "safe_map.h" +#include "utils/array_ref.h" #include "utils/arena_allocator.h" #include "utils/growable_array.h" @@ -332,6 +333,15 @@ class Mir2Lir : public Backend { return arena->Alloc(size, kArenaAllocRegAlloc); } + static const uint32_t k32SoloStorageMask = 0x00000001; + static const uint32_t kLowSingleStorageMask = 0x00000001; + static const uint32_t kHighSingleStorageMask = 0x00000002; + static const uint32_t k64SoloStorageMask = 0x00000003; + static const uint32_t k128SoloStorageMask = 0x0000000f; + static const uint32_t k256SoloStorageMask = 0x000000ff; + static const uint32_t k512SoloStorageMask = 0x0000ffff; + static const uint32_t k1024SoloStorageMask = 0xffffffff; + bool InUse() { return (storage_mask_ & master_->used_storage_) != 0; } void MarkInUse() { master_->used_storage_ |= storage_mask_; } void MarkFree() { master_->used_storage_ &= ~storage_mask_; } @@ -389,7 +399,15 @@ class Mir2Lir : public Backend { LIR* DefEnd() { return def_end_; } void SetDefEnd(LIR* def_end) { def_end_ = def_end; } void ResetDefBody() { def_start_ = def_end_ = nullptr; } - + // Find member of aliased set matching storage_used; return nullptr if none. 
+ RegisterInfo* FindMatchingView(uint32_t storage_used) { + RegisterInfo* res = Master(); + for (; res != nullptr; res = res->GetAliasChain()) { + if (res->StorageMask() == storage_used) + break; + } + return res; + } private: RegStorage reg_; @@ -412,16 +430,16 @@ class Mir2Lir : public Backend { class RegisterPool { public: RegisterPool(Mir2Lir* m2l, ArenaAllocator* arena, - const std::vector<RegStorage>& core_regs, - const std::vector<RegStorage>& core64_regs, - const std::vector<RegStorage>& sp_regs, - const std::vector<RegStorage>& dp_regs, - const std::vector<RegStorage>& reserved_regs, - const std::vector<RegStorage>& reserved64_regs, - const std::vector<RegStorage>& core_temps, - const std::vector<RegStorage>& core64_temps, - const std::vector<RegStorage>& sp_temps, - const std::vector<RegStorage>& dp_temps); + const ArrayRef<const RegStorage>& core_regs, + const ArrayRef<const RegStorage>& core64_regs, + const ArrayRef<const RegStorage>& sp_regs, + const ArrayRef<const RegStorage>& dp_regs, + const ArrayRef<const RegStorage>& reserved_regs, + const ArrayRef<const RegStorage>& reserved64_regs, + const ArrayRef<const RegStorage>& core_temps, + const ArrayRef<const RegStorage>& core64_temps, + const ArrayRef<const RegStorage>& sp_temps, + const ArrayRef<const RegStorage>& dp_temps); ~RegisterPool() {} static void* operator new(size_t size, ArenaAllocator* arena) { return arena->Alloc(size, kArenaAllocRegAlloc); @@ -439,6 +457,8 @@ class Mir2Lir : public Backend { int next_sp_reg_; GrowableArray<RegisterInfo*> dp_regs_; // Double precision float. int next_dp_reg_; + GrowableArray<RegisterInfo*>* ref_regs_; // Points to core_regs_ or core64_regs_ + int* next_ref_reg_; private: Mir2Lir* const m2l_; @@ -533,8 +553,12 @@ class Mir2Lir : public Backend { * just use our knowledge of type to select the most appropriate register class? */ RegisterClass RegClassBySize(OpSize size) { - return (size == kUnsignedHalf || size == kSignedHalf || size == kUnsignedByte || - size == kSignedByte) ? kCoreReg : kAnyReg; + if (size == kReference) { + return kRefReg; + } else { + return (size == kUnsignedHalf || size == kSignedHalf || size == kUnsignedByte || + size == kSignedByte) ? kCoreReg : kAnyReg; + } } size_t CodeBufferSizeInBytes() { @@ -595,6 +619,8 @@ class Mir2Lir : public Backend { return current_dalvik_offset_; } + RegisterClass ShortyToRegClass(char shorty_type); + RegisterClass LocToRegClass(RegLocation loc); int ComputeFrameSize(); virtual void Materialize(); virtual CompiledMethod* GetCompiledMethod(); @@ -648,7 +674,7 @@ class Mir2Lir : public Backend { virtual void EndInvoke(CallInfo* info) {} - // Handle bookkeeping to convert a wide RegLocation to a narow RegLocation. No code generated. + // Handle bookkeeping to convert a wide RegLocation to a narrow RegLocation. No code generated. 
RegLocation NarrowRegLoc(RegLocation loc); // Shared by all targets - implemented in local_optimizations.cc @@ -682,6 +708,7 @@ class Mir2Lir : public Backend { virtual RegStorage AllocFreeTemp(); virtual RegStorage AllocTemp(); virtual RegStorage AllocTempWide(); + virtual RegStorage AllocTempRef(); virtual RegStorage AllocTempSingle(); virtual RegStorage AllocTempDouble(); virtual RegStorage AllocTypedTemp(bool fp_hint, int reg_class); @@ -701,7 +728,6 @@ class Mir2Lir : public Backend { void NullifyRange(RegStorage reg, int s_reg); void MarkDef(RegLocation rl, LIR *start, LIR *finish); void MarkDefWide(RegLocation rl, LIR *start, LIR *finish); - virtual RegLocation WideToNarrow(RegLocation rl); void ResetDefLoc(RegLocation rl); void ResetDefLocWide(RegLocation rl); void ResetDefTracking(); @@ -746,8 +772,8 @@ class Mir2Lir : public Backend { void DoPromotion(); int VRegOffset(int v_reg); int SRegOffset(int s_reg); - RegLocation GetReturnWide(bool is_double); - RegLocation GetReturn(bool is_float); + RegLocation GetReturnWide(RegisterClass reg_class); + RegLocation GetReturn(RegisterClass reg_class); RegisterInfo* GetRegInfo(RegStorage reg); // Shared by all targets - implemented in gen_common.cc. @@ -955,6 +981,8 @@ class Mir2Lir : public Backend { } // Load Dalvik value with 32-bit memory storage. If compressed object reference, decompress. virtual RegLocation LoadValue(RegLocation rl_src, RegisterClass op_kind); + // Same as above, but derive the target register class from the location record. + virtual RegLocation LoadValue(RegLocation rl_src); // Load Dalvik value with 64-bit memory storage. virtual RegLocation LoadValueWide(RegLocation rl_src, RegisterClass op_kind); // Load Dalvik value with 32-bit memory storage. If compressed object reference, decompress. 
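The hunks above, together with the mir_to_lir.cc change earlier in this patch, replace the old boolean selectors (is_float / is_double / rl_dest.fp) with an explicit RegisterClass derived from either the method shorty or the RegLocation flags. As a rough standalone sketch of that mapping only, using simplified stand-in types rather than ART's real Mir2Lir declarations:

// Simplified stand-ins; not ART's actual headers.
enum RegisterClass { kCoreReg, kFPReg, kRefReg };
struct RegLocation { bool fp; bool ref; };  // only the flags the mapping inspects

// Mirrors the Mir2Lir::ShortyToRegClass hunk: 'L' -> reference, 'F'/'D' -> FP, else core.
RegisterClass ShortyToRegClass(char shorty_type) {
  switch (shorty_type) {
    case 'L': return kRefReg;
    case 'F':
    case 'D': return kFPReg;
    default:  return kCoreReg;
  }
}

// Mirrors Mir2Lir::LocToRegClass: fp and ref are mutually exclusive on a location.
RegisterClass LocToRegClass(RegLocation loc) {
  if (loc.fp) return kFPReg;
  if (loc.ref) return kRefReg;
  return kCoreReg;
}

Callers such as GetReturn(LocToRegClass(rl_dest)) then select LocCReturnRef(), LocCReturnFloat(), or LocCReturn() (see the ralloc_util.cc hunk below) instead of branching on a single is_float flag.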
@@ -1104,6 +1132,7 @@ class Mir2Lir : public Backend { virtual RegLocation GetReturnAlt() = 0; virtual RegLocation GetReturnWideAlt() = 0; virtual RegLocation LocCReturn() = 0; + virtual RegLocation LocCReturnRef() = 0; virtual RegLocation LocCReturnDouble() = 0; virtual RegLocation LocCReturnFloat() = 0; virtual RegLocation LocCReturnWide() = 0; diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc index e5ca460e88..bbeef50d73 100644 --- a/compiler/dex/quick/ralloc_util.cc +++ b/compiler/dex/quick/ralloc_util.cc @@ -56,16 +56,16 @@ Mir2Lir::RegisterInfo::RegisterInfo(RegStorage r, uint64_t mask) } Mir2Lir::RegisterPool::RegisterPool(Mir2Lir* m2l, ArenaAllocator* arena, - const std::vector<RegStorage>& core_regs, - const std::vector<RegStorage>& core64_regs, - const std::vector<RegStorage>& sp_regs, - const std::vector<RegStorage>& dp_regs, - const std::vector<RegStorage>& reserved_regs, - const std::vector<RegStorage>& reserved64_regs, - const std::vector<RegStorage>& core_temps, - const std::vector<RegStorage>& core64_temps, - const std::vector<RegStorage>& sp_temps, - const std::vector<RegStorage>& dp_temps) : + const ArrayRef<const RegStorage>& core_regs, + const ArrayRef<const RegStorage>& core64_regs, + const ArrayRef<const RegStorage>& sp_regs, + const ArrayRef<const RegStorage>& dp_regs, + const ArrayRef<const RegStorage>& reserved_regs, + const ArrayRef<const RegStorage>& reserved64_regs, + const ArrayRef<const RegStorage>& core_temps, + const ArrayRef<const RegStorage>& core64_temps, + const ArrayRef<const RegStorage>& sp_temps, + const ArrayRef<const RegStorage>& dp_temps) : core_regs_(arena, core_regs.size()), next_core_reg_(0), core64_regs_(arena, core64_regs.size()), next_core64_reg_(0), sp_regs_(arena, sp_regs.size()), next_sp_reg_(0), @@ -128,6 +128,15 @@ Mir2Lir::RegisterPool::RegisterPool(Mir2Lir* m2l, ArenaAllocator* arena, // Add an entry for InvalidReg with zero'd mask. RegisterInfo* invalid_reg = new (arena) RegisterInfo(RegStorage::InvalidReg(), 0); m2l_->reginfo_map_.Put(RegStorage::InvalidReg().GetReg(), invalid_reg); + + // Existence of core64 registers implies wide references. + if (core64_regs_.Size() != 0) { + ref_regs_ = &core64_regs_; + next_ref_reg_ = &next_core64_reg_; + } else { + ref_regs_ = &core_regs_; + next_ref_reg_ = &next_core_reg_; + } } void Mir2Lir::DumpRegPool(GrowableArray<RegisterInfo*>* regs) { @@ -145,6 +154,7 @@ void Mir2Lir::DumpRegPool(GrowableArray<RegisterInfo*>* regs) { void Mir2Lir::DumpCoreRegPool() { DumpRegPool(®_pool_->core_regs_); + DumpRegPool(®_pool_->core64_regs_); } void Mir2Lir::DumpFpRegPool() { @@ -274,6 +284,7 @@ void Mir2Lir::RecordCorePromotion(RegStorage reg, int s_reg) { /* Reserve a callee-save register. 
Return InvalidReg if none available */ RegStorage Mir2Lir::AllocPreservedCoreReg(int s_reg) { + // TODO: 64-bit and refreg update RegStorage res; GrowableArray<RegisterInfo*>::Iterator it(®_pool_->core_regs_); for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) { @@ -406,6 +417,12 @@ RegStorage Mir2Lir::AllocTempWide() { return res; } +RegStorage Mir2Lir::AllocTempRef() { + RegStorage res = AllocTempBody(*reg_pool_->ref_regs_, reg_pool_->next_ref_reg_, true); + DCHECK(!res.IsPair()); + return res; +} + RegStorage Mir2Lir::AllocTempSingle() { RegStorage res = AllocTempBody(reg_pool_->sp_regs_, ®_pool_->next_sp_reg_, true); DCHECK(res.IsSingle()) << "Reg: 0x" << std::hex << res.GetRawBits(); @@ -419,6 +436,7 @@ RegStorage Mir2Lir::AllocTempDouble() { } RegStorage Mir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class) { + DCHECK_NE(reg_class, kRefReg); // NOTE: the Dalvik width of a reference is always 32 bits. if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) { return AllocTempDouble(); } @@ -428,6 +446,8 @@ RegStorage Mir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class) { RegStorage Mir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) { if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) { return AllocTempSingle(); + } else if (reg_class == kRefReg) { + return AllocTempRef(); } return AllocTemp(); } @@ -446,8 +466,10 @@ RegStorage Mir2Lir::FindLiveReg(GrowableArray<RegisterInfo*> ®s, int s_reg) { RegStorage Mir2Lir::AllocLiveReg(int s_reg, int reg_class, bool wide) { RegStorage reg; - // TODO: might be worth a sanity check here to verify at most 1 live reg per s_reg. - if ((reg_class == kAnyReg) || (reg_class == kFPReg)) { + if (reg_class == kRefReg) { + reg = FindLiveReg(*reg_pool_->ref_regs_, s_reg); + } + if (!reg.Valid() && ((reg_class == kAnyReg) || (reg_class == kFPReg))) { reg = FindLiveReg(wide ? reg_pool_->dp_regs_ : reg_pool_->sp_regs_, s_reg); } if (!reg.Valid() && (reg_class != kFPReg)) { @@ -662,39 +684,6 @@ void Mir2Lir::MarkDefWide(RegLocation rl, LIR *start, LIR *finish) { p->SetDefEnd(finish); } -RegLocation Mir2Lir::WideToNarrow(RegLocation rl) { - DCHECK(rl.wide); - if (rl.location == kLocPhysReg) { - if (rl.reg.IsPair()) { - RegisterInfo* info_lo = GetRegInfo(rl.reg.GetLow()); - RegisterInfo* info_hi = GetRegInfo(rl.reg.GetHigh()); - if (info_lo->IsTemp()) { - info_lo->SetIsWide(false); - info_lo->ResetDefBody(); - } - if (info_hi->IsTemp()) { - info_hi->SetIsWide(false); - info_hi->ResetDefBody(); - } - rl.reg = rl.reg.GetLow(); - } else { - /* - * TODO: If not a pair, we can't just drop the high register. On some targets, we may be - * able to re-cast the 64-bit register as 32 bits, so it might be worthwhile to revisit - * this code. Will probably want to make this a virtual function. - */ - // Can't narrow 64-bit register. Clobber. 
- if (GetRegInfo(rl.reg)->IsTemp()) { - Clobber(rl.reg); - FreeTemp(rl.reg); - } - rl.location = kLocDalvikFrame; - } - } - rl.wide = false; - return rl; -} - void Mir2Lir::ResetDefLoc(RegLocation rl) { DCHECK(!rl.wide); if (IsTemp(rl.reg) && !(cu_->disable_opt & (1 << kSuppressLoads))) { @@ -714,16 +703,8 @@ void Mir2Lir::ResetDefLocWide(RegLocation rl) { } void Mir2Lir::ResetDefTracking() { - GrowableArray<RegisterInfo*>::Iterator core_it(®_pool_->core_regs_); - for (RegisterInfo* info = core_it.Next(); info != nullptr; info = core_it.Next()) { - info->ResetDefBody(); - } - GrowableArray<RegisterInfo*>::Iterator sp_it(®_pool_->core_regs_); - for (RegisterInfo* info = sp_it.Next(); info != nullptr; info = sp_it.Next()) { - info->ResetDefBody(); - } - GrowableArray<RegisterInfo*>::Iterator dp_it(®_pool_->core_regs_); - for (RegisterInfo* info = dp_it.Next(); info != nullptr; info = dp_it.Next()) { + GrowableArray<RegisterInfo*>::Iterator iter(&tempreg_info_); + for (RegisterInfo* info = iter.Next(); info != NULL; info = iter.Next()) { info->ResetDefBody(); } } @@ -798,7 +779,11 @@ void Mir2Lir::FlushAllRegs() { bool Mir2Lir::RegClassMatches(int reg_class, RegStorage reg) { if (reg_class == kAnyReg) { return true; - } else if (reg_class == kCoreReg) { + } else if ((reg_class == kCoreReg) || (reg_class == kRefReg)) { + /* + * For this purpose, consider Core and Ref to be the same class. We aren't dealing + * with width here - that should be checked at a higher level (if needed). + */ return !reg.IsFloat(); } else { return reg.IsFloat(); @@ -1334,20 +1319,26 @@ int Mir2Lir::SRegOffset(int s_reg) { } /* Mark register usage state and return long retloc */ -RegLocation Mir2Lir::GetReturnWide(bool is_double) { - RegLocation gpr_res = LocCReturnWide(); - RegLocation fpr_res = LocCReturnDouble(); - RegLocation res = is_double ? fpr_res : gpr_res; +RegLocation Mir2Lir::GetReturnWide(RegisterClass reg_class) { + RegLocation res; + switch (reg_class) { + case kRefReg: LOG(FATAL); break; + case kFPReg: res = LocCReturnDouble(); break; + default: res = LocCReturnWide(); break; + } Clobber(res.reg); LockTemp(res.reg); MarkWide(res.reg); return res; } -RegLocation Mir2Lir::GetReturn(bool is_float) { - RegLocation gpr_res = LocCReturn(); - RegLocation fpr_res = LocCReturnFloat(); - RegLocation res = is_float ? 
fpr_res : gpr_res; +RegLocation Mir2Lir::GetReturn(RegisterClass reg_class) { + RegLocation res; + switch (reg_class) { + case kRefReg: res = LocCReturnRef(); break; + case kFPReg: res = LocCReturnFloat(); break; + default: res = LocCReturn(); break; + } Clobber(res.reg); if (cu_->instruction_set == kMips) { MarkInUse(res.reg); diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc index 91a66d38e0..39a036560e 100644 --- a/compiler/dex/quick/x86/assemble_x86.cc +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -63,27 +63,36 @@ const X86EncodingMap X86Mir2Lir::EncodingMap[kX86Last] = { { kX86 ## opname ## 16TI8, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "16TI8", "fs:[!0d],!1d" }, \ \ { kX86 ## opname ## 32MR, kMemReg, mem_use | IS_TERTIARY_OP | REG_USE02 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_r32, 0, 0, 0, 0, 0 }, #opname "32MR", "[!0r+!1d],!2r" }, \ -{ kX86 ## opname ## 64MR, kMemReg64, mem_use | IS_TERTIARY_OP | REG_USE02 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_r32, 0, 0, 0, 0, 0 }, #opname "64MR", "[!0r+!1d],!2r" }, \ { kX86 ## opname ## 32AR, kArrayReg, mem_use | IS_QUIN_OP | REG_USE014 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_r32, 0, 0, 0, 0, 0 }, #opname "32AR", "[!0r+!1r<<!2d+!3d],!4r" }, \ -{ kX86 ## opname ## 64AR, kArrayReg64, mem_use | IS_QUIN_OP | REG_USE014 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_r32, 0, 0, 0, 0, 0 }, #opname "64AR", "[!0r+!1r<<!2d+!3d],!4r" }, \ { kX86 ## opname ## 32TR, kThreadReg, mem_use | IS_BINARY_OP | REG_USE1 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_r32, 0, 0, 0, 0, 0 }, #opname "32TR", "fs:[!0d],!1r" }, \ { kX86 ## opname ## 32RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "32RR", "!0r,!1r" }, \ { kX86 ## opname ## 32RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "32RM", "!0r,[!1r+!2d]" }, \ -{ kX86 ## opname ## 64RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { REX_W, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "64RM", "!0r,[!1r+!2d]" }, \ { kX86 ## opname ## 32RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "32RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \ -{ kX86 ## opname ## 64RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { REX_W, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "64RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \ { kX86 ## opname ## 32RT, kRegThread, IS_LOAD | IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "32RT", "!0r,fs:[!1d]" }, \ -{ kX86 ## opname ## 64RT, kReg64Thread, IS_LOAD | IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, r32_rm32, 0, 0, 0, 0, 0 }, #opname "64RT", "!0r,fs:[!1d]" }, \ { kX86 ## opname ## 32RI, kRegImm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 4 }, #opname "32RI", "!0r,!1d" }, \ -{ kX86 ## opname ## 64RI, kReg64Imm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 4 }, #opname "32RI", "!0r,!1d" }, \ { kX86 ## opname ## 32MI, kMemImm, mem_use | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i32, 0, 0, 
rm32_i32_modrm, 0, 4 }, #opname "32MI", "[!0r+!1d],!2d" }, \ { kX86 ## opname ## 32AI, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0, 4 }, #opname "32AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ { kX86 ## opname ## 32TI, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0, 4 }, #opname "32TI", "fs:[!0d],!1d" }, \ { kX86 ## opname ## 32RI8, kRegImm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "32RI8", "!0r,!1d" }, \ -{ kX86 ## opname ## 64RI8, kReg64Imm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "64RI8", "!0r,!1d" }, \ { kX86 ## opname ## 32MI8, kMemImm, mem_use | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "32MI8", "[!0r+!1d],!2d" }, \ { kX86 ## opname ## 32AI8, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "32AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \ -{ kX86 ## opname ## 32TI8, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "32TI8", "fs:[!0d],!1d" } +{ kX86 ## opname ## 32TI8, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "32TI8", "fs:[!0d],!1d" }, \ + \ +{ kX86 ## opname ## 64MR, kMemReg, mem_use | IS_TERTIARY_OP | REG_USE02 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_r32, 0, 0, 0, 0, 0 }, #opname "64MR", "[!0r+!1d],!2r" }, \ +{ kX86 ## opname ## 64AR, kArrayReg, mem_use | IS_QUIN_OP | REG_USE014 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_r32, 0, 0, 0, 0, 0 }, #opname "64AR", "[!0r+!1r<<!2d+!3d],!4r" }, \ +{ kX86 ## opname ## 64TR, kThreadReg, mem_use | IS_BINARY_OP | REG_USE1 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, rm32_r32, 0, 0, 0, 0, 0 }, #opname "64TR", "fs:[!0d],!1r" }, \ +{ kX86 ## opname ## 64RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { REX_W, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "64RR", "!0r,!1r" }, \ +{ kX86 ## opname ## 64RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { REX_W, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "64RM", "!0r,[!1r+!2d]" }, \ +{ kX86 ## opname ## 64RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { REX_W, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "64RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \ +{ kX86 ## opname ## 64RT, kRegThread, IS_LOAD | IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, r32_rm32, 0, 0, 0, 0, 0 }, #opname "64RT", "!0r,fs:[!1d]" }, \ +{ kX86 ## opname ## 64RI, kRegImm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 4 }, #opname "64RI", "!0r,!1d" }, \ +{ kX86 ## opname ## 64MI, kMemImm, mem_use | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0, 4 }, #opname "64MI", "[!0r+!1d],!2d" }, \ +{ kX86 ## opname ## 64AI, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0, 4 }, #opname "64AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ +{ kX86 ## opname ## 64TI, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, 
rm32_i32, 0, 0, rm32_i32_modrm, 0, 4 }, #opname "64TI", "fs:[!0d],!1d" }, \ +{ kX86 ## opname ## 64RI8, kRegImm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "64RI8", "!0r,!1d" }, \ +{ kX86 ## opname ## 64MI8, kMemImm, mem_use | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "64MI8", "[!0r+!1d],!2d" }, \ +{ kX86 ## opname ## 64AI8, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "64AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \ +{ kX86 ## opname ## 64TI8, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "64TI8", "fs:[!0d],!1d" } ENCODING_MAP(Add, IS_LOAD | IS_STORE, REG_DEF0, 0, 0x00 /* RegMem8/Reg8 */, 0x01 /* RegMem32/Reg32 */, @@ -146,6 +155,13 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86Imul32RMI8, kRegMemImm, IS_LOAD | IS_QUAD_OP | REG_DEF0_USE1 | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul32RMI8", "!0r,[!1r+!2d],!3d" }, { kX86Imul32RAI8, kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul32RAI8", "!0r,[!1r+!2r<<!3d+!4d],!5d" }, + { kX86Imul64RRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, { REX_W, 0, 0x69, 0, 0, 0, 0, 8 }, "Imul64RRI", "!0r,!1r,!2d" }, + { kX86Imul64RMI, kRegMemImm, IS_LOAD | IS_QUAD_OP | REG_DEF0_USE1 | SETS_CCODES, { REX_W, 0, 0x69, 0, 0, 0, 0, 8 }, "Imul64RMI", "!0r,[!1r+!2d],!3d" }, + { kX86Imul64RAI, kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { REX_W, 0, 0x69, 0, 0, 0, 0, 8 }, "Imul64RAI", "!0r,[!1r+!2r<<!3d+!4d],!5d" }, + { kX86Imul64RRI8, kRegRegImm, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, { REX_W, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul64RRI8", "!0r,!1r,!2d" }, + { kX86Imul64RMI8, kRegMemImm, IS_LOAD | IS_QUAD_OP | REG_DEF0_USE1 | SETS_CCODES, { REX_W, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul64RMI8", "!0r,[!1r+!2d],!3d" }, + { kX86Imul64RAI8, kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { REX_W, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul64RAI8", "!0r,[!1r+!2r<<!3d+!4d],!5d" }, + { kX86Mov8MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0, 0, 0x88, 0, 0, 0, 0, 0 }, "Mov8MR", "[!0r+!1d],!2r" }, { kX86Mov8AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0, 0, 0x88, 0, 0, 0, 0, 0 }, "Mov8AR", "[!0r+!1r<<!2d+!3d],!4r" }, { kX86Mov8TR, kThreadReg, IS_STORE | IS_BINARY_OP | REG_USE1, { THREAD_PREFIX, 0, 0x88, 0, 0, 0, 0, 0 }, "Mov8TR", "fs:[!0d],!1r" }, @@ -171,30 +187,42 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86Mov16TI, kThreadImm, IS_STORE | IS_BINARY_OP, { THREAD_PREFIX, 0x66, 0xC7, 0, 0, 0, 0, 2 }, "Mov16TI", "fs:[!0d],!1d" }, { kX86Mov32MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov32MR", "[!0r+!1d],!2r" }, - { kX86Mov64MR, kMemReg64, IS_STORE | IS_TERTIARY_OP | REG_USE02, { REX_W, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov64MR", "[!0r+!1d],!2r" }, { kX86Mov32AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov32AR", "[!0r+!1r<<!2d+!3d],!4r" }, - { kX86Mov64AR, kArrayReg64, IS_STORE | IS_QUIN_OP | REG_USE014, { REX_W, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov64AR", "[!0r+!1r<<!2d+!3d],!4r" }, { kX86Mov32TR, kThreadReg, IS_STORE | IS_BINARY_OP | REG_USE1, { THREAD_PREFIX, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov32TR", "fs:[!0d],!1r" }, { kX86Mov32RR, kRegReg, IS_BINARY_OP | REG_DEF0_USE1, { 0, 0, 0x8B, 0, 0, 0, 
0, 0 }, "Mov32RR", "!0r,!1r" }, { kX86Mov32RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0_USE1, { 0, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RM", "!0r,[!1r+!2d]" }, - { kX86Mov64RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0_USE1, { REX_W, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RM", "!0r,[!1r+!2d]" }, { kX86Mov32RA, kRegArray, IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RA", "!0r,[!1r+!2r<<!3d+!4d]" }, - { kX86Mov64RA, kRegArray, IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { REX_W, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RA", "!0r,[!1r+!2r<<!3d+!4d]" }, { kX86Mov32RT, kRegThread, IS_LOAD | IS_BINARY_OP | REG_DEF0, { THREAD_PREFIX, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RT", "!0r,fs:[!1d]" }, - { kX86Mov64RT, kRegThread, IS_LOAD | IS_BINARY_OP | REG_DEF0, { THREAD_PREFIX, REX_W, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RT", "!0r,fs:[!1d]" }, { kX86Mov32RI, kMovRegImm, IS_BINARY_OP | REG_DEF0, { 0, 0, 0xB8, 0, 0, 0, 0, 4 }, "Mov32RI", "!0r,!1d" }, { kX86Mov32MI, kMemImm, IS_STORE | IS_TERTIARY_OP | REG_USE0, { 0, 0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32MI", "[!0r+!1d],!2d" }, { kX86Mov32AI, kArrayImm, IS_STORE | IS_QUIN_OP | REG_USE01, { 0, 0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32AI", "[!0r+!1r<<!2d+!3d],!4d" }, { kX86Mov32TI, kThreadImm, IS_STORE | IS_BINARY_OP, { THREAD_PREFIX, 0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32TI", "fs:[!0d],!1d" }, - { kX86Mov64TI, kThreadImm, IS_STORE | IS_BINARY_OP, { THREAD_PREFIX, REX_W, 0xC7, 0, 0, 0, 0, 4 }, "Mov64TI", "fs:[!0d],!1d" }, - { kX86Lea32RM, kRegMem, IS_TERTIARY_OP | IS_LOAD | REG_DEF0_USE1, { 0, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RM", "!0r,[!1r+!2d]" }, + { kX86Lea32RM, kRegMem, IS_TERTIARY_OP | IS_LOAD | REG_DEF0_USE1, { 0, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RM", "!0r,[!1r+!2d]" }, - { kX86Lea32RA, kRegArray, IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RA", "!0r,[!1r+!2r<<!3d+!4d]" }, + { kX86Lea32RA, kRegArray, IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RA", "!0r,[!1r+!2r<<!3d+!4d]" }, + + { kX86Mov64MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { REX_W, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov64MR", "[!0r+!1d],!2r" }, + { kX86Mov64AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { REX_W, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov64AR", "[!0r+!1r<<!2d+!3d],!4r" }, + { kX86Mov64TR, kThreadReg, IS_STORE | IS_BINARY_OP | REG_USE1, { THREAD_PREFIX, REX_W, 0x89, 0, 0, 0, 0, 0 }, "Mov64TR", "fs:[!0d],!1r" }, + { kX86Mov64RR, kRegReg, IS_BINARY_OP | REG_DEF0_USE1, { REX_W, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RR", "!0r,!1r" }, + { kX86Mov64RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0_USE1, { REX_W, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RM", "!0r,[!1r+!2d]" }, + { kX86Mov64RA, kRegArray, IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { REX_W, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RA", "!0r,[!1r+!2r<<!3d+!4d]" }, + { kX86Mov64RT, kRegThread, IS_LOAD | IS_BINARY_OP | REG_DEF0, { THREAD_PREFIX, REX_W, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RT", "!0r,fs:[!1d]" }, + { kX86Mov64RI, kMovRegImm, IS_BINARY_OP | REG_DEF0, { REX_W, 0, 0xB8, 0, 0, 0, 0, 8 }, "Mov64RI", "!0r,!1d" }, + { kX86Mov64MI, kMemImm, IS_STORE | IS_TERTIARY_OP | REG_USE0, { REX_W, 0, 0xC7, 0, 0, 0, 0, 8 }, "Mov64MI", "[!0r+!1d],!2d" }, + { kX86Mov64AI, kArrayImm, IS_STORE | IS_QUIN_OP | REG_USE01, { REX_W, 0, 0xC7, 0, 0, 0, 0, 8 }, "Mov64AI", "[!0r+!1r<<!2d+!3d],!4d" }, + { kX86Mov64TI, kThreadImm, IS_STORE | IS_BINARY_OP, { THREAD_PREFIX, REX_W, 0xC7, 0, 0, 0, 0, 8 }, "Mov64TI", "fs:[!0d],!1d" }, - { kX86Cmov32RRC, kRegRegCond, IS_TERTIARY_OP | REG_DEF0_USE01 | USES_CCODES, {0, 0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc32RR", "!2c 
!0r,!1r" }, + { kX86Lea64RM, kRegMem, IS_TERTIARY_OP | IS_LOAD | REG_DEF0_USE1, { REX_W, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea64RM", "!0r,[!1r+!2d]" }, - { kX86Cmov32RMC, kRegMemCond, IS_QUAD_OP | IS_LOAD | REG_DEF0_USE01 | USES_CCODES, {0, 0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc32RM", "!3c !0r,[!1r+!2d]" }, + { kX86Lea64RA, kRegArray, IS_QUIN_OP | REG_DEF0_USE12, { REX_W, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea64RA", "!0r,[!1r+!2r<<!3d+!4d]" }, + + { kX86Cmov32RRC, kRegRegCond, IS_TERTIARY_OP | REG_DEF0_USE01 | USES_CCODES, {0, 0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc32RR", "!2c !0r,!1r" }, + { kX86Cmov64RRC, kRegRegCond, IS_TERTIARY_OP | REG_DEF0_USE01 | USES_CCODES, {REX_W, 0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc64RR", "!2c !0r,!1r" }, + + { kX86Cmov32RMC, kRegMemCond, IS_QUAD_OP | IS_LOAD | REG_DEF0_USE01 | USES_CCODES, {0, 0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc32RM", "!3c !0r,[!1r+!2d]" }, + { kX86Cmov64RMC, kRegMemCond, IS_QUAD_OP | IS_LOAD | REG_DEF0_USE01 | USES_CCODES, {REX_W, 0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc64RM", "!3c !0r,[!1r+!2d]" }, #define SHIFT_ENCODING_MAP(opname, modrm_opcode) \ { kX86 ## opname ## 8RI, kShiftRegImm, IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES, { 0, 0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "8RI", "!0r,!1d" }, \ @@ -216,7 +244,14 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86 ## opname ## 32AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { 0, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "32AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ { kX86 ## opname ## 32RC, kShiftRegCl, IS_BINARY_OP | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { 0, 0, 0xD3, 0, 0, modrm_opcode, 0, 0 }, #opname "32RC", "!0r,cl" }, \ { kX86 ## opname ## 32MC, kShiftMemCl, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | REG_USEC | SETS_CCODES, { 0, 0, 0xD3, 0, 0, modrm_opcode, 0, 0 }, #opname "32MC", "[!0r+!1d],cl" }, \ -{ kX86 ## opname ## 32AC, kShiftArrayCl, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | REG_USEC | SETS_CCODES, { 0, 0, 0xD3, 0, 0, modrm_opcode, 0, 0 }, #opname "32AC", "[!0r+!1r<<!2d+!3d],cl" } +{ kX86 ## opname ## 32AC, kShiftArrayCl, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | REG_USEC | SETS_CCODES, { 0, 0, 0xD3, 0, 0, modrm_opcode, 0, 0 }, #opname "32AC", "[!0r+!1r<<!2d+!3d],cl" }, \ + \ +{ kX86 ## opname ## 64RI, kShiftRegImm, IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES, { REX_W, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "64RI", "!0r,!1d" }, \ +{ kX86 ## opname ## 64MI, kShiftMemImm, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { REX_W, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "64MI", "[!0r+!1d],!2d" }, \ +{ kX86 ## opname ## 64AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { REX_W, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "64AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ +{ kX86 ## opname ## 64RC, kShiftRegCl, IS_BINARY_OP | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { REX_W, 0, 0xD3, 0, 0, modrm_opcode, 0, 0 }, #opname "64RC", "!0r,cl" }, \ +{ kX86 ## opname ## 64MC, kShiftMemCl, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | REG_USEC | SETS_CCODES, { REX_W, 0, 0xD3, 0, 0, modrm_opcode, 0, 0 }, #opname "64MC", "[!0r+!1d],cl" }, \ +{ kX86 ## opname ## 64AC, kShiftArrayCl, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | REG_USEC | SETS_CCODES, { REX_W, 0, 0xD3, 0, 0, modrm_opcode, 0, 0 }, #opname "64AC", "[!0r+!1r<<!2d+!3d],cl" } SHIFT_ENCODING_MAP(Rol, 0x0), SHIFT_ENCODING_MAP(Ror, 0x1), @@ -232,6 +267,10 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86Shld32MRI, kMemRegImm, IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | 
SETS_CCODES, { 0, 0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld32MRI", "[!0r+!1d],!2r,!3d" }, { kX86Shrd32RRI, kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01 | SETS_CCODES, { 0, 0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd32RRI", "!0r,!1r,!2d" }, { kX86Shrd32MRI, kMemRegImm, IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { 0, 0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd32MRI", "[!0r+!1d],!2r,!3d" }, + { kX86Shld64RRI, kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01 | SETS_CCODES, { REX_W, 0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld64RRI", "!0r,!1r,!2d" }, + { kX86Shld64MRI, kMemRegImm, IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { REX_W, 0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld64MRI", "[!0r+!1d],!2r,!3d" }, + { kX86Shrd64RRI, kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01 | SETS_CCODES, { REX_W, 0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd64RRI", "!0r,!1r,!2d" }, + { kX86Shrd64MRI, kMemRegImm, IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { REX_W, 0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd64MRI", "[!0r+!1d],!2r,!3d" }, { kX86Test8RI, kRegImm, IS_BINARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xF6, 0, 0, 0, 0, 1}, "Test8RI", "!0r,!1d" }, { kX86Test8MI, kMemImm, IS_LOAD | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xF6, 0, 0, 0, 0, 1}, "Test8MI", "[!0r+!1d],!2d" }, @@ -242,7 +281,12 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86Test32RI, kRegImm, IS_BINARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xF7, 0, 0, 0, 0, 4}, "Test32RI", "!0r,!1d" }, { kX86Test32MI, kMemImm, IS_LOAD | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xF7, 0, 0, 0, 0, 4}, "Test32MI", "[!0r+!1d],!2d" }, { kX86Test32AI, kArrayImm, IS_LOAD | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { 0, 0, 0xF7, 0, 0, 0, 0, 4}, "Test32AI", "[!0r+!1r<<!2d+!3d],!4d" }, + { kX86Test64RI, kRegImm, IS_BINARY_OP | REG_USE0 | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 8}, "Test64RI", "!0r,!1d" }, + { kX86Test64MI, kMemImm, IS_LOAD | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 8}, "Test64MI", "[!0r+!1d],!2d" }, + { kX86Test64AI, kArrayImm, IS_LOAD | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 8}, "Test64AI", "[!0r+!1r<<!2d+!3d],!4d" }, + { kX86Test32RR, kRegReg, IS_BINARY_OP | REG_USE01 | SETS_CCODES, { 0, 0, 0x85, 0, 0, 0, 0, 0}, "Test32RR", "!0r,!1r" }, + { kX86Test64RR, kRegReg, IS_BINARY_OP | REG_USE01 | SETS_CCODES, { REX_W, 0, 0x85, 0, 0, 0, 0, 0}, "Test64RR", "!0r,!1r" }, #define UNARY_ENCODING_MAP(opname, modrm, is_store, sets_ccodes, \ reg, reg_kind, reg_flags, \ @@ -258,7 +302,10 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86 ## opname ## 16 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #arr, hw_format "[!0r+!1r<<!2d+!3d]" }, \ { kX86 ## opname ## 32 ## reg, reg_kind, reg_flags | w_flags | sets_ccodes, { 0, 0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #reg, w_format "!0r" }, \ { kX86 ## opname ## 32 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | w_flags | sets_ccodes, { 0, 0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #mem, w_format "[!0r+!1d]" }, \ -{ kX86 ## opname ## 32 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | w_flags | sets_ccodes, { 0, 0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #arr, w_format "[!0r+!1r<<!2d+!3d]" } +{ kX86 ## opname ## 32 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | w_flags | sets_ccodes, { 0, 0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #arr, w_format "[!0r+!1r<<!2d+!3d]" }, \ +{ kX86 ## opname ## 64 ## reg, reg_kind, reg_flags | w_flags | sets_ccodes, { REX_W, 0, 0xF7, 0, 0, modrm, 
0, imm << 2}, #opname "64" #reg, w_format "!0r" }, \ +{ kX86 ## opname ## 64 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | w_flags | sets_ccodes, { REX_W, 0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "64" #mem, w_format "[!0r+!1d]" }, \ +{ kX86 ## opname ## 64 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | w_flags | sets_ccodes, { REX_W, 0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "64" #arr, w_format "[!0r+!1r<<!2d+!3d]" } UNARY_ENCODING_MAP(Not, 0x2, IS_STORE, 0, R, kReg, IS_UNARY_OP | REG_DEF0_USE0, M, kMem, IS_BINARY_OP | REG_USE0, A, kArray, IS_QUAD_OP | REG_USE01, 0, 0, 0, 0, "", "", ""), UNARY_ENCODING_MAP(Neg, 0x3, IS_STORE, SETS_CCODES, R, kReg, IS_UNARY_OP | REG_DEF0_USE0, M, kMem, IS_BINARY_OP | REG_USE0, A, kArray, IS_QUAD_OP | REG_USE01, 0, 0, 0, 0, "", "", ""), @@ -431,7 +478,8 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86RepneScasw, kPrefix2Nullary, NO_OPERAND | REG_USEA | REG_USEC | SETS_CCODES, { 0x66, 0xF2, 0xAF, 0, 0, 0, 0, 0 }, "RepNE ScasW", "" }, }; -size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int base, int displacement, bool has_sib) { +size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int base, int displacement, + int reg_r, int reg_x, bool has_sib) { size_t size = 0; if (entry->skeleton.prefix1 > 0) { ++size; @@ -439,6 +487,10 @@ size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int base, int displa ++size; } } + if ((NeedsRex(base) || NeedsRex(reg_r) || NeedsRex(reg_x)) && + entry->skeleton.prefix1 != REX_W && entry->skeleton.prefix2 != REX_W) { + ++size; // REX_R + } ++size; // opcode if (entry->skeleton.opcode == 0x0F) { ++size; @@ -447,13 +499,13 @@ size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int base, int displa } } ++size; // modrm - if (has_sib || RegStorage::RegNum(base) == rs_rX86_SP.GetRegNum() + if (has_sib || LowRegisterBits(RegStorage::RegNum(base)) == rs_rX86_SP.GetRegNum() || (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX)) { // SP requires a SIB byte. // GS access also needs a SIB byte for absolute adressing in 64-bit mode. ++size; } - if (displacement != 0 || RegStorage::RegNum(base) == rs_rBP.GetRegNum()) { + if (displacement != 0 || LowRegisterBits(RegStorage::RegNum(base)) == rs_rBP.GetRegNum()) { // BP requires an explicit displacement, even when it's 0. 
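Note on the size accounting in ComputeSize above: the extra byte counted when NeedsRex() fires is the REX prefix that EmitPrefix builds further down in this file. A minimal standalone sketch of that byte's layout, using plain 0-15 register numbers instead of RegStorage (the helper name and signature are illustrative only, not part of the patch):

    #include <cstdint>

    // REX is 0100WRXB. W selects 64-bit operand size; R, X and B extend
    // ModRM.reg, SIB.index and ModRM.rm/SIB.base so that r8-r15 become
    // reachable. The low three bits of each register still go into ModRM/SIB.
    uint8_t BuildRex(bool w, int reg_r, int reg_x, int reg_b) {
      uint8_t rex = 0;
      if (w)         rex |= 0x48;  // REX.W
      if (reg_r > 7) rex |= 0x44;  // REX.R
      if (reg_x > 7) rex |= 0x42;  // REX.X
      if (reg_b > 7) rex |= 0x41;  // REX.B
      return rex;                  // 0 means no REX byte needs to be emitted
    }
    // Example: mov r9, [r10] -> BuildRex(true, 9, 0, 10) == 0x4D, followed by
    // opcode 0x8B and a ModRM byte built from (9 & 7) and (10 & 7).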
if (entry->opcode != kX86Lea32RA) { DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), 0ULL) << entry->name; @@ -477,38 +529,41 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { case kPrefix2Nullary: return 3; // 1 byte of opcode + 2 prefixes case kRegOpcode: // lir operands - 0: reg - return ComputeSize(entry, 0, 0, false) - 1; // substract 1 for modrm - case kReg64: + // substract 1 for modrm + return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false) - 1; case kReg: // lir operands - 0: reg - return ComputeSize(entry, 0, 0, false); + return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false); case kMem: // lir operands - 0: base, 1: disp - return ComputeSize(entry, lir->operands[0], lir->operands[1], false); + return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false); case kArray: // lir operands - 0: base, 1: index, 2: scale, 3: disp - return ComputeSize(entry, lir->operands[0], lir->operands[3], true); - case kMemReg64: + return ComputeSize(entry, lir->operands[0], lir->operands[3], + NO_REG, lir->operands[1], true); case kMemReg: // lir operands - 0: base, 1: disp, 2: reg - return ComputeSize(entry, lir->operands[0], lir->operands[1], false); + return ComputeSize(entry, lir->operands[0], lir->operands[1], + lir->operands[2], NO_REG, false); case kMemRegImm: // lir operands - 0: base, 1: disp, 2: reg 3: immediate - return ComputeSize(entry, lir->operands[0], lir->operands[1], false); - case kArrayReg64: + return ComputeSize(entry, lir->operands[0], lir->operands[1], + lir->operands[2], NO_REG, false); case kArrayReg: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg - return ComputeSize(entry, lir->operands[0], lir->operands[3], true); + return ComputeSize(entry, lir->operands[0], lir->operands[3], + lir->operands[4], lir->operands[1], true); case kThreadReg: // lir operands - 0: disp, 1: reg - return ComputeSize(entry, 0, lir->operands[0], false); - case kRegReg: - return ComputeSize(entry, 0, 0, false); - case kRegRegStore: - return ComputeSize(entry, 0, 0, false); + return ComputeSize(entry, 0, lir->operands[0], lir->operands[1], NO_REG, false); + case kRegReg: // lir operands - 0: reg1, 1: reg2 + return ComputeSize(entry, 0, 0, lir->operands[0], lir->operands[1], false); + case kRegRegStore: // lir operands - 0: reg2, 1: reg1 + return ComputeSize(entry, 0, 0, lir->operands[1], lir->operands[0], false); case kRegMem: // lir operands - 0: reg, 1: base, 2: disp - return ComputeSize(entry, lir->operands[1], lir->operands[2], false); + return ComputeSize(entry, lir->operands[1], lir->operands[2], + lir->operands[0], NO_REG, false); case kRegArray: // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp - return ComputeSize(entry, lir->operands[1], lir->operands[4], true); - case kReg64Thread: // lir operands - 0: reg, 1: disp + return ComputeSize(entry, lir->operands[1], lir->operands[4], + lir->operands[0], lir->operands[2], true); case kRegThread: // lir operands - 0: reg, 1: disp - return ComputeSize(entry, 0, 0x12345678, false); // displacement size is always 32bit - case kReg64Imm: + // displacement size is always 32bit + return ComputeSize(entry, 0, 0x12345678, lir->operands[0], NO_REG, false); case kRegImm: { // lir operands - 0: reg, 1: immediate - size_t size = ComputeSize(entry, 0, 0, false); + size_t size = ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false); if (entry->skeleton.ax_opcode == 0) { return size; } else { @@ -518,47 +573,58 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { } } case kMemImm: // lir operands - 0: 
base, 1: disp, 2: immediate - return ComputeSize(entry, lir->operands[0], lir->operands[1], false); + return ComputeSize(entry, lir->operands[0], lir->operands[1], + NO_REG, lir->operands[0], false); case kArrayImm: // lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate - return ComputeSize(entry, lir->operands[0], lir->operands[3], true); + return ComputeSize(entry, lir->operands[0], lir->operands[3], + NO_REG, lir->operands[1], true); case kThreadImm: // lir operands - 0: disp, 1: imm - return ComputeSize(entry, 0, 0x12345678, false); // displacement size is always 32bit + // displacement size is always 32bit + return ComputeSize(entry, 0, 0x12345678, NO_REG, NO_REG, false); case kRegRegImm: // lir operands - 0: reg, 1: reg, 2: imm case kRegRegImmRev: - return ComputeSize(entry, 0, 0, false); + return ComputeSize(entry, 0, 0, lir->operands[0], lir->operands[1], false); case kRegMemImm: // lir operands - 0: reg, 1: base, 2: disp, 3: imm - return ComputeSize(entry, lir->operands[1], lir->operands[2], false); + return ComputeSize(entry, lir->operands[1], lir->operands[2], + lir->operands[0], NO_REG, false); case kRegArrayImm: // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp, 5: imm - return ComputeSize(entry, lir->operands[1], lir->operands[4], true); + return ComputeSize(entry, lir->operands[1], lir->operands[4], + lir->operands[0], lir->operands[2], true); case kMovRegImm: // lir operands - 0: reg, 1: immediate - return 1 + entry->skeleton.immediate_bytes; + return (entry->skeleton.prefix1 != 0 || NeedsRex(lir->operands[0])?1:0) + + 1 + entry->skeleton.immediate_bytes; case kShiftRegImm: // lir operands - 0: reg, 1: immediate // Shift by immediate one has a shorter opcode. - return ComputeSize(entry, 0, 0, false) - (lir->operands[1] == 1 ? 1 : 0); + return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false) - + (lir->operands[1] == 1 ? 1 : 0); case kShiftMemImm: // lir operands - 0: base, 1: disp, 2: immediate // Shift by immediate one has a shorter opcode. - return ComputeSize(entry, lir->operands[0], lir->operands[1], false) - + return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false) - (lir->operands[2] == 1 ? 1 : 0); case kShiftArrayImm: // lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate // Shift by immediate one has a shorter opcode. - return ComputeSize(entry, lir->operands[0], lir->operands[3], true) - + return ComputeSize(entry, lir->operands[0], lir->operands[3], + NO_REG, lir->operands[1], true) - (lir->operands[4] == 1 ? 
1 : 0); - case kShiftRegCl: - return ComputeSize(entry, 0, 0, false); + case kShiftRegCl: // lir operands - 0: reg, 1: cl + return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false); case kShiftMemCl: // lir operands - 0: base, 1: disp, 2: cl - return ComputeSize(entry, lir->operands[0], lir->operands[1], false); + return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false); case kShiftArrayCl: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg - return ComputeSize(entry, lir->operands[0], lir->operands[3], true); + return ComputeSize(entry, lir->operands[0], lir->operands[3], + lir->operands[4], lir->operands[1], true); case kRegCond: // lir operands - 0: reg, 1: cond - return ComputeSize(entry, 0, 0, false); + return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false); case kMemCond: // lir operands - 0: base, 1: disp, 2: cond - return ComputeSize(entry, lir->operands[0], lir->operands[1], false); + return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false); case kArrayCond: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: cond - return ComputeSize(entry, lir->operands[0], lir->operands[3], true); + return ComputeSize(entry, lir->operands[0], lir->operands[3], + NO_REG, lir->operands[1], true); case kRegRegCond: // lir operands - 0: reg, 1: reg, 2: cond - return ComputeSize(entry, 0, 0, false); + return ComputeSize(entry, 0, 0, lir->operands[0], lir->operands[1], false); case kRegMemCond: // lir operands - 0: reg, 1: reg, 2: disp, 3:cond - return ComputeSize(entry, lir->operands[1], lir->operands[2], false); + return ComputeSize(entry, lir->operands[1], lir->operands[2], + lir->operands[0], lir->operands[1], false); case kJcc: if (lir->opcode == kX86Jcc8) { return 2; // opcode + rel8 @@ -572,21 +638,28 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { } else if (lir->opcode == kX86Jmp32) { return 5; // opcode + rel32 } else if (lir->opcode == kX86JmpT) { - return ComputeSize(entry, 0, 0x12345678, false); // displacement size is always 32bit + // displacement size is always 32bit + return ComputeSize(entry, 0, 0x12345678, NO_REG, NO_REG, false); } else { DCHECK(lir->opcode == kX86JmpR); - return 2; // opcode + modrm + if (NeedsRex(lir->operands[0])) { + return 3; // REX.B + opcode + modrm + } else { + return 2; // opcode + modrm + } } case kCall: switch (lir->opcode) { case kX86CallI: return 5; // opcode 0:disp case kX86CallR: return 2; // opcode modrm case kX86CallM: // lir operands - 0: base, 1: disp - return ComputeSize(entry, lir->operands[0], lir->operands[1], false); + return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false); case kX86CallA: // lir operands - 0: base, 1: index, 2: scale, 3: disp - return ComputeSize(entry, lir->operands[0], lir->operands[3], true); + return ComputeSize(entry, lir->operands[0], lir->operands[3], + NO_REG, lir->operands[1], true); case kX86CallT: // lir operands - 0: disp - return ComputeSize(entry, 0, 0x12345678, false); // displacement size is always 32bit + // displacement size is always 32bit + return ComputeSize(entry, 0, 0x12345678, NO_REG, NO_REG, false); default: break; } @@ -594,16 +667,19 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { case kPcRel: if (entry->opcode == kX86PcRelLoadRA) { // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table - return ComputeSize(entry, lir->operands[1], 0x12345678, true); + return ComputeSize(entry, lir->operands[1], 0x12345678, + lir->operands[0], lir->operands[2], true); } else { 
DCHECK(entry->opcode == kX86PcRelAdr); return 5; // opcode with reg + 4 byte immediate } - case kMacro: + case kMacro: // lir operands - 0: reg DCHECK_EQ(lir->opcode, static_cast<int>(kX86StartOfMethod)); return 5 /* call opcode + 4 byte displacement */ + 1 /* pop reg */ + - ComputeSize(&X86Mir2Lir::EncodingMap[kX86Sub32RI], 0, 0, false) - - (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum() ? 1 : 0); // shorter ax encoding + ComputeSize(&X86Mir2Lir::EncodingMap[kX86Sub32RI], 0, 0, + lir->operands[0], NO_REG, false) - + // shorter ax encoding + (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum() ? 1 : 0); default: break; } @@ -612,19 +688,62 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { } void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry) { + EmitPrefix(entry, NO_REG, NO_REG, NO_REG); +} + +void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry, + uint8_t reg_r, uint8_t reg_x, uint8_t reg_b) { + // REX.WRXB + // W - 64-bit operand + // R - MODRM.reg + // X - SIB.index + // B - MODRM.rm/SIB.base + bool force = false; + bool w = (entry->skeleton.prefix1 == REX_W) || (entry->skeleton.prefix2 == REX_W); + bool r = NeedsRex(reg_r); + bool x = NeedsRex(reg_x); + bool b = NeedsRex(reg_b); + uint8_t rex = force ? 0x40 : 0; + if (w) { + rex |= 0x48; // REX.W000 + } + if (r) { + rex |= 0x44; // REX.0R00 + } + if (x) { + rex |= 0x42; // REX.00X0 + } + if (b) { + rex |= 0x41; // REX.000B + } if (entry->skeleton.prefix1 != 0) { if (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX) { // 64 bit adresses by GS, not FS code_buffer_.push_back(THREAD_PREFIX_GS); } else { - code_buffer_.push_back(entry->skeleton.prefix1); + if (entry->skeleton.prefix1 == REX_W) { + rex |= entry->skeleton.prefix1; + code_buffer_.push_back(rex); + rex = 0; + } else { + code_buffer_.push_back(entry->skeleton.prefix1); + } } if (entry->skeleton.prefix2 != 0) { - code_buffer_.push_back(entry->skeleton.prefix2); + if (entry->skeleton.prefix2 == REX_W) { + rex |= entry->skeleton.prefix2; + code_buffer_.push_back(rex); + rex = 0; + } else { + code_buffer_.push_back(entry->skeleton.prefix2); + } } } else { DCHECK_EQ(0, entry->skeleton.prefix2); } + if (rex != 0) { + code_buffer_.push_back(rex); + } } void X86Mir2Lir::EmitOpcode(const X86EncodingMap* entry) { @@ -643,7 +762,12 @@ void X86Mir2Lir::EmitOpcode(const X86EncodingMap* entry) { } void X86Mir2Lir::EmitPrefixAndOpcode(const X86EncodingMap* entry) { - EmitPrefix(entry); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG); +} + +void X86Mir2Lir::EmitPrefixAndOpcode(const X86EncodingMap* entry, + uint8_t reg_r, uint8_t reg_x, uint8_t reg_b) { + EmitPrefix(entry, reg_r, reg_x, reg_b); EmitOpcode(entry); } @@ -712,7 +836,7 @@ void X86Mir2Lir::EmitModrmSibDisp(uint8_t reg_or_opcode, uint8_t base, uint8_t i EmitDisp(base, disp); } -void X86Mir2Lir::EmitImm(const X86EncodingMap* entry, int imm) { +void X86Mir2Lir::EmitImm(const X86EncodingMap* entry, int64_t imm) { switch (entry->skeleton.immediate_bytes) { case 1: DCHECK(IS_SIMM8(imm)); @@ -724,11 +848,26 @@ void X86Mir2Lir::EmitImm(const X86EncodingMap* entry, int imm) { code_buffer_.push_back((imm >> 8) & 0xFF); break; case 4: + if (imm <0) { + CHECK_EQ((-imm) & 0x0FFFFFFFFl, -imm); + } else { + CHECK_EQ(imm & 0x0FFFFFFFFl, imm); + } code_buffer_.push_back(imm & 0xFF); code_buffer_.push_back((imm >> 8) & 0xFF); code_buffer_.push_back((imm >> 16) & 0xFF); code_buffer_.push_back((imm >> 24) & 0xFF); break; + case 8: + code_buffer_.push_back(imm & 0xFF); + code_buffer_.push_back((imm >> 8) & 0xFF); + 
code_buffer_.push_back((imm >> 16) & 0xFF); + code_buffer_.push_back((imm >> 24) & 0xFF); + code_buffer_.push_back((imm >> 32) & 0xFF); + code_buffer_.push_back((imm >> 40) & 0xFF); + code_buffer_.push_back((imm >> 48) & 0xFF); + code_buffer_.push_back((imm >> 56) & 0xFF); + break; default: LOG(FATAL) << "Unexpected immediate bytes (" << entry->skeleton.immediate_bytes << ") for instruction: " << entry->name; @@ -737,7 +876,8 @@ void X86Mir2Lir::EmitImm(const X86EncodingMap* entry, int imm) { } void X86Mir2Lir::EmitOpRegOpcode(const X86EncodingMap* entry, uint8_t reg) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, reg, NO_REG, NO_REG); + reg = LowRegisterBits(reg); // There's no 3-byte instruction with +rd DCHECK(entry->skeleton.opcode != 0x0F || (entry->skeleton.extra_opcode1 != 0x38 && entry->skeleton.extra_opcode1 != 0x3A)); @@ -749,7 +889,8 @@ void X86Mir2Lir::EmitOpRegOpcode(const X86EncodingMap* entry, uint8_t reg) { } void X86Mir2Lir::EmitOpReg(const X86EncodingMap* entry, uint8_t reg) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, reg, NO_REG, NO_REG); + reg = LowRegisterBits(reg); if (RegStorage::RegNum(reg) >= 4) { DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " " << static_cast<int>(RegStorage::RegNum(reg)) @@ -763,7 +904,8 @@ void X86Mir2Lir::EmitOpReg(const X86EncodingMap* entry, uint8_t reg) { } void X86Mir2Lir::EmitOpMem(const X86EncodingMap* entry, uint8_t base, int disp) { - EmitPrefix(entry); + EmitPrefix(entry, NO_REG, NO_REG, base); + base = LowRegisterBits(base); code_buffer_.push_back(entry->skeleton.opcode); DCHECK_NE(0x0F, entry->skeleton.opcode); DCHECK_EQ(0, entry->skeleton.extra_opcode1); @@ -775,15 +917,29 @@ void X86Mir2Lir::EmitOpMem(const X86EncodingMap* entry, uint8_t base, int disp) void X86Mir2Lir::EmitOpArray(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale, int disp) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, NO_REG, index, base); + index = LowRegisterBits(index); + base = LowRegisterBits(base); EmitModrmSibDisp(entry->skeleton.modrm_opcode, base, index, scale, disp); DCHECK_EQ(0, entry->skeleton.ax_opcode); DCHECK_EQ(0, entry->skeleton.immediate_bytes); } +uint8_t X86Mir2Lir::LowRegisterBits(uint8_t reg) { + uint8_t res = reg; + res = reg & kRegNumMask32; // 3 bits + return res; +} + +bool X86Mir2Lir::NeedsRex(uint8_t reg) { + return RegStorage::RegNum(reg) > 7; +} + void X86Mir2Lir::EmitMemReg(const X86EncodingMap* entry, uint8_t base, int disp, uint8_t reg) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, reg, NO_REG, base); + reg = LowRegisterBits(reg); + base = LowRegisterBits(base); if (RegStorage::RegNum(reg) >= 4) { DCHECK(strchr(entry->name, '8') == NULL || entry->opcode == kX86Movzx8RM || entry->opcode == kX86Movsx8RM) @@ -802,9 +958,12 @@ void X86Mir2Lir::EmitRegMem(const X86EncodingMap* entry, EmitMemReg(entry, base, disp, reg); } -void X86Mir2Lir::EmitRegArray(const X86EncodingMap* entry, uint8_t reg, uint8_t base, uint8_t index, - int scale, int disp) { - EmitPrefixAndOpcode(entry); +void X86Mir2Lir::EmitRegArray(const X86EncodingMap* entry, uint8_t reg, uint8_t base, + uint8_t index, int scale, int disp) { + EmitPrefixAndOpcode(entry, reg, index, base); + reg = LowRegisterBits(reg); + index = LowRegisterBits(index); + base = LowRegisterBits(base); EmitModrmSibDisp(reg, base, index, scale, disp); DCHECK_EQ(0, entry->skeleton.modrm_opcode); DCHECK_EQ(0, entry->skeleton.ax_opcode); @@ -819,7 +978,9 @@ void X86Mir2Lir::EmitArrayReg(const X86EncodingMap* 
entry, uint8_t base, uint8_t void X86Mir2Lir::EmitArrayImm(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale, int disp, int32_t imm) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, NO_REG, index, base); + index = LowRegisterBits(index); + base = LowRegisterBits(base); EmitModrmSibDisp(entry->skeleton.modrm_opcode, base, index, scale, disp); DCHECK_EQ(0, entry->skeleton.ax_opcode); EmitImm(entry, imm); @@ -827,7 +988,8 @@ void X86Mir2Lir::EmitArrayImm(const X86EncodingMap* entry, uint8_t base, uint8_t void X86Mir2Lir::EmitRegThread(const X86EncodingMap* entry, uint8_t reg, int disp) { DCHECK_NE(entry->skeleton.prefix1, 0); - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, reg, NO_REG, NO_REG); + reg = LowRegisterBits(reg); if (RegStorage::RegNum(reg) >= 4) { DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " " << static_cast<int>(RegStorage::RegNum(reg)) @@ -845,7 +1007,9 @@ void X86Mir2Lir::EmitRegThread(const X86EncodingMap* entry, uint8_t reg, int dis } void X86Mir2Lir::EmitRegReg(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, reg1, NO_REG, reg2); + reg1 = LowRegisterBits(reg1); + reg2 = LowRegisterBits(reg2); DCHECK_LT(RegStorage::RegNum(reg1), 8); DCHECK_LT(RegStorage::RegNum(reg2), 8); uint8_t modrm = (3 << 6) | (RegStorage::RegNum(reg1) << 3) | RegStorage::RegNum(reg2); @@ -857,7 +1021,9 @@ void X86Mir2Lir::EmitRegReg(const X86EncodingMap* entry, uint8_t reg1, uint8_t r void X86Mir2Lir::EmitRegRegImm(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, int32_t imm) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, reg1, NO_REG, reg2); + reg1 = LowRegisterBits(reg1); + reg2 = LowRegisterBits(reg2); DCHECK_LT(RegStorage::RegNum(reg1), 8); DCHECK_LT(RegStorage::RegNum(reg2), 8); uint8_t modrm = (3 << 6) | (RegStorage::RegNum(reg1) << 3) | RegStorage::RegNum(reg2); @@ -874,7 +1040,9 @@ void X86Mir2Lir::EmitRegRegImmRev(const X86EncodingMap* entry, void X86Mir2Lir::EmitRegMemImm(const X86EncodingMap* entry, uint8_t reg, uint8_t base, int disp, int32_t imm) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, reg, NO_REG, base); + reg = LowRegisterBits(reg); + base = LowRegisterBits(base); DCHECK(!RegStorage::IsFloat(reg)); DCHECK_LT(RegStorage::RegNum(reg), 8); EmitModrmDisp(reg, base, disp); @@ -889,10 +1057,11 @@ void X86Mir2Lir::EmitMemRegImm(const X86EncodingMap* entry, } void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) { - EmitPrefix(entry); + EmitPrefix(entry, NO_REG, NO_REG, reg); if (RegStorage::RegNum(reg) == rs_rAX.GetRegNum() && entry->skeleton.ax_opcode != 0) { code_buffer_.push_back(entry->skeleton.ax_opcode); } else { + reg = LowRegisterBits(reg); EmitOpcode(entry); uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg); code_buffer_.push_back(modrm); @@ -901,7 +1070,8 @@ void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) { } void X86Mir2Lir::EmitMemImm(const X86EncodingMap* entry, uint8_t base, int disp, int32_t imm) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, base); + base = LowRegisterBits(base); EmitModrmDisp(entry->skeleton.modrm_opcode, base, disp); DCHECK_EQ(0, entry->skeleton.ax_opcode); EmitImm(entry, imm); @@ -918,17 +1088,37 @@ void X86Mir2Lir::EmitThreadImm(const X86EncodingMap* entry, int disp, int imm) { DCHECK_EQ(entry->skeleton.ax_opcode, 0); } -void X86Mir2Lir::EmitMovRegImm(const 
X86EncodingMap* entry, uint8_t reg, int imm) { +void X86Mir2Lir::EmitMovRegImm(const X86EncodingMap* entry, uint8_t reg, int64_t imm) { + EmitPrefix(entry, NO_REG, NO_REG, reg); + reg = LowRegisterBits(reg); DCHECK_LT(RegStorage::RegNum(reg), 8); code_buffer_.push_back(0xB8 + RegStorage::RegNum(reg)); - code_buffer_.push_back(imm & 0xFF); - code_buffer_.push_back((imm >> 8) & 0xFF); - code_buffer_.push_back((imm >> 16) & 0xFF); - code_buffer_.push_back((imm >> 24) & 0xFF); + switch (entry->skeleton.immediate_bytes) { + case 4: + code_buffer_.push_back(imm & 0xFF); + code_buffer_.push_back((imm >> 8) & 0xFF); + code_buffer_.push_back((imm >> 16) & 0xFF); + code_buffer_.push_back((imm >> 24) & 0xFF); + break; + case 8: + code_buffer_.push_back(imm & 0xFF); + code_buffer_.push_back((imm >> 8) & 0xFF); + code_buffer_.push_back((imm >> 16) & 0xFF); + code_buffer_.push_back((imm >> 24) & 0xFF); + code_buffer_.push_back((imm >> 32) & 0xFF); + code_buffer_.push_back((imm >> 40) & 0xFF); + code_buffer_.push_back((imm >> 48) & 0xFF); + code_buffer_.push_back((imm >> 56) & 0xFF); + break; + default: + LOG(FATAL) << "Unsupported immediate size for EmitMovRegImm: " + << static_cast<uint32_t>(entry->skeleton.immediate_bytes); + } } void X86Mir2Lir::EmitShiftRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) { - EmitPrefix(entry); + EmitPrefix(entry, NO_REG, NO_REG, reg); + reg = LowRegisterBits(reg); if (imm != 1) { code_buffer_.push_back(entry->skeleton.opcode); } else { @@ -955,7 +1145,8 @@ void X86Mir2Lir::EmitShiftRegImm(const X86EncodingMap* entry, uint8_t reg, int i void X86Mir2Lir::EmitShiftRegCl(const X86EncodingMap* entry, uint8_t reg, uint8_t cl) { DCHECK_EQ(cl, static_cast<uint8_t>(rs_rCX.GetReg())); - EmitPrefix(entry); + EmitPrefix(entry, reg, NO_REG, NO_REG); + reg = LowRegisterBits(reg); code_buffer_.push_back(entry->skeleton.opcode); DCHECK_NE(0x0F, entry->skeleton.opcode); DCHECK_EQ(0, entry->skeleton.extra_opcode1); @@ -970,7 +1161,8 @@ void X86Mir2Lir::EmitShiftRegCl(const X86EncodingMap* entry, uint8_t reg, uint8_ void X86Mir2Lir::EmitShiftMemCl(const X86EncodingMap* entry, uint8_t base, int displacement, uint8_t cl) { DCHECK_EQ(cl, static_cast<uint8_t>(rs_rCX.GetReg())); - EmitPrefix(entry); + EmitPrefix(entry, NO_REG, NO_REG, base); + base = LowRegisterBits(base); code_buffer_.push_back(entry->skeleton.opcode); DCHECK_NE(0x0F, entry->skeleton.opcode); DCHECK_EQ(0, entry->skeleton.extra_opcode1); @@ -983,7 +1175,8 @@ void X86Mir2Lir::EmitShiftMemCl(const X86EncodingMap* entry, uint8_t base, void X86Mir2Lir::EmitShiftMemImm(const X86EncodingMap* entry, uint8_t base, int displacement, int imm) { - EmitPrefix(entry); + EmitPrefix(entry, NO_REG, NO_REG, base); + base = LowRegisterBits(base); if (imm != 1) { code_buffer_.push_back(entry->skeleton.opcode); } else { @@ -1002,7 +1195,8 @@ void X86Mir2Lir::EmitShiftMemImm(const X86EncodingMap* entry, uint8_t base, } void X86Mir2Lir::EmitRegCond(const X86EncodingMap* entry, uint8_t reg, uint8_t condition) { - EmitPrefix(entry); + EmitPrefix(entry, reg, NO_REG, NO_REG); + reg = LowRegisterBits(reg); DCHECK_EQ(0, entry->skeleton.ax_opcode); DCHECK_EQ(0x0F, entry->skeleton.opcode); code_buffer_.push_back(0x0F); @@ -1015,7 +1209,8 @@ void X86Mir2Lir::EmitRegCond(const X86EncodingMap* entry, uint8_t reg, uint8_t c DCHECK_EQ(entry->skeleton.immediate_bytes, 0); } -void X86Mir2Lir::EmitMemCond(const X86EncodingMap* entry, uint8_t base, int displacement, uint8_t condition) { +void X86Mir2Lir::EmitMemCond(const X86EncodingMap* entry, uint8_t 
base, int displacement, + uint8_t condition) { if (entry->skeleton.prefix1 != 0) { code_buffer_.push_back(entry->skeleton.prefix1); if (entry->skeleton.prefix2 != 0) { @@ -1037,7 +1232,9 @@ void X86Mir2Lir::EmitMemCond(const X86EncodingMap* entry, uint8_t base, int disp void X86Mir2Lir::EmitRegRegCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, uint8_t condition) { // Generate prefix and opcode without the condition - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, reg1, NO_REG, reg2); + reg1 = LowRegisterBits(reg1); + reg2 = LowRegisterBits(reg2); // Now add the condition. The last byte of opcode is the one that receives it. DCHECK_LE(condition, 0xF); @@ -1059,9 +1256,12 @@ void X86Mir2Lir::EmitRegRegCond(const X86EncodingMap* entry, uint8_t reg1, uint8 code_buffer_.push_back(modrm); } -void X86Mir2Lir::EmitRegMemCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t base, int displacement, uint8_t condition) { +void X86Mir2Lir::EmitRegMemCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t base, + int displacement, uint8_t condition) { // Generate prefix and opcode without the condition - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, reg1, NO_REG, base); + reg1 = LowRegisterBits(reg1); + base = LowRegisterBits(base); // Now add the condition. The last byte of opcode is the one that receives it. DCHECK_LE(condition, 0xF); @@ -1094,8 +1294,10 @@ void X86Mir2Lir::EmitJmp(const X86EncodingMap* entry, int rel) { code_buffer_.push_back(rel & 0xFF); } else { DCHECK(entry->opcode == kX86JmpR); - code_buffer_.push_back(entry->skeleton.opcode); uint8_t reg = static_cast<uint8_t>(rel); + EmitPrefix(entry, NO_REG, NO_REG, reg); + code_buffer_.push_back(entry->skeleton.opcode); + reg = LowRegisterBits(reg); DCHECK_LT(RegStorage::RegNum(reg), 8); uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg); code_buffer_.push_back(modrm); @@ -1120,7 +1322,8 @@ void X86Mir2Lir::EmitJcc(const X86EncodingMap* entry, int rel, uint8_t cc) { } void X86Mir2Lir::EmitCallMem(const X86EncodingMap* entry, uint8_t base, int disp) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, base); + base = LowRegisterBits(base); EmitModrmDisp(entry->skeleton.modrm_opcode, base, disp); DCHECK_EQ(0, entry->skeleton.ax_opcode); DCHECK_EQ(0, entry->skeleton.immediate_bytes); @@ -1161,9 +1364,12 @@ void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, uint8_t reg, reinterpret_cast<Mir2Lir::EmbeddedData*>(UnwrapPointer(base_or_table)); disp = tab_rec->offset; } - EmitPrefix(entry); - DCHECK_LT(RegStorage::RegNum(reg), 8); if (entry->opcode == kX86PcRelLoadRA) { + EmitPrefix(entry, reg, index, base_or_table); + reg = LowRegisterBits(reg); + base_or_table = LowRegisterBits(base_or_table); + index = LowRegisterBits(index); + DCHECK_LT(RegStorage::RegNum(reg), 8); code_buffer_.push_back(entry->skeleton.opcode); DCHECK_NE(0x0F, entry->skeleton.opcode); DCHECK_EQ(0, entry->skeleton.extra_opcode1); @@ -1178,6 +1384,7 @@ void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, uint8_t reg, code_buffer_.push_back(sib); DCHECK_EQ(0, entry->skeleton.immediate_bytes); } else { + DCHECK_LT(RegStorage::RegNum(reg), 8); code_buffer_.push_back(entry->skeleton.opcode + RegStorage::RegNum(reg)); } code_buffer_.push_back(disp & 0xFF); @@ -1190,6 +1397,8 @@ void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, uint8_t reg, void X86Mir2Lir::EmitMacro(const X86EncodingMap* entry, uint8_t reg, int offset) { DCHECK(entry->opcode == kX86StartOfMethod) << 
entry->name; + EmitPrefix(entry, reg, NO_REG, NO_REG); + reg = LowRegisterBits(reg); code_buffer_.push_back(0xE8); // call +0 code_buffer_.push_back(0); code_buffer_.push_back(0); @@ -1380,7 +1589,6 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { case kRegOpcode: // lir operands - 0: reg EmitOpRegOpcode(entry, lir->operands[0]); break; - case kReg64: case kReg: // lir operands - 0: reg EmitOpReg(entry, lir->operands[0]); break; @@ -1390,7 +1598,6 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { case kArray: // lir operands - 0: base, 1: index, 2: scale, 3: disp EmitOpArray(entry, lir->operands[0], lir->operands[1], lir->operands[2], lir->operands[3]); break; - case kMemReg64: case kMemReg: // lir operands - 0: base, 1: disp, 2: reg EmitMemReg(entry, lir->operands[0], lir->operands[1], lir->operands[2]); break; @@ -1401,7 +1608,6 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { EmitArrayImm(entry, lir->operands[0], lir->operands[1], lir->operands[2], lir->operands[3], lir->operands[4]); break; - case kArrayReg64: case kArrayReg: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg EmitArrayReg(entry, lir->operands[0], lir->operands[1], lir->operands[2], lir->operands[3], lir->operands[4]); @@ -1413,7 +1619,6 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { EmitRegArray(entry, lir->operands[0], lir->operands[1], lir->operands[2], lir->operands[3], lir->operands[4]); break; - case kReg64Thread: // lir operands - 0: reg, 1: disp case kRegThread: // lir operands - 0: reg, 1: disp EmitRegThread(entry, lir->operands[0], lir->operands[1]); break; @@ -1437,7 +1642,6 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { EmitRegMemImm(entry, lir->operands[0], lir->operands[1], lir->operands[2], lir->operands[3]); break; - case kReg64Imm: case kRegImm: // lir operands - 0: reg, 1: immediate EmitRegImm(entry, lir->operands[0], lir->operands[1]); break; @@ -1469,7 +1673,8 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { EmitRegRegCond(entry, lir->operands[0], lir->operands[1], lir->operands[2]); break; case kRegMemCond: // lir operands - 0: reg, 1: reg, displacement, 3: condition - EmitRegMemCond(entry, lir->operands[0], lir->operands[1], lir->operands[2], lir->operands[3]); + EmitRegMemCond(entry, lir->operands[0], lir->operands[1], lir->operands[2], + lir->operands[3]); break; case kJmp: // lir operands - 0: rel if (entry->opcode == kX86JmpT) { @@ -1503,7 +1708,7 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { EmitPcRel(entry, lir->operands[0], lir->operands[1], lir->operands[2], lir->operands[3], lir->operands[4]); break; - case kMacro: + case kMacro: // lir operands - 0: reg EmitMacro(entry, lir->operands[0], lir->offset); break; default: diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc index 4673cc0f7e..f363eb3a63 100644 --- a/compiler/dex/quick/x86/call_x86.cc +++ b/compiler/dex/quick/x86/call_x86.cc @@ -169,7 +169,7 @@ void X86Mir2Lir::GenMoveException(RegLocation rl_dest) { int ex_offset = Is64BitInstructionSet(cu_->instruction_set) ? 
Thread::ExceptionOffset<8>().Int32Value() : Thread::ExceptionOffset<4>().Int32Value(); - RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true); NewLIR2(kX86Mov32RT, rl_result.reg.GetReg(), ex_offset); NewLIR2(kX86Mov32TI, ex_offset, 0); StoreValue(rl_dest, rl_result); diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 1807d5c13e..648c148c15 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -59,6 +59,7 @@ class X86Mir2Lir : public Mir2Lir { RegLocation GetReturnAlt(); RegLocation GetReturnWideAlt(); RegLocation LocCReturn(); + RegLocation LocCReturnRef(); RegLocation LocCReturnDouble(); RegLocation LocCReturnFloat(); RegLocation LocCReturnWide(); @@ -331,15 +332,21 @@ class X86Mir2Lir : public Mir2Lir { std::vector<uint8_t>* ReturnCallFrameInformation(); protected: - size_t ComputeSize(const X86EncodingMap* entry, int base, int displacement, bool has_sib); + size_t ComputeSize(const X86EncodingMap* entry, int base, int displacement, + int reg_r, int reg_x, bool has_sib); + uint8_t LowRegisterBits(uint8_t reg); + bool NeedsRex(uint8_t reg); void EmitPrefix(const X86EncodingMap* entry); + void EmitPrefix(const X86EncodingMap* entry, uint8_t reg_r, uint8_t reg_x, uint8_t reg_b); void EmitOpcode(const X86EncodingMap* entry); void EmitPrefixAndOpcode(const X86EncodingMap* entry); + void EmitPrefixAndOpcode(const X86EncodingMap* entry, + uint8_t reg_r, uint8_t reg_x, uint8_t reg_b); void EmitDisp(uint8_t base, int disp); void EmitModrmThread(uint8_t reg_or_opcode); void EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int disp); void EmitModrmSibDisp(uint8_t reg_or_opcode, uint8_t base, uint8_t index, int scale, int disp); - void EmitImm(const X86EncodingMap* entry, int imm); + void EmitImm(const X86EncodingMap* entry, int64_t imm); void EmitOpRegOpcode(const X86EncodingMap* entry, uint8_t reg); void EmitOpReg(const X86EncodingMap* entry, uint8_t reg); void EmitOpMem(const X86EncodingMap* entry, uint8_t base, int disp); @@ -362,7 +369,7 @@ class X86Mir2Lir : public Mir2Lir { void EmitMemRegImm(const X86EncodingMap* entry, uint8_t base, int disp, uint8_t reg1, int32_t imm); void EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm); void EmitThreadImm(const X86EncodingMap* entry, int disp, int imm); - void EmitMovRegImm(const X86EncodingMap* entry, uint8_t reg, int imm); + void EmitMovRegImm(const X86EncodingMap* entry, uint8_t reg, int64_t imm); void EmitShiftRegImm(const X86EncodingMap* entry, uint8_t reg, int imm); void EmitShiftMemImm(const X86EncodingMap* entry, uint8_t base, int disp, int imm); void EmitShiftMemCl(const X86EncodingMap* entry, uint8_t base, int displacement, uint8_t cl); diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc index aec39ab529..0421a5967a 100644 --- a/compiler/dex/quick/x86/fp_x86.cc +++ b/compiler/dex/quick/x86/fp_x86.cc @@ -56,7 +56,7 @@ void X86Mir2Lir::GenArithOpFloat(Instruction::Code opcode, CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2, false); } - rl_result = GetReturn(true); + rl_result = GetReturn(kFPReg); StoreValue(rl_dest, rl_result); return; case Instruction::NEG_FLOAT: @@ -118,7 +118,7 @@ void X86Mir2Lir::GenArithOpDouble(Instruction::Code opcode, CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2, false); } - rl_result = GetReturnWide(true); + rl_result = 
GetReturnWide(kFPReg); StoreValueWide(rl_dest, rl_result); return; case Instruction::NEG_DOUBLE: diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index 48bff6e6af..1cc16b9e12 100644 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -173,7 +173,10 @@ void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { RegLocation rl_result; RegLocation rl_src = mir_graph_->GetSrc(mir, 0); RegLocation rl_dest = mir_graph_->GetDest(mir); - rl_src = LoadValue(rl_src, kCoreReg); + // Avoid using float regs here. + RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg; + RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg; + rl_src = LoadValue(rl_src, src_reg_class); ConditionCode ccode = mir->meta.ccode; // The kMirOpSelect has two variants, one for constants and one for moves. @@ -182,7 +185,7 @@ void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { if (is_constant_case) { int true_val = mir->dalvikInsn.vB; int false_val = mir->dalvikInsn.vC; - rl_result = EvalLoc(rl_dest, kCoreReg, true); + rl_result = EvalLoc(rl_dest, result_reg_class, true); /* * For ccode == kCondEq: @@ -203,6 +206,8 @@ void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { * mov t1, $true_case * cmovz result_reg, t1 */ + // FIXME: depending on how you use registers you could get a false != mismatch when dealing + // with different views of the same underlying physical resource (i.e. solo32 vs. solo64). const bool result_reg_same_as_src = (rl_src.location == kLocPhysReg && rl_src.reg.GetReg() == rl_result.reg.GetReg()); const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src); @@ -224,7 +229,7 @@ void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { if (true_zero_case || false_zero_case || catch_all_case) { ConditionCode cc = true_zero_case ? NegateComparison(ccode) : ccode; int immediateForTemp = true_zero_case ? false_val : true_val; - RegStorage temp1_reg = AllocTemp(); + RegStorage temp1_reg = AllocTypedTemp(false, result_reg_class); OpRegImm(kOpMov, temp1_reg, immediateForTemp); OpCondRegReg(kOpCmov, cc, rl_result.reg, temp1_reg); @@ -234,9 +239,9 @@ void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { } else { RegLocation rl_true = mir_graph_->GetSrc(mir, 1); RegLocation rl_false = mir_graph_->GetSrc(mir, 2); - rl_true = LoadValue(rl_true, kCoreReg); - rl_false = LoadValue(rl_false, kCoreReg); - rl_result = EvalLoc(rl_dest, kCoreReg, true); + rl_true = LoadValue(rl_true, result_reg_class); + rl_false = LoadValue(rl_false, result_reg_class); + rl_result = EvalLoc(rl_dest, result_reg_class, true); /* * For ccode == kCondEq: @@ -792,8 +797,8 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { Clobber(rs_r0); LockTemp(rs_r0); - RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg); - RegLocation rl_new_value = LoadValue(rl_src_new_value, kCoreReg); + RegLocation rl_object = LoadValue(rl_src_obj, kRefReg); + RegLocation rl_new_value = LoadValue(rl_src_new_value); if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) { // Mark card for object assuming new value is stored. 
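The recurring change in the hunks above is that values known to be object references are now loaded through the reference register class instead of a hard-coded kCoreReg. A hedged sketch of the selection rule as it is used here (illustrative helper, not part of the patch):

    // Object references are loaded into the dedicated reference class; other
    // 32-bit values keep using the core class, matching the rl_src.ref and
    // rl_dest.ref checks introduced above.
    RegisterClass RegClassForLoc(const RegLocation& loc) {
      return loc.ref ? kRefReg : kCoreReg;
    }
    // Usage mirroring GenSelect above:
    //   rl_src = LoadValue(rl_src, RegClassForLoc(rl_src));
    //   rl_result = EvalLoc(rl_dest, RegClassForLoc(rl_dest), true);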
@@ -1441,7 +1446,7 @@ void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, RegisterClass reg_class = RegClassBySize(size); int len_offset = mirror::Array::LengthOffset().Int32Value(); RegLocation rl_result; - rl_array = LoadValue(rl_array, kCoreReg); + rl_array = LoadValue(rl_array, kRefReg); int data_offset; if (size == k64 || size == kDouble) { @@ -1497,7 +1502,7 @@ void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value(); } - rl_array = LoadValue(rl_array, kCoreReg); + rl_array = LoadValue(rl_array, kRefReg); bool constant_index = rl_index.is_const; int32_t constant_index_value = 0; if (!constant_index) { @@ -1880,7 +1885,7 @@ void X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, // question with simple comparisons. Use compares to memory and SETEQ to optimize for x86. void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_dest, RegLocation rl_src) { - RegLocation object = LoadValue(rl_src, kCoreReg); + RegLocation object = LoadValue(rl_src, kRefReg); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); RegStorage result_reg = rl_result.reg; @@ -1894,7 +1899,7 @@ void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, LoadConstant(result_reg, 0); LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, NULL); - RegStorage check_class = AllocTypedTemp(false, kCoreReg); + RegStorage check_class = AllocTypedTemp(false, kRefReg); // If Method* is already in a register, we can save a copy. RegLocation rl_method = mir_graph_->GetMethodLoc(); @@ -1972,8 +1977,8 @@ void X86Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_k LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), class_reg); int32_t offset_of_type = - mirror::Array::DataOffset(sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() + (sizeof(mirror::HeapReference<mirror::Class*>) - * type_idx); + mirror::Array::DataOffset(sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() + + (sizeof(mirror::HeapReference<mirror::Class*>) * type_idx); LoadRefDisp(class_reg, offset_of_type, class_reg); if (!can_assume_type_is_in_dex_cache) { // Need to test presence of type in dex cache at runtime. @@ -1992,7 +1997,7 @@ void X86Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_k } } /* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result. */ - RegLocation rl_result = GetReturn(false); + RegLocation rl_result = GetReturn(kRefReg); // SETcc only works with EAX..EDX. 
DCHECK_LT(rl_result.reg.GetRegNum(), 4); diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index 19ad2f8ff1..4d8fd1b283 100644 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -26,124 +26,105 @@ namespace art { -static const RegStorage core_regs_arr_32[] = { +static constexpr RegStorage core_regs_arr_32[] = { rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_32, rs_rBP, rs_rSI, rs_rDI, }; -static const RegStorage core_regs_arr_64[] = { - rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_64, rs_rBP, rs_rSI, rs_rDI, +static constexpr RegStorage core_regs_arr_64[] = { + rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_32, rs_rBP, rs_rSI, rs_rDI, #ifdef TARGET_REX_SUPPORT rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15 #endif }; -static const RegStorage core_regs_arr_64q[] = { +static constexpr RegStorage core_regs_arr_64q[] = { rs_r0q, rs_r1q, rs_r2q, rs_r3q, rs_rX86_SP_64, rs_r5q, rs_r6q, rs_r7q, #ifdef TARGET_REX_SUPPORT - rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15 + rs_r8q, rs_r9q, rs_r10q, rs_r11q, rs_r12q, rs_r13q, rs_r14q, rs_r15q #endif }; -static const RegStorage sp_regs_arr_32[] = { +static constexpr RegStorage sp_regs_arr_32[] = { rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, }; -static const RegStorage sp_regs_arr_64[] = { +static constexpr RegStorage sp_regs_arr_64[] = { rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, #ifdef TARGET_REX_SUPPORT rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15 #endif }; -static const RegStorage dp_regs_arr_32[] = { +static constexpr RegStorage dp_regs_arr_32[] = { rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, }; -static const RegStorage dp_regs_arr_64[] = { +static constexpr RegStorage dp_regs_arr_64[] = { rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, #ifdef TARGET_REX_SUPPORT rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15 #endif }; -static const RegStorage reserved_regs_arr_32[] = {rs_rX86_SP_32}; -static const RegStorage reserved_regs_arr_64[] = {rs_rX86_SP_64}; -static const RegStorage reserved_regs_arr_64q[] = {rs_rX86_SP_64}; -static const RegStorage core_temps_arr_32[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX}; -static const RegStorage core_temps_arr_64[] = { +static constexpr RegStorage reserved_regs_arr_32[] = {rs_rX86_SP_32}; +static constexpr RegStorage reserved_regs_arr_64[] = {rs_rX86_SP_32}; +static constexpr RegStorage reserved_regs_arr_64q[] = {rs_rX86_SP_64}; +static constexpr RegStorage core_temps_arr_32[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX}; +static constexpr RegStorage core_temps_arr_64[] = { rs_rAX, rs_rCX, rs_rDX, rs_rSI, rs_rDI, #ifdef TARGET_REX_SUPPORT rs_r8, rs_r9, rs_r10, rs_r11 #endif }; -static const RegStorage core_temps_arr_64q[] = { +static constexpr RegStorage core_temps_arr_64q[] = { rs_r0q, rs_r1q, rs_r2q, rs_r6q, rs_r7q, #ifdef TARGET_REX_SUPPORT rs_r8q, rs_r9q, rs_r10q, rs_r11q #endif }; -static const RegStorage sp_temps_arr_32[] = { +static constexpr RegStorage sp_temps_arr_32[] = { rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, }; -static const RegStorage sp_temps_arr_64[] = { +static constexpr RegStorage sp_temps_arr_64[] = { rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, #ifdef TARGET_REX_SUPPORT rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15 #endif }; -static const RegStorage dp_temps_arr_32[] = { +static constexpr RegStorage dp_temps_arr_32[] = { rs_dr0, rs_dr1, 
rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, }; -static const RegStorage dp_temps_arr_64[] = { +static constexpr RegStorage dp_temps_arr_64[] = { rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, #ifdef TARGET_REX_SUPPORT rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15 #endif }; -static const RegStorage xp_temps_arr_32[] = { +static constexpr RegStorage xp_temps_arr_32[] = { rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7, }; -static const RegStorage xp_temps_arr_64[] = { +static constexpr RegStorage xp_temps_arr_64[] = { rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7, #ifdef TARGET_REX_SUPPORT rs_xr8, rs_xr9, rs_xr10, rs_xr11, rs_xr12, rs_xr13, rs_xr14, rs_xr15 #endif }; -static const std::vector<RegStorage> empty_pool; -static const std::vector<RegStorage> core_regs_32(core_regs_arr_32, - core_regs_arr_32 + sizeof(core_regs_arr_32) / sizeof(core_regs_arr_32[0])); -static const std::vector<RegStorage> core_regs_64(core_regs_arr_64, - core_regs_arr_64 + sizeof(core_regs_arr_64) / sizeof(core_regs_arr_64[0])); -static const std::vector<RegStorage> core_regs_64q(core_regs_arr_64q, - core_regs_arr_64q + sizeof(core_regs_arr_64q) / sizeof(core_regs_arr_64q[0])); -static const std::vector<RegStorage> sp_regs_32(sp_regs_arr_32, - sp_regs_arr_32 + sizeof(sp_regs_arr_32) / sizeof(sp_regs_arr_32[0])); -static const std::vector<RegStorage> sp_regs_64(sp_regs_arr_64, - sp_regs_arr_64 + sizeof(sp_regs_arr_64) / sizeof(sp_regs_arr_64[0])); -static const std::vector<RegStorage> dp_regs_32(dp_regs_arr_32, - dp_regs_arr_32 + sizeof(dp_regs_arr_32) / sizeof(dp_regs_arr_32[0])); -static const std::vector<RegStorage> dp_regs_64(dp_regs_arr_64, - dp_regs_arr_64 + sizeof(dp_regs_arr_64) / sizeof(dp_regs_arr_64[0])); -static const std::vector<RegStorage> reserved_regs_32(reserved_regs_arr_32, - reserved_regs_arr_32 + sizeof(reserved_regs_arr_32) / sizeof(reserved_regs_arr_32[0])); -static const std::vector<RegStorage> reserved_regs_64(reserved_regs_arr_64, - reserved_regs_arr_64 + sizeof(reserved_regs_arr_64) / sizeof(reserved_regs_arr_64[0])); -static const std::vector<RegStorage> reserved_regs_64q(reserved_regs_arr_64q, - reserved_regs_arr_64q + sizeof(reserved_regs_arr_64q) / sizeof(reserved_regs_arr_64q[0])); -static const std::vector<RegStorage> core_temps_32(core_temps_arr_32, - core_temps_arr_32 + sizeof(core_temps_arr_32) / sizeof(core_temps_arr_32[0])); -static const std::vector<RegStorage> core_temps_64(core_temps_arr_64, - core_temps_arr_64 + sizeof(core_temps_arr_64) / sizeof(core_temps_arr_64[0])); -static const std::vector<RegStorage> core_temps_64q(core_temps_arr_64q, - core_temps_arr_64q + sizeof(core_temps_arr_64q) / sizeof(core_temps_arr_64q[0])); -static const std::vector<RegStorage> sp_temps_32(sp_temps_arr_32, - sp_temps_arr_32 + sizeof(sp_temps_arr_32) / sizeof(sp_temps_arr_32[0])); -static const std::vector<RegStorage> sp_temps_64(sp_temps_arr_64, - sp_temps_arr_64 + sizeof(sp_temps_arr_64) / sizeof(sp_temps_arr_64[0])); -static const std::vector<RegStorage> dp_temps_32(dp_temps_arr_32, - dp_temps_arr_32 + sizeof(dp_temps_arr_32) / sizeof(dp_temps_arr_32[0])); -static const std::vector<RegStorage> dp_temps_64(dp_temps_arr_64, - dp_temps_arr_64 + sizeof(dp_temps_arr_64) / sizeof(dp_temps_arr_64[0])); - -static const std::vector<RegStorage> xp_temps_32(xp_temps_arr_32, - xp_temps_arr_32 + sizeof(xp_temps_arr_32) / sizeof(xp_temps_arr_32[0])); -static const std::vector<RegStorage> xp_temps_64(xp_temps_arr_64, - 
xp_temps_arr_64 + sizeof(xp_temps_arr_64) / sizeof(xp_temps_arr_64[0])); +static constexpr ArrayRef<const RegStorage> empty_pool; +static constexpr ArrayRef<const RegStorage> core_regs_32(core_regs_arr_32); +static constexpr ArrayRef<const RegStorage> core_regs_64(core_regs_arr_64); +static constexpr ArrayRef<const RegStorage> core_regs_64q(core_regs_arr_64q); +static constexpr ArrayRef<const RegStorage> sp_regs_32(sp_regs_arr_32); +static constexpr ArrayRef<const RegStorage> sp_regs_64(sp_regs_arr_64); +static constexpr ArrayRef<const RegStorage> dp_regs_32(dp_regs_arr_32); +static constexpr ArrayRef<const RegStorage> dp_regs_64(dp_regs_arr_64); +static constexpr ArrayRef<const RegStorage> reserved_regs_32(reserved_regs_arr_32); +static constexpr ArrayRef<const RegStorage> reserved_regs_64(reserved_regs_arr_64); +static constexpr ArrayRef<const RegStorage> reserved_regs_64q(reserved_regs_arr_64q); +static constexpr ArrayRef<const RegStorage> core_temps_32(core_temps_arr_32); +static constexpr ArrayRef<const RegStorage> core_temps_64(core_temps_arr_64); +static constexpr ArrayRef<const RegStorage> core_temps_64q(core_temps_arr_64q); +static constexpr ArrayRef<const RegStorage> sp_temps_32(sp_temps_arr_32); +static constexpr ArrayRef<const RegStorage> sp_temps_64(sp_temps_arr_64); +static constexpr ArrayRef<const RegStorage> dp_temps_32(dp_temps_arr_32); +static constexpr ArrayRef<const RegStorage> dp_temps_64(dp_temps_arr_64); + +static constexpr ArrayRef<const RegStorage> xp_temps_32(xp_temps_arr_32); +static constexpr ArrayRef<const RegStorage> xp_temps_64(xp_temps_arr_64); RegStorage rs_rX86_SP; @@ -177,6 +158,11 @@ RegLocation X86Mir2Lir::LocCReturn() { return x86_loc_c_return; } +RegLocation X86Mir2Lir::LocCReturnRef() { + // FIXME: return x86_loc_c_return_wide for x86_64 when wide refs supported. + return x86_loc_c_return; +} + RegLocation X86Mir2Lir::LocCReturnWide() { return x86_loc_c_return_wide; } @@ -565,9 +551,9 @@ bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) { void X86Mir2Lir::CompilerInitializeRegAlloc() { if (Gen64Bit()) { - reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_64, empty_pool/*core_regs_64q*/, sp_regs_64, - dp_regs_64, reserved_regs_64, empty_pool/*reserved_regs_64q*/, - core_temps_64, empty_pool/*core_temps_64q*/, sp_temps_64, dp_temps_64); + reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_64, core_regs_64q, sp_regs_64, + dp_regs_64, reserved_regs_64, reserved_regs_64q, + core_temps_64, core_temps_64q, sp_temps_64, dp_temps_64); } else { reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_32, empty_pool, sp_regs_32, dp_regs_32, reserved_regs_32, empty_pool, @@ -577,7 +563,7 @@ void X86Mir2Lir::CompilerInitializeRegAlloc() { // Target-specific adjustments. // Add in XMM registers. - const std::vector<RegStorage> *xp_temps = Gen64Bit() ? &xp_temps_64 : &xp_temps_32; + const ArrayRef<const RegStorage> *xp_temps = Gen64Bit() ? &xp_temps_64 : &xp_temps_32; for (RegStorage reg : *xp_temps) { RegisterInfo* info = new (arena_) RegisterInfo(reg, GetRegMaskCommon(reg)); reginfo_map_.Put(reg.GetReg(), info); @@ -597,10 +583,28 @@ void X86Mir2Lir::CompilerInitializeRegAlloc() { // Redirect 32-bit vector's master storage to 128-bit vector. info->SetMaster(xp_reg_info); - RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | sp_reg_num); + RegStorage dp_reg = RegStorage::FloatSolo64(sp_reg_num); RegisterInfo* dp_reg_info = GetRegInfo(dp_reg); // Redirect 64-bit vector's master storage to 128-bit vector. 
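[Editor's sketch] The register-pool hunk above replaces file-static std::vector<RegStorage> pools, built at runtime from sizeof(arr)/sizeof(arr[0]) iterator pairs, with constexpr ArrayRef<const RegStorage> views whose length is deduced from the array type (the ArrayRef class itself is added later in this change under compiler/utils/array_ref.h). A minimal, purely illustrative stand-in for that pattern, with hypothetical names:

#include <cstddef>

// Illustrative ArrayRef-style view; not the real class, just the shape of it.
template <typename T>
class ConstSpan {
 public:
  constexpr ConstSpan() : data_(nullptr), size_(0u) {}
  // Length is deduced from the array type, so there is no sizeof(a)/sizeof(a[0])
  // boilerplate and no static constructor runs at program start-up.
  template <size_t n>
  constexpr ConstSpan(const T (&array)[n]) : data_(array), size_(n) {}
  constexpr const T* begin() const { return data_; }
  constexpr const T* end() const { return data_ + size_; }
  constexpr size_t size() const { return size_; }
 private:
  const T* data_;
  size_t size_;
};

static constexpr int core_regs_arr[] = {0, 1, 2, 3};
static constexpr ConstSpan<int> core_regs(core_regs_arr);  // compile-time view, size 4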
dp_reg_info->SetMaster(xp_reg_info); + // Singles should show a single 32-bit mask bit, at first referring to the low half. + DCHECK_EQ(info->StorageMask(), 0x1U); + } + + if (Gen64Bit()) { + // Alias 32bit W registers to corresponding 64bit X registers. + GrowableArray<RegisterInfo*>::Iterator w_it(®_pool_->core_regs_); + for (RegisterInfo* info = w_it.Next(); info != nullptr; info = w_it.Next()) { + int x_reg_num = info->GetReg().GetRegNum(); + RegStorage x_reg = RegStorage::Solo64(x_reg_num); + RegisterInfo* x_reg_info = GetRegInfo(x_reg); + // 64bit X register's master storage should refer to itself. + DCHECK_EQ(x_reg_info, x_reg_info->Master()); + // Redirect 32bit W master storage to 64bit X. + info->SetMaster(x_reg_info); + // 32bit W should show a single 32-bit mask bit, at first referring to the low half. + DCHECK_EQ(info->StorageMask(), 0x1U); + } } // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods. @@ -981,7 +985,7 @@ bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { } // Okay, we are commited to inlining this. - RegLocation rl_return = GetReturn(false); + RegLocation rl_return = GetReturn(kCoreReg); RegLocation rl_dest = InlineTarget(info); // Is the string non-NULL? diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc index fed31c1f59..618b3a5987 100644 --- a/compiler/dex/quick/x86/utility_x86.cc +++ b/compiler/dex/quick/x86/utility_x86.cc @@ -89,7 +89,11 @@ LIR* X86Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) { res = NewLIR2(kX86Xor32RR, r_dest.GetReg(), r_dest.GetReg()); } else { // Note, there is no byte immediate form of a 32 bit immediate move. - res = NewLIR2(kX86Mov32RI, r_dest.GetReg(), value); + if (r_dest.Is64Bit()) { + res = NewLIR2(kX86Mov64RI, r_dest.GetReg(), value); + } else { + res = NewLIR2(kX86Mov32RI, r_dest.GetReg(), value); + } } if (r_dest_save.IsFloat()) { @@ -181,7 +185,6 @@ LIR* X86Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) { LOG(FATAL) << "Bad case in OpRegImm " << op; } } - CHECK(!r_dest_src1.Is64Bit() || X86Mir2Lir::EncodingMap[opcode].kind == kReg64Imm) << "OpRegImm(" << op << ")"; return NewLIR2(opcode, r_dest_src1.GetReg(), value); } @@ -559,7 +562,7 @@ LIR* X86Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { // We don't know the proper offset for the value, so pick one that will force // 4 byte offset. We will fix this up in the assembler later to have the right // value. 
- res = LoadBaseDisp(rl_method.reg, 256 /* bogus */, RegStorage::Solo64(low_reg_val), + res = LoadBaseDisp(rl_method.reg, 256 /* bogus */, RegStorage::FloatSolo64(low_reg_val), kDouble); res->target = data_target; res->flags.fixup = kFixupLoad; @@ -569,7 +572,7 @@ LIR* X86Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { if (val_lo == 0) { res = NewLIR2(kX86XorpsRR, low_reg_val, low_reg_val); } else { - res = LoadConstantNoClobber(RegStorage::Solo32(low_reg_val), val_lo); + res = LoadConstantNoClobber(RegStorage::FloatSolo32(low_reg_val), val_lo); } if (val_hi != 0) { RegStorage r_dest_hi = AllocTempDouble(); @@ -866,7 +869,7 @@ void X86Mir2Lir::AnalyzeBB(BasicBlock * bb) { for (MIR *mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { int opcode = mir->dalvikInsn.opcode; - if (opcode >= kMirOpFirst) { + if (MIRGraph::IsPseudoMirOp(opcode)) { AnalyzeExtendedMIR(opcode, bb, mir); } else { AnalyzeMIR(opcode, bb, mir); diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h index 964422ce4c..bb8df893f8 100644 --- a/compiler/dex/quick/x86/x86_lir.h +++ b/compiler/dex/quick/x86/x86_lir.h @@ -175,6 +175,16 @@ enum X86NativeRegisterPool { fr5 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 5, fr6 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 6, fr7 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 7, +#ifdef TARGET_REX_SUPPORT + fr8 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 8, + fr9 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 9, + fr10 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 10, + fr11 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 11, + fr12 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 12, + fr13 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 13, + fr14 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 14, + fr15 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 15, +#endif // xmm registers, double precision aliases. dr0 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 0, @@ -185,8 +195,18 @@ enum X86NativeRegisterPool { dr5 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 5, dr6 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 6, dr7 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 7, +#ifdef TARGET_REX_SUPPORT + dr8 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 8, + dr9 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 9, + dr10 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10, + dr11 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 11, + dr12 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 12, + dr13 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 13, + dr14 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14, + dr15 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 15, +#endif - // xmm registers aliases. 
+ // xmm registers, quad precision aliases xr0 = RegStorage::k128BitSolo | 0, xr1 = RegStorage::k128BitSolo | 1, xr2 = RegStorage::k128BitSolo | 2, @@ -195,6 +215,16 @@ enum X86NativeRegisterPool { xr5 = RegStorage::k128BitSolo | 5, xr6 = RegStorage::k128BitSolo | 6, xr7 = RegStorage::k128BitSolo | 7, +#ifdef TARGET_REX_SUPPORT + xr8 = RegStorage::k128BitSolo | 8, + xr9 = RegStorage::k128BitSolo | 9, + xr10 = RegStorage::k128BitSolo | 10, + xr11 = RegStorage::k128BitSolo | 11, + xr12 = RegStorage::k128BitSolo | 12, + xr13 = RegStorage::k128BitSolo | 13, + xr14 = RegStorage::k128BitSolo | 14, + xr15 = RegStorage::k128BitSolo | 15, +#endif // TODO: as needed, add 256, 512 and 1024-bit xmm views. }; @@ -251,6 +281,16 @@ constexpr RegStorage rs_fr4(RegStorage::kValid | fr4); constexpr RegStorage rs_fr5(RegStorage::kValid | fr5); constexpr RegStorage rs_fr6(RegStorage::kValid | fr6); constexpr RegStorage rs_fr7(RegStorage::kValid | fr7); +#ifdef TARGET_REX_SUPPORT +constexpr RegStorage rs_fr8(RegStorage::kValid | fr8); +constexpr RegStorage rs_fr9(RegStorage::kValid | fr9); +constexpr RegStorage rs_fr10(RegStorage::kValid | fr10); +constexpr RegStorage rs_fr11(RegStorage::kValid | fr11); +constexpr RegStorage rs_fr12(RegStorage::kValid | fr12); +constexpr RegStorage rs_fr13(RegStorage::kValid | fr13); +constexpr RegStorage rs_fr14(RegStorage::kValid | fr14); +constexpr RegStorage rs_fr15(RegStorage::kValid | fr15); +#endif constexpr RegStorage rs_dr0(RegStorage::kValid | dr0); constexpr RegStorage rs_dr1(RegStorage::kValid | dr1); @@ -260,6 +300,16 @@ constexpr RegStorage rs_dr4(RegStorage::kValid | dr4); constexpr RegStorage rs_dr5(RegStorage::kValid | dr5); constexpr RegStorage rs_dr6(RegStorage::kValid | dr6); constexpr RegStorage rs_dr7(RegStorage::kValid | dr7); +#ifdef TARGET_REX_SUPPORT +constexpr RegStorage rs_dr8(RegStorage::kValid | dr8); +constexpr RegStorage rs_dr9(RegStorage::kValid | dr9); +constexpr RegStorage rs_dr10(RegStorage::kValid | dr10); +constexpr RegStorage rs_dr11(RegStorage::kValid | dr11); +constexpr RegStorage rs_dr12(RegStorage::kValid | dr12); +constexpr RegStorage rs_dr13(RegStorage::kValid | dr13); +constexpr RegStorage rs_dr14(RegStorage::kValid | dr14); +constexpr RegStorage rs_dr15(RegStorage::kValid | dr15); +#endif constexpr RegStorage rs_xr0(RegStorage::kValid | xr0); constexpr RegStorage rs_xr1(RegStorage::kValid | xr1); @@ -269,6 +319,16 @@ constexpr RegStorage rs_xr4(RegStorage::kValid | xr4); constexpr RegStorage rs_xr5(RegStorage::kValid | xr5); constexpr RegStorage rs_xr6(RegStorage::kValid | xr6); constexpr RegStorage rs_xr7(RegStorage::kValid | xr7); +#ifdef TARGET_REX_SUPPORT +constexpr RegStorage rs_xr8(RegStorage::kValid | xr8); +constexpr RegStorage rs_xr9(RegStorage::kValid | xr9); +constexpr RegStorage rs_xr10(RegStorage::kValid | xr10); +constexpr RegStorage rs_xr11(RegStorage::kValid | xr11); +constexpr RegStorage rs_xr12(RegStorage::kValid | xr12); +constexpr RegStorage rs_xr13(RegStorage::kValid | xr13); +constexpr RegStorage rs_xr14(RegStorage::kValid | xr14); +constexpr RegStorage rs_xr15(RegStorage::kValid | xr15); +#endif extern X86NativeRegisterPool rX86_ARG0; extern X86NativeRegisterPool rX86_ARG1; @@ -351,10 +411,14 @@ enum X86OpCode { opcode ## 16RR, opcode ## 16RM, opcode ## 16RA, opcode ## 16RT, \ opcode ## 16RI, opcode ## 16MI, opcode ## 16AI, opcode ## 16TI, \ opcode ## 16RI8, opcode ## 16MI8, opcode ## 16AI8, opcode ## 16TI8, \ - opcode ## 32MR, opcode ## 64MR, opcode ## 32AR, opcode ## 64AR, opcode ## 32TR, \ - opcode ## 
32RR, opcode ## 32RM, opcode ## 64RM, opcode ## 32RA, opcode ## 64RA, opcode ## 32RT, opcode ## 64RT, \ - opcode ## 32RI, opcode ## 64RI, opcode ## 32MI, opcode ## 32AI, opcode ## 32TI, \ - opcode ## 32RI8, opcode ## 64RI8, opcode ## 32MI8, opcode ## 32AI8, opcode ## 32TI8 + opcode ## 32MR, opcode ## 32AR, opcode ## 32TR, \ + opcode ## 32RR, opcode ## 32RM, opcode ## 32RA, opcode ## 32RT, \ + opcode ## 32RI, opcode ## 32MI, opcode ## 32AI, opcode ## 32TI, \ + opcode ## 32RI8, opcode ## 32MI8, opcode ## 32AI8, opcode ## 32TI8, \ + opcode ## 64MR, opcode ## 64AR, opcode ## 64TR, \ + opcode ## 64RR, opcode ## 64RM, opcode ## 64RA, opcode ## 64RT, \ + opcode ## 64RI, opcode ## 64MI, opcode ## 64AI, opcode ## 64TI, \ + opcode ## 64RI8, opcode ## 64MI8, opcode ## 64AI8, opcode ## 64TI8 BinaryOpCode(kX86Add), BinaryOpCode(kX86Or), BinaryOpCode(kX86Adc), @@ -367,23 +431,32 @@ enum X86OpCode { kX86Imul16RRI, kX86Imul16RMI, kX86Imul16RAI, kX86Imul32RRI, kX86Imul32RMI, kX86Imul32RAI, kX86Imul32RRI8, kX86Imul32RMI8, kX86Imul32RAI8, + kX86Imul64RRI, kX86Imul64RMI, kX86Imul64RAI, + kX86Imul64RRI8, kX86Imul64RMI8, kX86Imul64RAI8, kX86Mov8MR, kX86Mov8AR, kX86Mov8TR, kX86Mov8RR, kX86Mov8RM, kX86Mov8RA, kX86Mov8RT, kX86Mov8RI, kX86Mov8MI, kX86Mov8AI, kX86Mov8TI, kX86Mov16MR, kX86Mov16AR, kX86Mov16TR, kX86Mov16RR, kX86Mov16RM, kX86Mov16RA, kX86Mov16RT, kX86Mov16RI, kX86Mov16MI, kX86Mov16AI, kX86Mov16TI, - kX86Mov32MR, kX86Mov64MR, kX86Mov32AR, kX86Mov64AR, kX86Mov32TR, - kX86Mov32RR, kX86Mov32RM, kX86Mov64RM, kX86Mov32RA, kX86Mov64RA, kX86Mov32RT, kX86Mov64RT, - kX86Mov32RI, kX86Mov32MI, kX86Mov32AI, kX86Mov32TI, kX86Mov64TI, + kX86Mov32MR, kX86Mov32AR, kX86Mov32TR, + kX86Mov32RR, kX86Mov32RM, kX86Mov32RA, kX86Mov32RT, + kX86Mov32RI, kX86Mov32MI, kX86Mov32AI, kX86Mov32TI, kX86Lea32RM, kX86Lea32RA, + kX86Mov64MR, kX86Mov64AR, kX86Mov64TR, + kX86Mov64RR, kX86Mov64RM, kX86Mov64RA, kX86Mov64RT, + kX86Mov64RI, kX86Mov64MI, kX86Mov64AI, kX86Mov64TI, + kX86Lea64RM, + kX86Lea64RA, // RRC - Register Register ConditionCode - cond_opcode reg1, reg2 // - lir operands - 0: reg1, 1: reg2, 2: CC kX86Cmov32RRC, + kX86Cmov64RRC, // RMC - Register Memory ConditionCode - cond_opcode reg1, [base + disp] // - lir operands - 0: reg1, 1: base, 2: disp 3: CC kX86Cmov32RMC, + kX86Cmov64RMC, // RC - Register CL - opcode reg, CL // - lir operands - 0: reg, 1: CL @@ -397,7 +470,9 @@ enum X86OpCode { opcode ## 16RI, opcode ## 16MI, opcode ## 16AI, \ opcode ## 16RC, opcode ## 16MC, opcode ## 16AC, \ opcode ## 32RI, opcode ## 32MI, opcode ## 32AI, \ - opcode ## 32RC, opcode ## 32MC, opcode ## 32AC + opcode ## 32RC, opcode ## 32MC, opcode ## 32AC, \ + opcode ## 64RI, opcode ## 64MI, opcode ## 64AI, \ + opcode ## 64RC, opcode ## 64MC, opcode ## 64AC BinaryShiftOpCode(kX86Rol), BinaryShiftOpCode(kX86Ror), BinaryShiftOpCode(kX86Rcl), @@ -411,12 +486,18 @@ enum X86OpCode { kX86Shld32MRI, kX86Shrd32RRI, kX86Shrd32MRI, + kX86Shld64RRI, + kX86Shld64MRI, + kX86Shrd64RRI, + kX86Shrd64MRI, #define UnaryOpcode(opcode, reg, mem, array) \ opcode ## 8 ## reg, opcode ## 8 ## mem, opcode ## 8 ## array, \ opcode ## 16 ## reg, opcode ## 16 ## mem, opcode ## 16 ## array, \ - opcode ## 32 ## reg, opcode ## 32 ## mem, opcode ## 32 ## array + opcode ## 32 ## reg, opcode ## 32 ## mem, opcode ## 32 ## array, \ + opcode ## 64 ## reg, opcode ## 64 ## mem, opcode ## 64 ## array UnaryOpcode(kX86Test, RI, MI, AI), kX86Test32RR, + kX86Test64RR, UnaryOpcode(kX86Not, R, M, A), UnaryOpcode(kX86Neg, R, M, A), UnaryOpcode(kX86Mul, DaR, DaM, DaA), @@ -544,20 +625,20 @@ enum 
X86OpCode { /* Instruction assembly field_loc kind */ enum X86EncodingKind { - kData, // Special case for raw data. - kNop, // Special case for variable length nop. - kNullary, // Opcode that takes no arguments. - kPrefix2Nullary, // Opcode that takes no arguments, but 2 prefixes. - kRegOpcode, // Shorter form of R instruction kind (opcode+rd) - kReg, kReg64, kMem, kArray, // R, M and A instruction kinds. - kMemReg, kMemReg64, kArrayReg, kArrayReg64, kThreadReg, // MR, AR and TR instruction kinds. - kRegReg, kRegMem, kRegArray, kRegThread, kReg64Thread, // RR, RM, RA and RT instruction kinds. - kRegRegStore, // RR following the store modrm reg-reg encoding rather than the load. - kRegImm, kReg64Imm, kMemImm, kArrayImm, kThreadImm, // RI, MI, AI and TI instruction kinds. - kRegRegImm, kRegMemImm, kRegArrayImm, // RRI, RMI and RAI instruction kinds. - kMovRegImm, // Shorter form move RI. - kRegRegImmRev, // RRI with first reg in r/m - kMemRegImm, // MRI instruction kinds. + kData, // Special case for raw data. + kNop, // Special case for variable length nop. + kNullary, // Opcode that takes no arguments. + kPrefix2Nullary, // Opcode that takes no arguments, but 2 prefixes. + kRegOpcode, // Shorter form of R instruction kind (opcode+rd) + kReg, kMem, kArray, // R, M and A instruction kinds. + kMemReg, kArrayReg, kThreadReg, // MR, AR and TR instruction kinds. + kRegReg, kRegMem, kRegArray, kRegThread, // RR, RM, RA and RT instruction kinds. + kRegRegStore, // RR following the store modrm reg-reg encoding rather than the load. + kRegImm, kMemImm, kArrayImm, kThreadImm, // RI, MI, AI and TI instruction kinds. + kRegRegImm, kRegMemImm, kRegArrayImm, // RRI, RMI and RAI instruction kinds. + kMovRegImm, // Shorter form move RI. + kRegRegImmRev, // RRI with first reg in r/m + kMemRegImm, // MRI instruction kinds. kShiftRegImm, kShiftMemImm, kShiftArrayImm, // Shift opcode with immediate. kShiftRegCl, kShiftMemCl, kShiftArrayCl, // Shift opcode with register CL. kRegRegReg, kRegRegMem, kRegRegArray, // RRR, RRM, RRA instruction kinds. @@ -607,6 +688,15 @@ struct X86EncodingMap { // 64 Bit Operand Size #define REX_W 0x48 // Extension of the ModR/M reg field +#define REX_R 0x44 +// Extension of the SIB index field +#define REX_X 0x42 +// Extension of the ModR/M r/m field, SIB base field, or Opcode reg field +#define REX_B 0x41 +// Mask extracting the least 3 bits of r0..r15 +#define kRegNumMask32 0x07 +// Value indicating that base or reg is not used +#define NO_REG 0 #define IS_SIMM8(v) ((-128 <= (v)) && ((v) <= 127)) #define IS_SIMM16(v) ((-32768 <= (v)) && ((v) <= 32767)) diff --git a/compiler/dex/reg_storage.h b/compiler/dex/reg_storage.h index 2f7e701219..7e50c311da 100644 --- a/compiler/dex/reg_storage.h +++ b/compiler/dex/reg_storage.h @@ -225,24 +225,6 @@ class RegStorage { return reg_ & kRegNumMask; } - // Aliased double to low single. - RegStorage DoubleToLowSingle() const { - DCHECK(IsDouble()); - return FloatSolo32(GetRegNum() << 1); - } - - // Aliased double to high single. - RegStorage DoubleToHighSingle() const { - DCHECK(IsDouble()); - return FloatSolo32((GetRegNum() << 1) + 1); - } - - // Single to aliased double. - RegStorage SingleToDouble() const { - DCHECK(IsSingle()); - return FloatSolo64(GetRegNum() >> 1); - } - // Is register number in 0..7? 
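[Editor's sketch] The REX_W/REX_R/REX_X/REX_B constants introduced above are the standard x86-64 REX prefix with one of its four low bits set; note the defines already fold in the fixed 0100 high nibble (0x48 = 0x40 | 8, 0x44 = 0x40 | 4, and so on). A rough sketch of how such a prefix is typically assembled when an operand uses one of the extended registers r8-r15, with hypothetical helper names:

#include <cstdint>

constexpr uint8_t kRexBase = 0x40;  // fixed 0100 high nibble
constexpr uint8_t kRexW = 0x08;     // 64-bit operand size
constexpr uint8_t kRexR = 0x04;     // extends ModRM.reg
constexpr uint8_t kRexX = 0x02;     // extends SIB.index
constexpr uint8_t kRexB = 0x01;     // extends ModRM.rm / SIB.base

// Returns the REX byte for the given operands; a real encoder would omit the
// prefix entirely when this returns the bare 0x40 and it is not otherwise needed.
uint8_t BuildRex(bool wide, int reg, int index, int base) {
  uint8_t rex = kRexBase;
  if (wide)       rex |= kRexW;
  if (reg >= 8)   rex |= kRexR;
  if (index >= 8) rex |= kRexX;
  if (base >= 8)  rex |= kRexB;
  return rex;  // only the low 3 bits of each register number go into ModRM/SIB (cf. kRegNumMask32 == 0x07)
}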
bool Low8() const { return GetRegNum() < 8; diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc index 0c5a4ca1d1..bd6bc22531 100644 --- a/compiler/dex/ssa_transformation.cc +++ b/compiler/dex/ssa_transformation.cc @@ -244,9 +244,9 @@ bool MIRGraph::ComputeDominanceFrontier(BasicBlock* bb) { /* Calculate DF_up */ for (uint32_t dominated_idx : bb->i_dominated->Indexes()) { - BasicBlock *dominated_bb = GetBasicBlock(dominated_idx); + BasicBlock* dominated_bb = GetBasicBlock(dominated_idx); for (uint32_t df_up_block_idx : dominated_bb->dom_frontier->Indexes()) { - BasicBlock *df_up_block = GetBasicBlock(df_up_block_idx); + BasicBlock* df_up_block = GetBasicBlock(df_up_block_idx); CheckForDominanceFrontier(bb, df_up_block); } } diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc index 95b3d86d5f..c4af9cb55c 100644 --- a/compiler/dex/vreg_analysis.cc +++ b/compiler/dex/vreg_analysis.cc @@ -233,8 +233,7 @@ bool MIRGraph::InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed) { // Special-case handling for format 35c/3rc invokes Instruction::Code opcode = mir->dalvikInsn.opcode; - int flags = (static_cast<int>(opcode) >= kNumPackedOpcodes) - ? 0 : Instruction::FlagsOf(mir->dalvikInsn.opcode); + int flags = IsPseudoMirOp(opcode) ? 0 : Instruction::FlagsOf(mir->dalvikInsn.opcode); if ((flags & Instruction::kInvoke) && (attrs & (DF_FORMAT_35C | DF_FORMAT_3RC))) { DCHECK_EQ(next, 0); @@ -317,8 +316,7 @@ bool MIRGraph::InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed) { * The Phi set will include all low words or all high * words, so we have to treat them specially. */ - bool is_phi = (static_cast<int>(mir->dalvikInsn.opcode) == - kMirOpPhi); + bool is_phi = (static_cast<int>(mir->dalvikInsn.opcode) == kMirOpPhi); RegLocation rl_temp = reg_location_[defs[0]]; bool defined_fp = rl_temp.defined && rl_temp.fp; bool defined_core = rl_temp.defined && rl_temp.core; @@ -425,6 +423,9 @@ void MIRGraph::InitRegLocations() { loc[ct->s_reg_low].defined = true; } + /* Treat Method* as a normal reference */ + loc[GetMethodSReg()].ref = true; + reg_location_ = loc; int num_regs = cu_->num_dalvik_registers; diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 330456180b..8d4e2838a1 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -1135,7 +1135,7 @@ void CompilerDriver::GetCodeAndMethodForDirectCall(InvokeType* type, InvokeType if (dex_method_idx != DexFile::kDexNoIndex) { target_method->dex_method_index = dex_method_idx; } else { - if (compiling_boot) { + if (compiling_boot && !use_dex_cache) { target_method->dex_method_index = method->GetDexMethodIndex(); target_method->dex_file = method->GetDeclaringClass()->GetDexCache()->GetDexFile(); } diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc index 964dfeb5b1..ca956aac36 100644 --- a/compiler/driver/compiler_driver_test.cc +++ b/compiler/driver/compiler_driver_test.cc @@ -173,7 +173,10 @@ TEST_F(CompilerDriverTest, AbstractMethodErrorStub) { env_->ExceptionClear(); jclass jlame = env_->FindClass("java/lang/AbstractMethodError"); EXPECT_TRUE(env_->IsInstanceOf(exception, jlame)); - Thread::Current()->ClearException(); + { + ScopedObjectAccess soa(Thread::Current()); + Thread::Current()->ClearException(); + } } // TODO: need check-cast test (when stub complete & we can throw/catch diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc index e88ed42380..09f2eaea20 
100644 --- a/compiler/elf_writer_quick.cc +++ b/compiler/elf_writer_quick.cc @@ -739,8 +739,8 @@ bool ElfWriterQuick::Write(OatWriter* oat_writer, << " for " << elf_file_->GetPath(); return false; } - BufferedOutputStream output_stream(new FileOutputStream(elf_file_)); - if (!oat_writer->Write(&output_stream)) { + std::unique_ptr<BufferedOutputStream> output_stream(new BufferedOutputStream(new FileOutputStream(elf_file_))); + if (!oat_writer->Write(output_stream.get())) { PLOG(ERROR) << "Failed to write .rodata and .text for " << elf_file_->GetPath(); return false; } diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index be53926ac8..ca1239f18d 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -650,34 +650,55 @@ void ImageWriter::FixupMethod(ArtMethod* orig, ArtMethod* copy) { copy->SetEntryPointFromInterpreter<kVerifyNone>(reinterpret_cast<EntryPointFromInterpreter*> (const_cast<byte*>(GetOatAddress(interpreter_to_interpreter_bridge_offset_)))); } else { - copy->SetEntryPointFromInterpreter<kVerifyNone>(reinterpret_cast<EntryPointFromInterpreter*> - (const_cast<byte*>(GetOatAddress(interpreter_to_compiled_code_bridge_offset_)))); // Use original code if it exists. Otherwise, set the code pointer to the resolution // trampoline. + + // Quick entrypoint: const byte* quick_code = GetOatAddress(orig->GetQuickOatCodeOffset()); + bool quick_is_interpreted = false; if (quick_code != nullptr && (!orig->IsStatic() || orig->IsConstructor() || orig->GetDeclaringClass()->IsInitialized())) { // We have code for a non-static or initialized method, just use the code. - copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(quick_code); } else if (quick_code == nullptr && orig->IsNative() && (!orig->IsStatic() || orig->GetDeclaringClass()->IsInitialized())) { // Non-static or initialized native method missing compiled code, use generic JNI version. - copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(GetOatAddress(quick_generic_jni_trampoline_offset_)); + quick_code = GetOatAddress(quick_generic_jni_trampoline_offset_); } else if (quick_code == nullptr && !orig->IsNative()) { // We don't have code at all for a non-native method, use the interpreter. - copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(GetOatAddress(quick_to_interpreter_bridge_offset_)); + quick_code = GetOatAddress(quick_to_interpreter_bridge_offset_); + quick_is_interpreted = true; } else { CHECK(!orig->GetDeclaringClass()->IsInitialized()); // We have code for a static method, but need to go through the resolution stub for class // initialization. - copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(GetOatAddress(quick_resolution_trampoline_offset_)); + quick_code = GetOatAddress(quick_resolution_trampoline_offset_); } + copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(quick_code); + + // Portable entrypoint: const byte* portable_code = GetOatAddress(orig->GetPortableOatCodeOffset()); - if (portable_code != nullptr) { - copy->SetEntryPointFromPortableCompiledCode<kVerifyNone>(portable_code); + bool portable_is_interpreted = false; + if (portable_code != nullptr && + (!orig->IsStatic() || orig->IsConstructor() || orig->GetDeclaringClass()->IsInitialized())) { + // We have code for a non-static or initialized method, just use the code. + } else if (portable_code == nullptr && orig->IsNative() && + (!orig->IsStatic() || orig->GetDeclaringClass()->IsInitialized())) { + // Non-static or initialized native method missing compiled code, use generic JNI version. 
+ // TODO: generic JNI support for LLVM. + portable_code = GetOatAddress(portable_resolution_trampoline_offset_); + } else if (portable_code == nullptr && !orig->IsNative()) { + // We don't have code at all for a non-native method, use the interpreter. + portable_code = GetOatAddress(portable_to_interpreter_bridge_offset_); + portable_is_interpreted = true; } else { - copy->SetEntryPointFromPortableCompiledCode<kVerifyNone>(GetOatAddress(portable_resolution_trampoline_offset_)); + CHECK(!orig->GetDeclaringClass()->IsInitialized()); + // We have code for a static method, but need to go through the resolution stub for class + // initialization. + portable_code = GetOatAddress(portable_resolution_trampoline_offset_); } + copy->SetEntryPointFromPortableCompiledCode<kVerifyNone>(portable_code); + + // JNI entrypoint: if (orig->IsNative()) { // The native method's pointer is set to a stub to lookup via dlsym. // Note this is not the code_ pointer, that is handled above. @@ -688,6 +709,15 @@ void ImageWriter::FixupMethod(ArtMethod* orig, ArtMethod* copy) { const byte* native_gc_map = GetOatAddress(native_gc_map_offset); copy->SetNativeGcMap<kVerifyNone>(reinterpret_cast<const uint8_t*>(native_gc_map)); } + + // Interpreter entrypoint: + // Set the interpreter entrypoint depending on whether there is compiled code or not. + uint32_t interpreter_code = (quick_is_interpreted && portable_is_interpreted) + ? interpreter_to_interpreter_bridge_offset_ + : interpreter_to_compiled_code_bridge_offset_; + copy->SetEntryPointFromInterpreter<kVerifyNone>( + reinterpret_cast<EntryPointFromInterpreter*>( + const_cast<byte*>(GetOatAddress(interpreter_code)))); } } } @@ -742,30 +772,42 @@ void ImageWriter::PatchOatCodeAndMethods() { const CompilerDriver::CallPatchInformation* patch = code_to_patch[i]; ArtMethod* target = GetTargetMethod(patch); uintptr_t quick_code = reinterpret_cast<uintptr_t>(class_linker->GetQuickOatCodeFor(target)); + DCHECK_NE(quick_code, 0U) << PrettyMethod(target); uintptr_t code_base = reinterpret_cast<uintptr_t>(&oat_file_->GetOatHeader()); uintptr_t code_offset = quick_code - code_base; + bool is_quick_offset = false; + if (quick_code == reinterpret_cast<uintptr_t>(GetQuickToInterpreterBridge())) { + is_quick_offset = true; + code_offset = quick_to_interpreter_bridge_offset_; + } else if (quick_code == + reinterpret_cast<uintptr_t>(class_linker->GetQuickGenericJniTrampoline())) { + CHECK(target->IsNative()); + is_quick_offset = true; + code_offset = quick_generic_jni_trampoline_offset_; + } + uintptr_t value; if (patch->IsRelative()) { // value to patch is relative to the location being patched const void* quick_oat_code = class_linker->GetQuickOatCodeFor(patch->GetDexFile(), patch->GetReferrerClassDefIdx(), patch->GetReferrerMethodIdx()); + if (is_quick_offset) { + // If its a quick offset it means that we are doing a relative patch from the class linker + // oat_file to the image writer oat_file so we need to adjust the quick oat code to be the + // one in the image writer oat_file. 
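[Editor's sketch] The reworked FixupMethod() above now computes a single quick_code value through a fallback chain (compiled code, generic JNI trampoline, interpreter bridge, or resolution trampoline), mirrors the same chain for the portable entry point, and only afterwards picks the interpreter entry point depending on whether both paths ended up interpreted. A condensed sketch of that selection order; the *_code parameters stand in for oat addresses and are not the runtime's real accessors:

// Condensed sketch of the quick-entrypoint fallback order used above.
const uint8_t* SelectQuickEntry(const uint8_t* compiled, bool is_static, bool is_native,
                                bool is_constructor, bool class_initialized,
                                const uint8_t* generic_jni, const uint8_t* to_interpreter,
                                const uint8_t* resolution, bool* interpreted) {
  *interpreted = false;
  if (compiled != nullptr && (!is_static || is_constructor || class_initialized)) {
    return compiled;                    // non-static or initialized method: use its code
  }
  if (compiled == nullptr && is_native && (!is_static || class_initialized)) {
    return generic_jni;                 // native method without code: generic JNI stub
  }
  if (compiled == nullptr && !is_native) {
    *interpreted = true;
    return to_interpreter;              // no code at all: go through the interpreter
  }
  return resolution;                    // static method still needing class initialization
}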
+ quick_code = PointerToLowMemUInt32(GetOatAddress(code_offset)); + quick_oat_code = + reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(quick_oat_code) + + reinterpret_cast<uintptr_t>(oat_data_begin_) - code_base); + } uintptr_t base = reinterpret_cast<uintptr_t>(quick_oat_code); uintptr_t patch_location = base + patch->GetLiteralOffset(); - uintptr_t value = quick_code - patch_location + patch->RelativeOffset(); - SetPatchLocation(patch, value); + value = quick_code - patch_location + patch->RelativeOffset(); } else { - if (quick_code == reinterpret_cast<uintptr_t>(GetQuickToInterpreterBridge()) || - quick_code == reinterpret_cast<uintptr_t>(class_linker->GetQuickGenericJniTrampoline())) { - if (target->IsNative()) { - // generic JNI, not interpreter bridge from GetQuickOatCodeFor(). - code_offset = quick_generic_jni_trampoline_offset_; - } else { - code_offset = quick_to_interpreter_bridge_offset_; - } - } - SetPatchLocation(patch, PointerToLowMemUInt32(GetOatAddress(code_offset))); + value = PointerToLowMemUInt32(GetOatAddress(code_offset)); } + SetPatchLocation(patch, value); } const CallPatches& methods_to_patch = compiler_driver_.GetMethodsToPatch(); diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc index 9927fe1aa3..8f4eddbea3 100644 --- a/compiler/jni/jni_compiler_test.cc +++ b/compiler/jni/jni_compiler_test.cc @@ -60,7 +60,7 @@ class JniCompilerTest : public CommonCompilerTest { } else { method = c->FindVirtualMethod(method_name, method_sig); } - ASSERT_TRUE(method != NULL) << method_name << " " << method_sig; + ASSERT_TRUE(method != nullptr) << method_name << " " << method_sig; if (method->GetEntryPointFromQuickCompiledCode() == nullptr) { ASSERT_TRUE(method->GetEntryPointFromPortableCompiledCode() == nullptr); CompileMethod(method); @@ -88,16 +88,16 @@ class JniCompilerTest : public CommonCompilerTest { // JNI operations after runtime start. 
env_ = Thread::Current()->GetJniEnv(); jklass_ = env_->FindClass("MyClassNatives"); - ASSERT_TRUE(jklass_ != NULL) << method_name << " " << method_sig; + ASSERT_TRUE(jklass_ != nullptr) << method_name << " " << method_sig; if (direct) { jmethod_ = env_->GetStaticMethodID(jklass_, method_name, method_sig); } else { jmethod_ = env_->GetMethodID(jklass_, method_name, method_sig); } - ASSERT_TRUE(jmethod_ != NULL) << method_name << " " << method_sig; + ASSERT_TRUE(jmethod_ != nullptr) << method_name << " " << method_sig; - if (native_fnptr != NULL) { + if (native_fnptr != nullptr) { JNINativeMethod methods[] = { { method_name, method_sig, native_fnptr } }; ASSERT_EQ(JNI_OK, env_->RegisterNatives(jklass_, methods, 1)) << method_name << " " << method_sig; @@ -107,7 +107,7 @@ class JniCompilerTest : public CommonCompilerTest { jmethodID constructor = env_->GetMethodID(jklass_, "<init>", "()V"); jobj_ = env_->NewObject(jklass_, constructor); - ASSERT_TRUE(jobj_ != NULL) << method_name << " " << method_sig; + ASSERT_TRUE(jobj_ != nullptr) << method_name << " " << method_sig; } public: @@ -125,13 +125,14 @@ jclass JniCompilerTest::jklass_; jobject JniCompilerTest::jobj_; jobject JniCompilerTest::class_loader_; + int gJava_MyClassNatives_foo_calls = 0; void Java_MyClassNatives_foo(JNIEnv* env, jobject thisObj) { // 1 = thisObj EXPECT_EQ(kNative, Thread::Current()->GetState()); Locks::mutator_lock_->AssertNotHeld(Thread::Current()); EXPECT_EQ(Thread::Current()->GetJniEnv(), env); - EXPECT_TRUE(thisObj != NULL); + EXPECT_TRUE(thisObj != nullptr); EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_)); gJava_MyClassNatives_foo_calls++; ScopedObjectAccess soa(Thread::Current()); @@ -151,8 +152,8 @@ TEST_F(JniCompilerTest, CompileAndRunNoArgMethod) { TEST_F(JniCompilerTest, CompileAndRunIntMethodThroughStub) { TEST_DISABLED_FOR_PORTABLE(); - SetUpForTest(false, "bar", "(I)I", - NULL /* calling through stub will link with &Java_MyClassNatives_bar */); + SetUpForTest(false, "bar", "(I)I", nullptr); + // calling through stub will link with &Java_MyClassNatives_bar ScopedObjectAccess soa(Thread::Current()); std::string reason; @@ -168,8 +169,8 @@ TEST_F(JniCompilerTest, CompileAndRunIntMethodThroughStub) { TEST_F(JniCompilerTest, CompileAndRunStaticIntMethodThroughStub) { TEST_DISABLED_FOR_PORTABLE(); - SetUpForTest(true, "sbar", "(I)I", - NULL /* calling through stub will link with &Java_MyClassNatives_sbar */); + SetUpForTest(true, "sbar", "(I)I", nullptr); + // calling through stub will link with &Java_MyClassNatives_sbar ScopedObjectAccess soa(Thread::Current()); std::string reason; @@ -188,7 +189,7 @@ jint Java_MyClassNatives_fooI(JNIEnv* env, jobject thisObj, jint x) { // 1 = thisObj EXPECT_EQ(kNative, Thread::Current()->GetState()); EXPECT_EQ(Thread::Current()->GetJniEnv(), env); - EXPECT_TRUE(thisObj != NULL); + EXPECT_TRUE(thisObj != nullptr); EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_)); gJava_MyClassNatives_fooI_calls++; ScopedObjectAccess soa(Thread::Current()); @@ -215,7 +216,7 @@ jint Java_MyClassNatives_fooII(JNIEnv* env, jobject thisObj, jint x, jint y) { // 1 = thisObj EXPECT_EQ(kNative, Thread::Current()->GetState()); EXPECT_EQ(Thread::Current()->GetJniEnv(), env); - EXPECT_TRUE(thisObj != NULL); + EXPECT_TRUE(thisObj != nullptr); EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_)); gJava_MyClassNatives_fooII_calls++; ScopedObjectAccess soa(Thread::Current()); @@ -243,7 +244,7 @@ jlong Java_MyClassNatives_fooJJ(JNIEnv* env, jobject thisObj, jlong 
x, jlong y) // 1 = thisObj EXPECT_EQ(kNative, Thread::Current()->GetState()); EXPECT_EQ(Thread::Current()->GetJniEnv(), env); - EXPECT_TRUE(thisObj != NULL); + EXPECT_TRUE(thisObj != nullptr); EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_)); gJava_MyClassNatives_fooJJ_calls++; ScopedObjectAccess soa(Thread::Current()); @@ -272,7 +273,7 @@ jdouble Java_MyClassNatives_fooDD(JNIEnv* env, jobject thisObj, jdouble x, jdoub // 1 = thisObj EXPECT_EQ(kNative, Thread::Current()->GetState()); EXPECT_EQ(Thread::Current()->GetJniEnv(), env); - EXPECT_TRUE(thisObj != NULL); + EXPECT_TRUE(thisObj != nullptr); EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_)); gJava_MyClassNatives_fooDD_calls++; ScopedObjectAccess soa(Thread::Current()); @@ -302,7 +303,7 @@ jlong Java_MyClassNatives_fooJJ_synchronized(JNIEnv* env, jobject thisObj, jlong // 1 = thisObj EXPECT_EQ(kNative, Thread::Current()->GetState()); EXPECT_EQ(Thread::Current()->GetJniEnv(), env); - EXPECT_TRUE(thisObj != NULL); + EXPECT_TRUE(thisObj != nullptr); EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_)); gJava_MyClassNatives_fooJJ_synchronized_calls++; ScopedObjectAccess soa(Thread::Current()); @@ -329,7 +330,7 @@ jobject Java_MyClassNatives_fooIOO(JNIEnv* env, jobject thisObj, jint x, jobject // 3 = this + y + z EXPECT_EQ(kNative, Thread::Current()->GetState()); EXPECT_EQ(Thread::Current()->GetJniEnv(), env); - EXPECT_TRUE(thisObj != NULL); + EXPECT_TRUE(thisObj != nullptr); EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_)); gJava_MyClassNatives_fooIOO_calls++; ScopedObjectAccess soa(Thread::Current()); @@ -353,28 +354,28 @@ TEST_F(JniCompilerTest, CompileAndRunIntObjectObjectMethod) { reinterpret_cast<void*>(&Java_MyClassNatives_fooIOO)); EXPECT_EQ(0, gJava_MyClassNatives_fooIOO_calls); - jobject result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 0, NULL, NULL); + jobject result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 0, nullptr, nullptr); EXPECT_TRUE(env_->IsSameObject(jobj_, result)); EXPECT_EQ(1, gJava_MyClassNatives_fooIOO_calls); - result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 0, NULL, jklass_); + result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 0, nullptr, jklass_); EXPECT_TRUE(env_->IsSameObject(jobj_, result)); EXPECT_EQ(2, gJava_MyClassNatives_fooIOO_calls); - result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 1, NULL, jklass_); - EXPECT_TRUE(env_->IsSameObject(NULL, result)); + result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 1, nullptr, jklass_); + EXPECT_TRUE(env_->IsSameObject(nullptr, result)); EXPECT_EQ(3, gJava_MyClassNatives_fooIOO_calls); - result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 2, NULL, jklass_); + result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 2, nullptr, jklass_); EXPECT_TRUE(env_->IsSameObject(jklass_, result)); EXPECT_EQ(4, gJava_MyClassNatives_fooIOO_calls); - result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 0, jklass_, NULL); + result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 0, jklass_, nullptr); EXPECT_TRUE(env_->IsSameObject(jobj_, result)); EXPECT_EQ(5, gJava_MyClassNatives_fooIOO_calls); - result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 1, jklass_, NULL); + result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 1, jklass_, nullptr); EXPECT_TRUE(env_->IsSameObject(jklass_, result)); EXPECT_EQ(6, 
gJava_MyClassNatives_fooIOO_calls); - result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 2, jklass_, NULL); - EXPECT_TRUE(env_->IsSameObject(NULL, result)); + result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 2, jklass_, nullptr); + EXPECT_TRUE(env_->IsSameObject(nullptr, result)); EXPECT_EQ(7, gJava_MyClassNatives_fooIOO_calls); } @@ -383,7 +384,7 @@ jint Java_MyClassNatives_fooSII(JNIEnv* env, jclass klass, jint x, jint y) { // 1 = klass EXPECT_EQ(kNative, Thread::Current()->GetState()); EXPECT_EQ(Thread::Current()->GetJniEnv(), env); - EXPECT_TRUE(klass != NULL); + EXPECT_TRUE(klass != nullptr); EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass)); gJava_MyClassNatives_fooSII_calls++; ScopedObjectAccess soa(Thread::Current()); @@ -407,7 +408,7 @@ jdouble Java_MyClassNatives_fooSDD(JNIEnv* env, jclass klass, jdouble x, jdouble // 1 = klass EXPECT_EQ(kNative, Thread::Current()->GetState()); EXPECT_EQ(Thread::Current()->GetJniEnv(), env); - EXPECT_TRUE(klass != NULL); + EXPECT_TRUE(klass != nullptr); EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass)); gJava_MyClassNatives_fooSDD_calls++; ScopedObjectAccess soa(Thread::Current()); @@ -437,7 +438,7 @@ jobject Java_MyClassNatives_fooSIOO(JNIEnv* env, jclass klass, jint x, jobject y // 3 = klass + y + z EXPECT_EQ(kNative, Thread::Current()->GetState()); EXPECT_EQ(Thread::Current()->GetJniEnv(), env); - EXPECT_TRUE(klass != NULL); + EXPECT_TRUE(klass != nullptr); EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass)); gJava_MyClassNatives_fooSIOO_calls++; ScopedObjectAccess soa(Thread::Current()); @@ -462,28 +463,28 @@ TEST_F(JniCompilerTest, CompileAndRunStaticIntObjectObjectMethod) { reinterpret_cast<void*>(&Java_MyClassNatives_fooSIOO)); EXPECT_EQ(0, gJava_MyClassNatives_fooSIOO_calls); - jobject result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, NULL, NULL); + jobject result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, nullptr, nullptr); EXPECT_TRUE(env_->IsSameObject(jklass_, result)); EXPECT_EQ(1, gJava_MyClassNatives_fooSIOO_calls); - result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, NULL, jobj_); + result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, nullptr, jobj_); EXPECT_TRUE(env_->IsSameObject(jklass_, result)); EXPECT_EQ(2, gJava_MyClassNatives_fooSIOO_calls); - result = env_->CallStaticObjectMethod(jklass_, jmethod_, 1, NULL, jobj_); - EXPECT_TRUE(env_->IsSameObject(NULL, result)); + result = env_->CallStaticObjectMethod(jklass_, jmethod_, 1, nullptr, jobj_); + EXPECT_TRUE(env_->IsSameObject(nullptr, result)); EXPECT_EQ(3, gJava_MyClassNatives_fooSIOO_calls); - result = env_->CallStaticObjectMethod(jklass_, jmethod_, 2, NULL, jobj_); + result = env_->CallStaticObjectMethod(jklass_, jmethod_, 2, nullptr, jobj_); EXPECT_TRUE(env_->IsSameObject(jobj_, result)); EXPECT_EQ(4, gJava_MyClassNatives_fooSIOO_calls); - result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, jobj_, NULL); + result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, jobj_, nullptr); EXPECT_TRUE(env_->IsSameObject(jklass_, result)); EXPECT_EQ(5, gJava_MyClassNatives_fooSIOO_calls); - result = env_->CallStaticObjectMethod(jklass_, jmethod_, 1, jobj_, NULL); + result = env_->CallStaticObjectMethod(jklass_, jmethod_, 1, jobj_, nullptr); EXPECT_TRUE(env_->IsSameObject(jobj_, result)); EXPECT_EQ(6, gJava_MyClassNatives_fooSIOO_calls); - result = env_->CallStaticObjectMethod(jklass_, jmethod_, 2, jobj_, NULL); - EXPECT_TRUE(env_->IsSameObject(NULL, result)); + result 
= env_->CallStaticObjectMethod(jklass_, jmethod_, 2, jobj_, nullptr); + EXPECT_TRUE(env_->IsSameObject(nullptr, result)); EXPECT_EQ(7, gJava_MyClassNatives_fooSIOO_calls); } @@ -492,7 +493,7 @@ jobject Java_MyClassNatives_fooSSIOO(JNIEnv* env, jclass klass, jint x, jobject // 3 = klass + y + z EXPECT_EQ(kNative, Thread::Current()->GetState()); EXPECT_EQ(Thread::Current()->GetJniEnv(), env); - EXPECT_TRUE(klass != NULL); + EXPECT_TRUE(klass != nullptr); EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass)); gJava_MyClassNatives_fooSSIOO_calls++; ScopedObjectAccess soa(Thread::Current()); @@ -516,28 +517,28 @@ TEST_F(JniCompilerTest, CompileAndRunStaticSynchronizedIntObjectObjectMethod) { reinterpret_cast<void*>(&Java_MyClassNatives_fooSSIOO)); EXPECT_EQ(0, gJava_MyClassNatives_fooSSIOO_calls); - jobject result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, NULL, NULL); + jobject result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, nullptr, nullptr); EXPECT_TRUE(env_->IsSameObject(jklass_, result)); EXPECT_EQ(1, gJava_MyClassNatives_fooSSIOO_calls); - result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, NULL, jobj_); + result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, nullptr, jobj_); EXPECT_TRUE(env_->IsSameObject(jklass_, result)); EXPECT_EQ(2, gJava_MyClassNatives_fooSSIOO_calls); - result = env_->CallStaticObjectMethod(jklass_, jmethod_, 1, NULL, jobj_); - EXPECT_TRUE(env_->IsSameObject(NULL, result)); + result = env_->CallStaticObjectMethod(jklass_, jmethod_, 1, nullptr, jobj_); + EXPECT_TRUE(env_->IsSameObject(nullptr, result)); EXPECT_EQ(3, gJava_MyClassNatives_fooSSIOO_calls); - result = env_->CallStaticObjectMethod(jklass_, jmethod_, 2, NULL, jobj_); + result = env_->CallStaticObjectMethod(jklass_, jmethod_, 2, nullptr, jobj_); EXPECT_TRUE(env_->IsSameObject(jobj_, result)); EXPECT_EQ(4, gJava_MyClassNatives_fooSSIOO_calls); - result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, jobj_, NULL); + result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, jobj_, nullptr); EXPECT_TRUE(env_->IsSameObject(jklass_, result)); EXPECT_EQ(5, gJava_MyClassNatives_fooSSIOO_calls); - result = env_->CallStaticObjectMethod(jklass_, jmethod_, 1, jobj_, NULL); + result = env_->CallStaticObjectMethod(jklass_, jmethod_, 1, jobj_, nullptr); EXPECT_TRUE(env_->IsSameObject(jobj_, result)); EXPECT_EQ(6, gJava_MyClassNatives_fooSSIOO_calls); - result = env_->CallStaticObjectMethod(jklass_, jmethod_, 2, jobj_, NULL); - EXPECT_TRUE(env_->IsSameObject(NULL, result)); + result = env_->CallStaticObjectMethod(jklass_, jmethod_, 2, jobj_, nullptr); + EXPECT_TRUE(env_->IsSameObject(nullptr, result)); EXPECT_EQ(7, gJava_MyClassNatives_fooSSIOO_calls); } @@ -591,7 +592,7 @@ TEST_F(JniCompilerTest, ExceptionHandling) { jint Java_MyClassNatives_nativeUpCall(JNIEnv* env, jobject thisObj, jint i) { if (i <= 0) { - // We want to check raw Object*/Array* below + // We want to check raw Object* / Array* below ScopedObjectAccess soa(env); // Build stack trace @@ -599,7 +600,7 @@ jint Java_MyClassNatives_nativeUpCall(JNIEnv* env, jobject thisObj, jint i) { jobjectArray ste_array = Thread::InternalStackTraceToStackTraceElementArray(soa, internal); mirror::ObjectArray<mirror::StackTraceElement>* trace_array = soa.Decode<mirror::ObjectArray<mirror::StackTraceElement>*>(ste_array); - EXPECT_TRUE(trace_array != NULL); + EXPECT_TRUE(trace_array != nullptr); EXPECT_EQ(11, trace_array->GetLength()); // Check stack trace entries have expected values @@ -615,9 +616,9 @@ jint 
Java_MyClassNatives_nativeUpCall(JNIEnv* env, jobject thisObj, jint i) { return 0; } else { jclass jklass = env->FindClass("MyClassNatives"); - EXPECT_TRUE(jklass != NULL); + EXPECT_TRUE(jklass != nullptr); jmethodID jmethod = env->GetMethodID(jklass, "fooI", "(I)I"); - EXPECT_TRUE(jmethod != NULL); + EXPECT_TRUE(jmethod != nullptr); // Recurse with i - 1 jint result = env->CallNonvirtualIntMethod(thisObj, jklass, jmethod, i - 1); @@ -721,7 +722,7 @@ TEST_F(JniCompilerTest, GetText) { TEST_F(JniCompilerTest, GetSinkPropertiesNative) { TEST_DISABLED_FOR_PORTABLE(); - SetUpForTest(false, "getSinkPropertiesNative", "(Ljava/lang/String;)[Ljava/lang/Object;", NULL); + SetUpForTest(false, "getSinkPropertiesNative", "(Ljava/lang/String;)[Ljava/lang/Object;", nullptr); // This space intentionally left blank. Just testing compilation succeeds. } @@ -804,7 +805,7 @@ TEST_F(JniCompilerTest, UpcallArgumentTypeChecking_Static) { jfloat Java_MyClassNatives_checkFloats(JNIEnv* env, jobject thisObj, jfloat f1, jfloat f2) { EXPECT_EQ(kNative, Thread::Current()->GetState()); EXPECT_EQ(Thread::Current()->GetJniEnv(), env); - EXPECT_TRUE(thisObj != NULL); + EXPECT_TRUE(thisObj != nullptr); EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_)); ScopedObjectAccess soa(Thread::Current()); EXPECT_EQ(1U, Thread::Current()->NumStackReferences()); @@ -826,12 +827,12 @@ TEST_F(JniCompilerTest, CompileAndRunFloatFloatMethod) { } void Java_MyClassNatives_checkParameterAlign(JNIEnv* env, jobject thisObj, jint i1, jlong l1) { - /*EXPECT_EQ(kNative, Thread::Current()->GetState()); - EXPECT_EQ(Thread::Current()->GetJniEnv(), env); - EXPECT_TRUE(thisObj != NULL); - EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_)); - ScopedObjectAccess soa(Thread::Current()); - EXPECT_EQ(1U, Thread::Current()->NumStackReferences());*/ +// EXPECT_EQ(kNative, Thread::Current()->GetState()); +// EXPECT_EQ(Thread::Current()->GetJniEnv(), env); +// EXPECT_TRUE(thisObj != nullptr); +// EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_)); +// ScopedObjectAccess soa(Thread::Current()); +// EXPECT_EQ(1U, Thread::Current()->NumStackReferences()); EXPECT_EQ(i1, 1234); EXPECT_EQ(l1, INT64_C(0x12345678ABCDEF0)); } @@ -879,7 +880,7 @@ void Java_MyClassNatives_maxParamNumber(JNIEnv* env, jobject thisObj, jobject o248, jobject o249, jobject o250, jobject o251, jobject o252, jobject o253) { EXPECT_EQ(kNative, Thread::Current()->GetState()); EXPECT_EQ(Thread::Current()->GetJniEnv(), env); - EXPECT_TRUE(thisObj != NULL); + EXPECT_TRUE(thisObj != nullptr); EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_)); ScopedObjectAccess soa(Thread::Current()); EXPECT_GE(255U, Thread::Current()->NumStackReferences()); diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc index 649a80ff68..f0c0ed72bf 100644 --- a/compiler/jni/quick/arm/calling_convention_arm.cc +++ b/compiler/jni/quick/arm/calling_convention_arm.cc @@ -143,9 +143,10 @@ ManagedRegister ArmJniCallingConvention::ReturnScratchRegister() const { size_t ArmJniCallingConvention::FrameSize() { // Method*, LR and callee save area size, local reference segment state - size_t frame_data_size = (3 + CalleeSaveRegisters().size()) * kFramePointerSize; + size_t frame_data_size = sizeof(StackReference<mirror::ArtMethod>) + + (2 + CalleeSaveRegisters().size()) * kFramePointerSize; // References plus 2 words for HandleScope header - size_t handle_scope_size = 
HandleScope::GetAlignedHandleScopeSizeTarget(kFramePointerSize, ReferenceCount()); + size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount()); // Plus return value spill area size return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment); } diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc index ffd27ee37d..0a00d7d8ac 100644 --- a/compiler/jni/quick/arm64/calling_convention_arm64.cc +++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc @@ -95,7 +95,7 @@ FrameOffset Arm64ManagedRuntimeCallingConvention::CurrentParamStackOffset() { CHECK(IsCurrentParamOnStack()); FrameOffset result = FrameOffset(displacement_.Int32Value() + // displacement - kFramePointerSize + // Method* + sizeof(StackReference<mirror::ArtMethod>) + // Method ref (itr_slots_ * sizeof(uint32_t))); // offset into in args return result; } @@ -196,9 +196,10 @@ ManagedRegister Arm64JniCallingConvention::ReturnScratchRegister() const { size_t Arm64JniCallingConvention::FrameSize() { // Method*, callee save area size, local reference segment state - size_t frame_data_size = ((1 + CalleeSaveRegisters().size()) * kFramePointerSize) + sizeof(uint32_t); + size_t frame_data_size = sizeof(StackReference<mirror::ArtMethod>) + + CalleeSaveRegisters().size() * kFramePointerSize + sizeof(uint32_t); // References plus 2 words for HandleScope header - size_t handle_scope_size = HandleScope::GetAlignedHandleScopeSizeTarget(kFramePointerSize, ReferenceCount()); + size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount()); // Plus return value spill area size return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment); } diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h index 2a6e7d96cd..efc0b42db4 100644 --- a/compiler/jni/quick/calling_convention.h +++ b/compiler/jni/quick/calling_convention.h @@ -319,7 +319,8 @@ class JniCallingConvention : public CallingConvention { // Position of handle scope and interior fields FrameOffset HandleScopeOffset() const { - return FrameOffset(this->displacement_.Int32Value() + frame_pointer_size_); // above Method* + return FrameOffset(this->displacement_.Int32Value() + sizeof(StackReference<mirror::ArtMethod>)); + // above Method reference } FrameOffset HandleScopeLinkOffset() const { diff --git a/compiler/jni/quick/mips/calling_convention_mips.cc b/compiler/jni/quick/mips/calling_convention_mips.cc index 0402fe6eb8..f7a7be7304 100644 --- a/compiler/jni/quick/mips/calling_convention_mips.cc +++ b/compiler/jni/quick/mips/calling_convention_mips.cc @@ -147,9 +147,10 @@ ManagedRegister MipsJniCallingConvention::ReturnScratchRegister() const { size_t MipsJniCallingConvention::FrameSize() { // Method*, LR and callee save area size, local reference segment state - size_t frame_data_size = (3 + CalleeSaveRegisters().size()) * kFramePointerSize; + size_t frame_data_size = sizeof(StackReference<mirror::ArtMethod>) + + (2 + CalleeSaveRegisters().size()) * kFramePointerSize; // References plus 2 words for HandleScope header - size_t handle_scope_size = HandleScope::GetAlignedHandleScopeSizeTarget(kFramePointerSize, ReferenceCount()); + size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount()); // Plus return value spill area size return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment); } diff --git 
a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc index 97b4cdf8ac..9bf7d0f071 100644 --- a/compiler/jni/quick/x86/calling_convention_x86.cc +++ b/compiler/jni/quick/x86/calling_convention_x86.cc @@ -124,9 +124,10 @@ uint32_t X86JniCallingConvention::CoreSpillMask() const { size_t X86JniCallingConvention::FrameSize() { // Method*, return address and callee save area size, local reference segment state - size_t frame_data_size = (3 + CalleeSaveRegisters().size()) * kFramePointerSize; + size_t frame_data_size = sizeof(StackReference<mirror::ArtMethod>) + + (2 + CalleeSaveRegisters().size()) * kFramePointerSize; // References plus 2 words for HandleScope header - size_t handle_scope_size = HandleScope::GetAlignedHandleScopeSizeTarget(kFramePointerSize, ReferenceCount()); + size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount()); // Plus return value spill area size return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment); } diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc index 4871c879a8..5febed24fe 100644 --- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc +++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc @@ -96,7 +96,7 @@ ManagedRegister X86_64ManagedRuntimeCallingConvention::CurrentParamRegister() { FrameOffset X86_64ManagedRuntimeCallingConvention::CurrentParamStackOffset() { return FrameOffset(displacement_.Int32Value() + // displacement - kFramePointerSize + // Method* + sizeof(StackReference<mirror::ArtMethod>) + // Method ref (itr_slots_ * sizeof(uint32_t))); // offset into in args } @@ -139,9 +139,10 @@ uint32_t X86_64JniCallingConvention::CoreSpillMask() const { size_t X86_64JniCallingConvention::FrameSize() { // Method*, return address and callee save area size, local reference segment state - size_t frame_data_size = (3 + CalleeSaveRegisters().size()) * kFramePointerSize; + size_t frame_data_size = sizeof(StackReference<mirror::ArtMethod>) + + (2 + CalleeSaveRegisters().size()) * kFramePointerSize; // References plus link_ (pointer) and number_of_references_ (uint32_t) for HandleScope header - size_t handle_scope_size = HandleScope::GetAlignedHandleScopeSizeTarget(kFramePointerSize, ReferenceCount()); + size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount()); // Plus return value spill area size return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment); } diff --git a/compiler/llvm/gbc_expander.cc b/compiler/llvm/gbc_expander.cc index 25c9b20514..f8dca66de0 100644 --- a/compiler/llvm/gbc_expander.cc +++ b/compiler/llvm/gbc_expander.cc @@ -1868,6 +1868,10 @@ llvm::Value* GBCExpanderPass::EmitLoadStaticStorage(uint32_t dex_pc, phi->addIncoming(storage_object_addr, block_check_init); phi->addIncoming(loaded_storage_object_addr, block_after_load_static); + + // Ensure load of status and load of value don't re-order. 
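[Editor's sketch] The comment above and the barrier emitted just below address the usual double-checked initialization hazard: on a weakly ordered CPU the load of the class's initialized status and the later load of the static-storage value could be reordered, so a thread might observe "initialized" yet read stale contents. The kLoadLoad fence keeps the two loads ordered. A rough portable C++ analogue, assuming the initializing thread publishes with a release store; names are illustrative, not the runtime's real fields:

#include <atomic>

struct ClassState {
  std::atomic<bool> initialized{false};
  int static_value = 0;  // published before 'initialized' is set with release semantics
};

int ReadStatic(ClassState& k) {
  if (k.initialized.load(std::memory_order_acquire)) {  // acquire plays the LoadLoad role
    return k.static_value;  // guaranteed to see the value stored before the release
  }
  return -1;  // caller would fall back to the slow initialization path
}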
+ irb_.CreateMemoryBarrier(art::kLoadLoad); + return phi; } diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index 6812f3c9df..49cf71b7eb 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -180,7 +180,7 @@ TEST_F(OatTest, OatHeaderSizeCheck) { EXPECT_EQ(80U, sizeof(OatHeader)); EXPECT_EQ(8U, sizeof(OatMethodOffsets)); EXPECT_EQ(24U, sizeof(OatQuickMethodHeader)); - EXPECT_EQ(80 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints)); + EXPECT_EQ(79 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints)); } TEST_F(OatTest, OatHeaderIsValid) { diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 53e7bbe402..7a336204b6 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -26,6 +26,18 @@ namespace art { +static void DumpBitVector(BitVector* vector, + std::ostream& buffer, + size_t count, + const char* prefix) { + buffer << prefix; + buffer << '('; + for (size_t i = 0; i < count; ++i) { + buffer << vector->IsBitSet(i); + } + buffer << ")\n"; +} + static void TestCode(const uint16_t* data, const char* expected) { ArenaPool pool; ArenaAllocator allocator(&pool); @@ -43,12 +55,13 @@ static void TestCode(const uint16_t* data, const char* expected) { for (HInsertionOrderIterator it(*graph); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); buffer << "Block " << block->GetBlockId() << std::endl; + size_t ssa_values = liveness.GetNumberOfSsaValues(); BitVector* live_in = liveness.GetLiveInSet(*block); - live_in->Dump(buffer, " live in: "); + DumpBitVector(live_in, buffer, ssa_values, " live in: "); BitVector* live_out = liveness.GetLiveOutSet(*block); - live_out->Dump(buffer, " live out: "); + DumpBitVector(live_out, buffer, ssa_values, " live out: "); BitVector* kill = liveness.GetKillSet(*block); - kill->Dump(buffer, " kill: "); + DumpBitVector(kill, buffer, ssa_values, " kill: "); } ASSERT_STREQ(expected, buffer.str().c_str()); } diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 33084df94b..1284a97cd1 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -95,14 +95,26 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { // All predecessors have already been visited because we are visiting in reverse post order. // We merge the values of all locals, creating phis if those values differ. for (size_t local = 0; local < current_locals_->Size(); local++) { + bool one_predecessor_has_no_value = false; bool is_different = false; HInstruction* value = ValueOfLocal(block->GetPredecessors().Get(0), local); - for (size_t i = 1; i < block->GetPredecessors().Size(); i++) { - if (ValueOfLocal(block->GetPredecessors().Get(i), local) != value) { + + for (size_t i = 0, e = block->GetPredecessors().Size(); i < e; ++i) { + HInstruction* current = ValueOfLocal(block->GetPredecessors().Get(i), local); + if (current == nullptr) { +// one_predecessor_has_no_value = true; +// break; + } else if (current != value) { is_different = true; - break; } } + + if (one_predecessor_has_no_value) { + // If one predecessor has no value for this local, we trust the verifier has + // successfully checked that there is a store dominating any read after this block. 
+ continue; + } + if (is_different) { HPhi* phi = new (GetGraph()->GetArena()) HPhi( GetGraph()->GetArena(), local, block->GetPredecessors().Size(), Primitive::kPrimVoid); diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc index d10461980d..485ea279ad 100644 --- a/compiler/optimizing/ssa_test.cc +++ b/compiler/optimizing/ssa_test.cc @@ -459,4 +459,34 @@ TEST(SsaTest, DeadLocal) { TestCode(data, expected); } +TEST(SsaTest, LocalInIf) { + // Test that we do not create a phi in the join block when one predecessor + // does not update the local. + const char* expected = + "BasicBlock 0, succ: 1\n" + " 0: IntConstant 0 [3, 3]\n" + " 1: IntConstant 4\n" + " 2: Goto\n" + "BasicBlock 1, pred: 0, succ: 2, 5\n" + " 3: Equal(0, 0) [4]\n" + " 4: If(3)\n" + "BasicBlock 2, pred: 1, succ: 3\n" + " 5: Goto\n" + "BasicBlock 3, pred: 2, 5, succ: 4\n" + " 6: ReturnVoid\n" + "BasicBlock 4, pred: 3\n" + " 7: Exit\n" + // Synthesized block to avoid critical edge. + "BasicBlock 5, pred: 1, succ: 3\n" + " 8: Goto\n"; + + const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + Instruction::CONST_4 | 0 | 0, + Instruction::IF_EQ, 3, + Instruction::CONST_4 | 4 << 12 | 1 << 8, + Instruction::RETURN_VOID); + + TestCode(data, expected); +} + } // namespace art diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc index 27188b2331..009b227209 100644 --- a/compiler/utils/arm64/assembler_arm64.cc +++ b/compiler/utils/arm64/assembler_arm64.cc @@ -530,7 +530,7 @@ void Arm64Assembler::Call(FrameOffset base, Offset offs, ManagedRegister m_scrat Arm64ManagedRegister scratch = m_scratch.AsArm64(); CHECK(scratch.IsCoreRegister()) << scratch; // Call *(*(SP + base) + offset) - LoadFromOffset(scratch.AsCoreRegister(), SP, base.Int32Value()); + LoadWFromOffset(kLoadWord, scratch.AsOverlappingCoreRegisterLow(), SP, base.Int32Value()); LoadFromOffset(scratch.AsCoreRegister(), scratch.AsCoreRegister(), offs.Int32Value()); ___ Blr(reg_x(scratch.AsCoreRegister())); } @@ -656,16 +656,17 @@ void Arm64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, // trashed by native code. ___ Mov(reg_x(ETR), reg_x(TR)); - // Increate frame to required size - must be at least space to push Method*. + // Increase frame to required size - must be at least space to push StackReference<Method>. CHECK_GT(frame_size, kCalleeSavedRegsSize * kFramePointerSize); size_t adjust = frame_size - (kCalleeSavedRegsSize * kFramePointerSize); IncreaseFrameSize(adjust); - // Write Method*. - StoreToOffset(X0, SP, 0); + // Write StackReference<Method>. + DCHECK_EQ(4U, sizeof(StackReference<mirror::ArtMethod>)); + StoreWToOffset(StoreOperandType::kStoreWord, W0, SP, 0); // Write out entry spills - int32_t offset = frame_size + kFramePointerSize; + int32_t offset = frame_size + sizeof(StackReference<mirror::ArtMethod>); for (size_t i = 0; i < entry_spills.size(); ++i) { Arm64ManagedRegister reg = entry_spills.at(i).AsArm64(); if (reg.IsNoRegister()) { diff --git a/compiler/utils/array_ref.h b/compiler/utils/array_ref.h new file mode 100644 index 0000000000..2d70b7dd31 --- /dev/null +++ b/compiler/utils/array_ref.h @@ -0,0 +1,173 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_ARRAY_REF_H_
+#define ART_COMPILER_UTILS_ARRAY_REF_H_
+
+#include <type_traits>
+#include <vector>
+
+#include "base/logging.h"
+
+namespace art {
+
+/**
+ * @brief A container that references an array.
+ *
+ * @details The template class ArrayRef provides a container that references
+ * an external array. This external array must remain alive while the ArrayRef
+ * object is in use. The external array may be a std::vector<>-backed storage
+ * or any other contiguous chunk of memory but that memory must remain valid,
+ * i.e. the std::vector<> must not be resized for example.
+ *
+ * Except for copy/assign and insert/erase/capacity functions, the interface
+ * is essentially the same as std::vector<>. Since we don't want to throw
+ * exceptions, at() is also excluded.
+ */
+template <typename T>
+class ArrayRef {
+ private:
+  struct tag { };
+
+ public:
+  typedef T value_type;
+  typedef T& reference;
+  typedef const T& const_reference;
+  typedef T* pointer;
+  typedef const T* const_pointer;
+  typedef T* iterator;
+  typedef const T* const_iterator;
+  typedef std::reverse_iterator<iterator> reverse_iterator;
+  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+  typedef ptrdiff_t difference_type;
+  typedef size_t size_type;
+
+  // Constructors.
+
+  constexpr ArrayRef()
+      : array_(nullptr), size_(0u) {
+  }
+
+  template <size_t size>
+  constexpr ArrayRef(T (&array)[size])
+      : array_(array), size_(size) {
+  }
+
+  template <typename U, size_t size>
+  constexpr ArrayRef(U (&array)[size],
+                     typename std::enable_if<std::is_same<T, const U>::value, tag>::type t = tag())
+      : array_(array), size_(size) {
+  }
+
+  constexpr ArrayRef(T* array, size_t size)
+      : array_(array), size_(size) {
+  }
+
+  template <typename U>
+  constexpr ArrayRef(U* array, size_t size,
+                     typename std::enable_if<std::is_same<T, const U>::value, tag>::type t = tag())
+      : array_(array), size_(size) {
+  }
+
+  explicit ArrayRef(std::vector<T>& v)
+      : array_(v.data()), size_(v.size()) {
+  }
+
+  template <typename U>
+  ArrayRef(const std::vector<U>& v,
+           typename std::enable_if<std::is_same<T, const U>::value, tag>::type t = tag())
+      : array_(v.data()), size_(v.size()) {
+  }
+
+  // Assignment operators.
+
+  ArrayRef& operator=(const ArrayRef& other) {
+    array_ = other.array_;
+    size_ = other.size_;
+    return *this;
+  }
+
+  template <typename U>
+  typename std::enable_if<std::is_same<T, const U>::value, ArrayRef>::type&
+  operator=(const ArrayRef<U>& other) {
+    return *this = ArrayRef(other);
+  }
+
+  // Destructor.
+  ~ArrayRef() = default;
+
+  // Iterators.
+ iterator begin() { return array_; } + const_iterator begin() const { return array_; } + const_iterator cbegin() const { return array_; } + iterator end() { return array_ + size_; } + const_iterator end() const { return array_ + size_; } + const_iterator cend() const { return array_ + size_; } + reverse_iterator rbegin() { return reverse_iterator(end()); } + const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } + const_reverse_iterator crbegin() const { return const_reverse_iterator(cend()); } + reverse_iterator rend() { return reverse_iterator(begin()); } + const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } + const_reverse_iterator crend() const { return const_reverse_iterator(cbegin()); } + + // Size. + size_type size() const { return size_; } + bool empty() const { return size() == 0u; } + + // Element access. NOTE: Not providing at(). + + reference operator[](size_type n) { + DCHECK_LT(n, size_); + return array_[n]; + } + + const_reference operator[](size_type n) const { + DCHECK_LT(n, size_); + return array_[n]; + } + + reference front() { + DCHECK_NE(size_, 0u); + return array_[0]; + } + + const_reference front() const { + DCHECK_NE(size_, 0u); + return array_[0]; + } + + reference back() { + DCHECK_NE(size_, 0u); + return array_[size_ - 1u]; + } + + const_reference back() const { + DCHECK_NE(size_, 0u); + return array_[size_ - 1u]; + } + + value_type* data() { return array_; } + const value_type* data() const { return array_; } + + private: + T* array_; + size_t size_; +}; + +} // namespace art + + +#endif // ART_COMPILER_UTILS_ARRAY_REF_H_ diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 0791c63f90..56c6536fe5 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -1411,10 +1411,12 @@ void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, } // return address then method on stack addl(ESP, Immediate(-frame_size + (spill_regs.size() * kFramePointerSize) + - kFramePointerSize /*method*/ + kFramePointerSize /*return address*/)); + sizeof(StackReference<mirror::ArtMethod>) /*method*/ + + kFramePointerSize /*return address*/)); pushl(method_reg.AsX86().AsCpuRegister()); for (size_t i = 0; i < entry_spills.size(); ++i) { - movl(Address(ESP, frame_size + kFramePointerSize + (i * kFramePointerSize)), + movl(Address(ESP, frame_size + sizeof(StackReference<mirror::ArtMethod>) + + (i * kFramePointerSize)), entry_spills.at(i).AsX86().AsCpuRegister()); } } @@ -1422,7 +1424,8 @@ void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void X86Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& spill_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); - addl(ESP, Immediate(frame_size - (spill_regs.size() * kFramePointerSize) - kFramePointerSize)); + addl(ESP, Immediate(frame_size - (spill_regs.size() * kFramePointerSize) - + sizeof(StackReference<mirror::ArtMethod>))); for (size_t i = 0; i < spill_regs.size(); ++i) { popl(spill_regs.at(i).AsX86().AsCpuRegister()); } diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 0ede8755e3..a14551c3b7 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -59,7 +59,6 @@ void X86_64Assembler::call(Label* label) { EmitLabel(label, kSize); } - void X86_64Assembler::pushq(CpuRegister reg) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); 
  EmitOptionalRex32(reg);
@@ -1652,8 +1651,12 @@ void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
   }
   // return address then method on stack
   addq(CpuRegister(RSP), Immediate(-frame_size + (spill_regs.size() * kFramePointerSize) +
-                                   kFramePointerSize /*method*/ + kFramePointerSize /*return address*/));
-  pushq(method_reg.AsX86_64().AsCpuRegister());
+                                   sizeof(StackReference<mirror::ArtMethod>) /*method*/ +
+                                   kFramePointerSize /*return address*/));
+
+  DCHECK_EQ(4U, sizeof(StackReference<mirror::ArtMethod>));
+  subq(CpuRegister(RSP), Immediate(4));
+  movl(Address(CpuRegister(RSP), 0), method_reg.AsX86_64().AsCpuRegister());

   for (size_t i = 0; i < entry_spills.size(); ++i) {
     ManagedRegisterSpill spill = entry_spills.at(i);
@@ -1732,7 +1735,7 @@ void X86_64Assembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size)
 void X86_64Assembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
   X86_64ManagedRegister src = msrc.AsX86_64();
   CHECK(src.IsCpuRegister());
-  movq(Address(CpuRegister(RSP), dest), src.AsCpuRegister());
+  movl(Address(CpuRegister(RSP), dest), src.AsCpuRegister());
 }

 void X86_64Assembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
@@ -2070,7 +2073,7 @@ void X86_64Assembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister
 void X86_64Assembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
   CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister();
-  movq(scratch, Address(CpuRegister(RSP), base));
+  movl(scratch, Address(CpuRegister(RSP), base));
   call(Address(scratch, offset));
 }
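
The revised X86JniCallingConvention::FrameSize() and X86_64JniCallingConvention::FrameSize() above share the same arithmetic: the method slot shrinks to a 4-byte stack reference while the return address, the local reference segment state and the callee-save slots stay pointer-sized. The standalone sketch below only models that computation; StackReferenceArtMethod, JniFrameSize and the example parameters are illustrative stand-ins, not ART code.

#include <cstddef>
#include <cstdint>
#include <iostream>

namespace sketch {

// Stand-in for art::StackReference<mirror::ArtMethod>: a compressed 32-bit reference.
struct StackReferenceArtMethod { uint32_t reference; };

constexpr size_t RoundUp(size_t x, size_t alignment) {
  return (x + alignment - 1) & ~(alignment - 1);
}

size_t JniFrameSize(size_t frame_pointer_size,
                    size_t callee_save_count,
                    size_t handle_scope_size,
                    size_t return_value_spill_size,
                    size_t stack_alignment) {
  // Method slot is a 4-byte stack reference; return address and local reference
  // segment state remain pointer-sized, as do the callee-save slots.
  size_t frame_data_size = sizeof(StackReferenceArtMethod) +
      (2 + callee_save_count) * frame_pointer_size;
  // Plus HandleScope and return value spill area, rounded up to the stack alignment.
  return RoundUp(frame_data_size + handle_scope_size + return_value_spill_size,
                 stack_alignment);
}

}  // namespace sketch

int main() {
  // Example numbers only: 8-byte pointers, 5 callee saves, a 56-byte handle scope,
  // an 8-byte return value spill area and 16-byte stack alignment.
  std::cout << sketch::JniFrameSize(8, 5, 56, 8, 16) << std::endl;
  return 0;
}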
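The kLoadLoad barrier added in gbc_expander.cc keeps the load of the class-initialization status from being reordered with the later load of the static storage it guards. The sketch below expresses the same ordering constraint with plain C++11 atomics (acquire on the status load); it is an analogy for the constraint, not the LLVM IR the expander actually emits.

#include <atomic>
#include <cassert>
#include <thread>

namespace sketch {

int static_storage = 0;                 // stands in for the class's static storage
std::atomic<bool> initialized{false};   // stands in for the class initialization status

void Initializer() {
  static_storage = 42;                                 // initialize the storage
  initialized.store(true, std::memory_order_release);  // then publish the status
}

void Reader() {
  // Load of the status: acquire ordering prevents the subsequent load of the
  // value from being hoisted above it, which is what the load-load barrier
  // guarantees in the expanded code.
  if (initialized.load(std::memory_order_acquire)) {
    assert(static_storage == 42);
  }
}

}  // namespace sketch

int main() {
  std::thread t1(sketch::Initializer);
  std::thread t2(sketch::Reader);
  t1.join();
  t2.join();
  return 0;
}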
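The ssa_builder.cc hunk changes the merge rule: a phi is created only when every predecessor defines the local and at least two of those definitions differ; a missing definition in any predecessor suppresses the phi, relying on the verifier's guarantee that a dominating store precedes any later read. A minimal standalone sketch of that rule, with Instr and MergeLocal as hypothetical stand-ins for HInstruction and the loop in VisitBasicBlock:

#include <cassert>
#include <vector>

struct Instr { int id; };  // stand-in for HInstruction

enum class MergeResult { kNoValue, kSingleValue, kNeedsPhi };

MergeResult MergeLocal(const std::vector<const Instr*>& predecessor_values) {
  const Instr* value = predecessor_values.empty() ? nullptr : predecessor_values[0];
  bool one_predecessor_has_no_value = false;
  bool is_different = false;
  for (const Instr* current : predecessor_values) {
    if (current == nullptr) {
      one_predecessor_has_no_value = true;
      break;
    } else if (current != value) {
      is_different = true;
    }
  }
  if (one_predecessor_has_no_value) {
    // The verifier guarantees any later read is dominated by a store, so no phi is needed.
    return MergeResult::kNoValue;
  }
  return is_different ? MergeResult::kNeedsPhi : MergeResult::kSingleValue;
}

int main() {
  Instr a{1}, b{2};
  assert(MergeLocal({&a, &a}) == MergeResult::kSingleValue);   // all predecessors agree
  assert(MergeLocal({&a, &b}) == MergeResult::kNeedsPhi);      // definitions differ
  assert(MergeLocal({&a, nullptr}) == MergeResult::kNoValue);  // one branch never wrote the local
  return 0;
}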
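A possible usage sketch for the new ArrayRef<T> header added above; the "utils/array_ref.h" include path and the Sum helper are assumptions, and the snippet presumes it is compiled inside the ART tree where base/logging.h resolves.

#include <vector>

#include "utils/array_ref.h"

namespace art {

static int Sum(ArrayRef<const int> values) {
  int total = 0;
  for (int value : values) {  // begin()/end() make ArrayRef usable in range-based for loops
    total += value;
  }
  return total;
}

}  // namespace art

int main() {
  int raw[] = {1, 2, 3};
  std::vector<int> vec = {4, 5, 6};
  // Both calls reference the caller's storage; no elements are copied.
  int a = art::Sum(art::ArrayRef<const int>(raw));
  int b = art::Sum(art::ArrayRef<const int>(vec));
  return (a == 6 && b == 15) ? 0 : 1;
}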