Diffstat (limited to 'compiler'): 33 files changed, 821 insertions, 337 deletions
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index f4e28e6c0e..398c7f641f 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -27,6 +27,7 @@ #include "mir_method_info.h" #include "utils/arena_bit_vector.h" #include "utils/growable_array.h" +#include "reg_location.h" #include "reg_storage.h" namespace art { @@ -492,39 +493,6 @@ class ChildBlockIterator { }; /* - * Whereas a SSA name describes a definition of a Dalvik vreg, the RegLocation describes - * the type of an SSA name (and, can also be used by code generators to record where the - * value is located (i.e. - physical register, frame, spill, etc.). For each SSA name (SReg) - * there is a RegLocation. - * A note on SSA names: - * o SSA names for Dalvik vRegs v0..vN will be assigned 0..N. These represent the "vN_0" - * names. Negative SSA names represent special values not present in the Dalvik byte code. - * For example, SSA name -1 represents an invalid SSA name, and SSA name -2 represents the - * the Method pointer. SSA names < -2 are reserved for future use. - * o The vN_0 names for non-argument Dalvik should in practice never be used (as they would - * represent the read of an undefined local variable). The first definition of the - * underlying Dalvik vReg will result in a vN_1 name. - * - * FIXME: The orig_sreg field was added as a workaround for llvm bitcode generation. With - * the latest restructuring, we should be able to remove it and rely on s_reg_low throughout. - */ -struct RegLocation { - RegLocationType location:3; - unsigned wide:1; - unsigned defined:1; // Do we know the type? - unsigned is_const:1; // Constant, value in mir_graph->constant_values[]. - unsigned fp:1; // Floating point? - unsigned core:1; // Non-floating point? - unsigned ref:1; // Something GC cares about. - unsigned high_word:1; // High word of pair? - unsigned home:1; // Does this represent the home location? - RegStorage reg; // Encoded physical registers. - int16_t s_reg_low; // SSA name for low Dalvik word. - int16_t orig_sreg; // TODO: remove after Bitcode gen complete - // and consolidate usage w/ s_reg_low. -}; - -/* * Collection of information describing an invoke, and the destination of * the subsequent MOVE_RESULT (if applicable). Collected as a unit to enable * more efficient invoke code generation. diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h index b0865f1c3f..ea7f439cfb 100644 --- a/compiler/dex/quick/arm64/arm64_lir.h +++ b/compiler/dex/quick/arm64/arm64_lir.h @@ -179,6 +179,8 @@ constexpr RegStorage rs_wLR(RegStorage::kValid | rwLR); // RegisterLocation templates return values (following the hard-float calling convention). 
const RegLocation arm_loc_c_return = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_w0, INVALID_SREG, INVALID_SREG}; +const RegLocation arm_loc_c_return_ref = + {kLocPhysReg, 0, 0, 0, 0, 0, 1, 0, 1, rs_x0, INVALID_SREG, INVALID_SREG}; const RegLocation arm_loc_c_return_wide = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_x0, INVALID_SREG, INVALID_SREG}; const RegLocation arm_loc_c_return_float = diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc index cfdf926fba..3e0b3cf314 100644 --- a/compiler/dex/quick/arm64/call_arm64.cc +++ b/compiler/dex/quick/arm64/call_arm64.cc @@ -132,7 +132,7 @@ void Arm64Mir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset, // Load the displacement from the switch table RegStorage disp_reg = AllocTemp(); - LoadBaseIndexed(table_base, As64BitReg(key_reg), As64BitReg(disp_reg), 2, k32); + LoadBaseIndexed(table_base, As64BitReg(key_reg), disp_reg, 2, k32); // Get base branch address. RegStorage branch_reg = AllocTempWide(); @@ -195,7 +195,7 @@ void Arm64Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) { // TUNING: How much performance we get when we inline this? // Since we've already flush all register. FlushAllRegs(); - LoadValueDirectFixed(rl_src, rs_w0); + LoadValueDirectFixed(rl_src, rs_x0); // = TargetRefReg(kArg0) LockCallTemps(); // Prepare for explicit register usage LIR* null_check_branch = nullptr; if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) { @@ -243,7 +243,7 @@ void Arm64Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) { // TUNING: How much performance we get when we inline this? // Since we've already flush all register. FlushAllRegs(); - LoadValueDirectFixed(rl_src, rs_w0); // Get obj + LoadValueDirectFixed(rl_src, rs_x0); // Get obj LockCallTemps(); // Prepare for explicit register usage LIR* null_check_branch = nullptr; if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) { @@ -291,12 +291,12 @@ void Arm64Mir2Lir::GenMoveException(RegLocation rl_dest) { */ void Arm64Mir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) { RegStorage reg_card_base = AllocTempWide(); - RegStorage reg_card_no = AllocTemp(); + RegStorage reg_card_no = AllocTempWide(); // Needs to be wide as addr is ref=64b LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL); LoadWordDisp(rs_xSELF, Thread::CardTableOffset<8>().Int32Value(), reg_card_base); OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift); // TODO(Arm64): generate "strb wB, [xB, wC, uxtw]" rather than "strb wB, [xB, xC]"? - StoreBaseIndexed(reg_card_base, As64BitReg(reg_card_no), As32BitReg(reg_card_base), + StoreBaseIndexed(reg_card_base, reg_card_no, As32BitReg(reg_card_base), 0, kUnsignedByte); LIR* target = NewLIR0(kPseudoTargetLabel); branch_over->target = target; diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h index a9340a5ccf..f71713fc96 100644 --- a/compiler/dex/quick/arm64/codegen_arm64.h +++ b/compiler/dex/quick/arm64/codegen_arm64.h @@ -24,13 +24,8 @@ namespace art { -class Arm64Mir2Lir : public Mir2Lir { +class Arm64Mir2Lir FINAL : public Mir2Lir { protected: - // If we detect a size error, FATAL out. - static constexpr bool kFailOnSizeError = false && kIsDebugBuild; - // If we detect a size error, report to LOG. - static constexpr bool kReportSizeError = false && kIsDebugBuild; - // TODO: consolidate 64-bit target support. 
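For orientation, the positional initializer of the new arm_loc_c_return_ref template above maps onto the RegLocation bitfields (the struct this patch moves into reg_location.h, shown further down). This is only a commented restatement of that initializer for readability, not an addition to the patch:

const RegLocation arm_loc_c_return_ref = {
    kLocPhysReg,   // location: value lives in a physical register
    0,             // wide: a reference is not a wide (64-bit Dalvik pair) value
    0, 0,          // defined, is_const
    0, 0,          // fp, core
    1,             // ref: something the GC cares about
    0,             // high_word
    1,             // home
    rs_x0,         // reg: the 64-bit x0 view, since references are 64-bit on arm64
    INVALID_SREG,  // s_reg_low
    INVALID_SREG   // orig_sreg
};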
class InToRegStorageMapper { public: @@ -102,7 +97,19 @@ class Arm64Mir2Lir : public Mir2Lir { int offset, int check_value, LIR* target) OVERRIDE; // Required for target - register utilities. - RegStorage TargetReg(SpecialTargetRegister reg); + RegStorage TargetReg(SpecialTargetRegister reg) OVERRIDE; + RegStorage TargetReg(SpecialTargetRegister symbolic_reg, bool is_wide) OVERRIDE { + RegStorage reg = TargetReg(symbolic_reg); + if (is_wide) { + return (reg.Is64Bit()) ? reg : As64BitReg(reg); + } else { + return (reg.Is32Bit()) ? reg : As32BitReg(reg); + } + } + RegStorage TargetRefReg(SpecialTargetRegister symbolic_reg) OVERRIDE { + RegStorage reg = TargetReg(symbolic_reg); + return (reg.Is64Bit() ? reg : As64BitReg(reg)); + } RegStorage GetArgMappingToPhysicalReg(int arg_num); RegLocation GetReturnAlt(); RegLocation GetReturnWideAlt(); diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc index 56fb2dd018..18a4e8f2a5 100644 --- a/compiler/dex/quick/arm64/int_arm64.cc +++ b/compiler/dex/quick/arm64/int_arm64.cc @@ -181,6 +181,8 @@ LIR* Arm64Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) { if (LIKELY(dest_is_fp == src_is_fp)) { if (LIKELY(!dest_is_fp)) { + DCHECK_EQ(r_dest.Is64Bit(), r_src.Is64Bit()); + // Core/core copy. // Copies involving the sp register require a different instruction. opcode = UNLIKELY(A64_REG_IS_SP(r_dest.GetReg())) ? kA64Add4RRdT : kA64Mov2rr; @@ -210,14 +212,14 @@ LIR* Arm64Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) { if (r_dest.IsDouble()) { opcode = kA64Fmov2Sx; } else { - DCHECK(r_src.IsSingle()); + r_src = Check32BitReg(r_src); opcode = kA64Fmov2sw; } } else { if (r_src.IsDouble()) { opcode = kA64Fmov2xS; } else { - DCHECK(r_dest.Is32Bit()); + r_dest = Check32BitReg(r_dest); opcode = kA64Fmov2ws; } } @@ -655,7 +657,7 @@ void Arm64Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) { rl_src = LoadValue(rl_src, kCoreReg); rl_result = EvalLocWide(rl_dest, kCoreReg, true); - NewLIR4(WIDE(kA64Sbfm4rrdd), rl_result.reg.GetReg(), rl_src.reg.GetReg(), 0, 31); + NewLIR4(WIDE(kA64Sbfm4rrdd), rl_result.reg.GetReg(), As64BitReg(rl_src.reg).GetReg(), 0, 31); StoreValueWide(rl_dest, rl_result); } diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc index 6105837f79..dcb0050a80 100644 --- a/compiler/dex/quick/arm64/target_arm64.cc +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -88,7 +88,7 @@ RegLocation Arm64Mir2Lir::LocCReturn() { } RegLocation Arm64Mir2Lir::LocCReturnRef() { - return arm_loc_c_return; + return arm_loc_c_return_ref; } RegLocation Arm64Mir2Lir::LocCReturnWide() { @@ -1097,7 +1097,7 @@ int Arm64Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, // Instead of allocating a new temp, simply reuse one of the registers being used // for argument passing. - RegStorage temp = TargetReg(kArg3); + RegStorage temp = TargetReg(kArg3, false); // Now load the argument VR and store to the outs. Load32Disp(TargetReg(kSp), current_src_offset, temp); diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc index e2484101df..ca78e5be72 100644 --- a/compiler/dex/quick/arm64/utility_arm64.cc +++ b/compiler/dex/quick/arm64/utility_arm64.cc @@ -732,7 +732,7 @@ LIR* Arm64Mir2Lir::OpRegRegImm64(OpKind op, RegStorage r_dest, RegStorage r_src1 return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value >> 12, 1); } else { log_imm = -1; - alt_opcode = (neg) ? 
kA64Add4RRre : kA64Sub4RRre; + alt_opcode = (op == kOpAdd) ? kA64Add4RRre : kA64Sub4RRre; info = EncodeExtend(is_wide ? kA64Uxtx : kA64Uxtw, 0); } break; @@ -891,9 +891,8 @@ LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegSto LIR* load; int expected_scale = 0; ArmOpcode opcode = kA64Brk1d; - DCHECK(r_base.Is64Bit()); - // TODO: need a cleaner handling of index registers here and throughout. - r_index = Check32BitReg(r_index); + r_base = Check64BitReg(r_base); + r_index = Check64BitReg(r_index); if (r_dest.IsFloat()) { if (r_dest.IsDouble()) { @@ -928,17 +927,21 @@ LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegSto expected_scale = 2; break; case kUnsignedHalf: + r_dest = Check32BitReg(r_dest); opcode = kA64Ldrh4wXxd; expected_scale = 1; break; case kSignedHalf: + r_dest = Check32BitReg(r_dest); opcode = kA64Ldrsh4rXxd; expected_scale = 1; break; case kUnsignedByte: + r_dest = Check32BitReg(r_dest); opcode = kA64Ldrb3wXx; break; case kSignedByte: + r_dest = Check32BitReg(r_dest); opcode = kA64Ldrsb3rXx; break; default: @@ -968,9 +971,8 @@ LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegSt LIR* store; int expected_scale = 0; ArmOpcode opcode = kA64Brk1d; - DCHECK(r_base.Is64Bit()); - // TODO: need a cleaner handling of index registers here and throughout. - r_index = Check32BitReg(r_index); + r_base = Check64BitReg(r_base); + r_index = Check64BitReg(r_index); if (r_src.IsFloat()) { if (r_src.IsDouble()) { @@ -1006,11 +1008,13 @@ LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegSt break; case kUnsignedHalf: case kSignedHalf: + r_src = Check32BitReg(r_src); opcode = kA64Strh4wXxd; expected_scale = 1; break; case kUnsignedByte: case kSignedByte: + r_src = Check32BitReg(r_src); opcode = kA64Strb3wXx; break; default: diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index f31b670164..e571b3a407 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -1184,7 +1184,7 @@ void Mir2Lir::LoadCodeAddress(const MethodReference& target_method, InvokeType t // resolve these invokes to the same method, so we don't care which one we record here. data_target->operands[2] = type; } - LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg), data_target); + LIR* load_pc_rel = OpPcRelLoad(TargetRefReg(symbolic_reg), data_target); AppendLIR(load_pc_rel); DCHECK_NE(cu_->instruction_set, kMips) << reinterpret_cast<void*>(data_target); } @@ -1200,7 +1200,7 @@ void Mir2Lir::LoadMethodAddress(const MethodReference& target_method, InvokeType // resolve these invokes to the same method, so we don't care which one we record here. 
data_target->operands[2] = type; } - LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg), data_target); + LIR* load_pc_rel = OpPcRelLoad(TargetRefReg(symbolic_reg), data_target); AppendLIR(load_pc_rel); DCHECK_NE(cu_->instruction_set, kMips) << reinterpret_cast<void*>(data_target); } diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index 04a23cf133..2c59055243 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -127,14 +127,17 @@ void Mir2Lir::GenArrayBoundsCheck(int index, RegStorage length) { m2l_->ResetDefTracking(); GenerateTargetLabel(kPseudoThrowTarget); - m2l_->OpRegCopy(m2l_->TargetReg(kArg1), length_); - m2l_->LoadConstant(m2l_->TargetReg(kArg0), index_); + RegStorage arg1_32 = m2l_->TargetReg(kArg1, false); + RegStorage arg0_32 = m2l_->TargetReg(kArg0, false); + + m2l_->OpRegCopy(arg1_32, length_); + m2l_->LoadConstant(arg0_32, index_); if (m2l_->cu_->target64) { m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pThrowArrayBounds), - m2l_->TargetReg(kArg0), m2l_->TargetReg(kArg1), true); + arg0_32, arg1_32, true); } else { m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pThrowArrayBounds), - m2l_->TargetReg(kArg0), m2l_->TargetReg(kArg1), true); + arg0_32, arg1_32, true); } } @@ -473,7 +476,7 @@ void Mir2Lir::GenFilledNewArray(CallInfo* info) { switch (cu_->instruction_set) { case kThumb2: case kArm64: - r_val = TargetReg(kLr); + r_val = TargetReg(kLr, false); break; case kX86: case kX86_64: @@ -597,10 +600,10 @@ void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, bool is_long_or_double, // May do runtime call so everything to home locations. FlushAllRegs(); // Using fixed register to sync with possible call to runtime support. - RegStorage r_method = TargetReg(kArg1); + RegStorage r_method = TargetRefReg(kArg1); LockTemp(r_method); LoadCurrMethodDirect(r_method); - r_base = TargetReg(kArg0); + r_base = TargetRefReg(kArg0); LockTemp(r_base); LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base, kNotVolatile); @@ -901,12 +904,12 @@ void Mir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl void Mir2Lir::GenConstClass(uint32_t type_idx, RegLocation rl_dest) { RegLocation rl_method = LoadCurrMethod(); - DCHECK(!cu_->target64 || rl_method.reg.Is64Bit()); + CheckRegLocation(rl_method); RegStorage res_reg = AllocTempRef(); RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true); if (!cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, - *cu_->dex_file, - type_idx)) { + *cu_->dex_file, + type_idx)) { // Call out to helper which resolves type and verifies access. // Resolved type returned in kRet0. 
if (cu_->target64) { @@ -991,15 +994,15 @@ void Mir2Lir::GenConstString(uint32_t string_idx, RegLocation rl_dest) { DCHECK(!IsTemp(rl_method.reg)); r_method = rl_method.reg; } else { - r_method = TargetReg(kArg2); + r_method = TargetRefReg(kArg2); LoadCurrMethodDirect(r_method); } LoadRefDisp(r_method, mirror::ArtMethod::DexCacheStringsOffset().Int32Value(), - TargetReg(kArg0), kNotVolatile); + TargetRefReg(kArg0), kNotVolatile); // Might call out to helper, which will return resolved string in kRet0 - LoadRefDisp(TargetReg(kArg0), offset_of_string, TargetReg(kRet0), kNotVolatile); - LIR* fromfast = OpCmpImmBranch(kCondEq, TargetReg(kRet0), 0, NULL); + LoadRefDisp(TargetRefReg(kArg0), offset_of_string, TargetRefReg(kRet0), kNotVolatile); + LIR* fromfast = OpCmpImmBranch(kCondEq, TargetRefReg(kRet0), 0, NULL); LIR* cont = NewLIR0(kPseudoTargetLabel); { @@ -1189,8 +1192,9 @@ void Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_know FlushAllRegs(); // May generate a call - use explicit registers LockCallTemps(); - LoadCurrMethodDirect(TargetReg(kArg1)); // kArg1 <= current Method* - RegStorage class_reg = TargetReg(kArg2); // kArg2 will hold the Class* + RegStorage method_reg = TargetRefReg(kArg1); + LoadCurrMethodDirect(method_reg); // kArg1 <= current Method* + RegStorage class_reg = TargetRefReg(kArg2); // kArg2 will hold the Class* if (needs_access_check) { // Check we have access to type_idx and if not throw IllegalAccessError, // returns Class* in kArg0 @@ -1205,12 +1209,12 @@ void Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_know LoadValueDirectFixed(rl_src, TargetReg(kArg0)); // kArg0 <= ref } else if (use_declaring_class) { LoadValueDirectFixed(rl_src, TargetReg(kArg0)); // kArg0 <= ref - LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DeclaringClassOffset().Int32Value(), + LoadRefDisp(method_reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), class_reg, kNotVolatile); } else { // Load dex cache entry into class_reg (kArg2) LoadValueDirectFixed(rl_src, TargetReg(kArg0)); // kArg0 <= ref - LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), + LoadRefDisp(method_reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), class_reg, kNotVolatile); int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value(); LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile); @@ -1224,7 +1228,7 @@ void Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_know } else { CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx, true); } - OpRegCopy(TargetReg(kArg2), TargetReg(kRet0)); // Align usage with fast path + OpRegCopy(TargetRefReg(kArg2), TargetRefReg(kRet0)); // Align usage with fast path LoadValueDirectFixed(rl_src, TargetReg(kArg0)); /* reload Ref */ // Rejoin code paths LIR* hop_target = NewLIR0(kPseudoTargetLabel); @@ -1232,7 +1236,7 @@ void Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_know } } /* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result */ - RegLocation rl_result = GetReturn(kRefReg); + RegLocation rl_result = GetReturn(kCoreReg); if (cu_->instruction_set == kMips) { // On MIPS rArg0 != rl_result, place false in result if branch is taken. 
LoadConstant(rl_result.reg, 0); @@ -1241,7 +1245,7 @@ void Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_know /* load object->klass_ */ DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0); - LoadRefDisp(TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(), TargetReg(kArg1), + LoadRefDisp(TargetRefReg(kArg0), mirror::Object::ClassOffset().Int32Value(), TargetRefReg(kArg1), kNotVolatile); /* kArg0 is ref, kArg1 is ref->klass_, kArg2 is class */ LIR* branchover = NULL; @@ -1339,26 +1343,27 @@ void Mir2Lir::GenCheckCast(uint32_t insn_idx, uint32_t type_idx, RegLocation rl_ FlushAllRegs(); // May generate a call - use explicit registers LockCallTemps(); - LoadCurrMethodDirect(TargetReg(kArg1)); // kArg1 <= current Method* - RegStorage class_reg = TargetReg(kArg2); // kArg2 will hold the Class* + RegStorage method_reg = TargetRefReg(kArg1); + LoadCurrMethodDirect(method_reg); // kArg1 <= current Method* + RegStorage class_reg = TargetRefReg(kArg2); // kArg2 will hold the Class* if (needs_access_check) { // Check we have access to type_idx and if not throw IllegalAccessError, // returns Class* in kRet0 // InitializeTypeAndVerifyAccess(idx, method) if (cu_->target64) { - CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(8, pInitializeTypeAndVerifyAccess), - type_idx, TargetReg(kArg1), true); + CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(8, pInitializeTypeAndVerifyAccess), + type_idx, true); } else { - CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess), - type_idx, TargetReg(kArg1), true); + CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess), + type_idx, true); } - OpRegCopy(class_reg, TargetReg(kRet0)); // Align usage with fast path + OpRegCopy(class_reg, TargetRefReg(kRet0)); // Align usage with fast path } else if (use_declaring_class) { - LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DeclaringClassOffset().Int32Value(), + LoadRefDisp(method_reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), class_reg, kNotVolatile); } else { // Load dex cache entry into class_reg (kArg2) - LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), + LoadRefDisp(method_reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), class_reg, kNotVolatile); int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value(); LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile); @@ -1383,12 +1388,12 @@ void Mir2Lir::GenCheckCast(uint32_t insn_idx, uint32_t type_idx, RegLocation rl_ // InitializeTypeFromCode(idx, method) if (m2l_->cu_->target64) { m2l_->CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(8, pInitializeType), type_idx_, - m2l_->TargetReg(kArg1), true); + m2l_->TargetRefReg(kArg1), true); } else { m2l_->CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx_, - m2l_->TargetReg(kArg1), true); + m2l_->TargetRefReg(kArg1), true); } - m2l_->OpRegCopy(class_reg_, m2l_->TargetReg(kRet0)); // Align usage with fast path + m2l_->OpRegCopy(class_reg_, m2l_->TargetRefReg(kRet0)); // Align usage with fast path m2l_->OpUnconditionalBranch(cont_); } @@ -1401,7 +1406,7 @@ void Mir2Lir::GenCheckCast(uint32_t insn_idx, uint32_t type_idx, RegLocation rl_ } } // At this point, class_reg (kArg2) has class - LoadValueDirectFixed(rl_src, TargetReg(kArg0)); // kArg0 <= ref + LoadValueDirectFixed(rl_src, TargetRefReg(kArg0)); // kArg0 <= ref // Slow path for the case where the classes are not equal. 
In this case we need // to call a helper function to do the check. @@ -1435,7 +1440,7 @@ void Mir2Lir::GenCheckCast(uint32_t insn_idx, uint32_t type_idx, RegLocation rl_ if (type_known_abstract) { // Easier case, run slow path if target is non-null (slow path will load from target) - LIR* branch = OpCmpImmBranch(kCondNe, TargetReg(kArg0), 0, NULL); + LIR* branch = OpCmpImmBranch(kCondNe, TargetReg(kArg0), 0, nullptr); LIR* cont = NewLIR0(kPseudoTargetLabel); AddSlowPath(new (arena_) SlowPath(this, branch, cont, true)); } else { @@ -1444,13 +1449,13 @@ void Mir2Lir::GenCheckCast(uint32_t insn_idx, uint32_t type_idx, RegLocation rl_ // slow path if the classes are not equal. /* Null is OK - continue */ - LIR* branch1 = OpCmpImmBranch(kCondEq, TargetReg(kArg0), 0, NULL); + LIR* branch1 = OpCmpImmBranch(kCondEq, TargetReg(kArg0), 0, nullptr); /* load object->klass_ */ DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0); - LoadRefDisp(TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(), TargetReg(kArg1), - kNotVolatile); + LoadRefDisp(TargetRefReg(kArg0), mirror::Object::ClassOffset().Int32Value(), + TargetRefReg(kArg1), kNotVolatile); - LIR* branch2 = OpCmpBranch(kCondNe, TargetReg(kArg1), class_reg, NULL); + LIR* branch2 = OpCmpBranch(kCondNe, TargetRefReg(kArg1), class_reg, nullptr); LIR* cont = NewLIR0(kPseudoTargetLabel); // Add the slow path that will not perform load since this is already done. diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index 569c97f3ae..bf51d28be3 100644 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -185,11 +185,11 @@ void Mir2Lir::CallRuntimeHelperImmRegLocation(ThreadOffset<pointer_size> helper_ RegLocation arg1, bool safepoint_pc) { RegStorage r_tgt = CallHelperSetup(helper_offset); if (arg1.wide == 0) { - LoadValueDirectFixed(arg1, TargetReg(kArg1)); + LoadValueDirectFixed(arg1, TargetReg(kArg1, arg1)); } else { RegStorage r_tmp; if (cu_->target64) { - r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg()); + r_tmp = TargetReg(kArg1, true); } else { if (cu_->instruction_set == kMips) { // skip kArg1 for stack alignment. 
@@ -211,7 +211,8 @@ template <size_t pointer_size> void Mir2Lir::CallRuntimeHelperRegLocationImm(ThreadOffset<pointer_size> helper_offset, RegLocation arg0, int arg1, bool safepoint_pc) { RegStorage r_tgt = CallHelperSetup(helper_offset); - LoadValueDirectFixed(arg0, TargetReg(kArg0)); + DCHECK(!arg0.wide); + LoadValueDirectFixed(arg0, TargetReg(kArg0, arg0)); LoadConstant(TargetReg(kArg1), arg1); ClobberCallerSave(); CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc); @@ -223,7 +224,7 @@ template <size_t pointer_size> void Mir2Lir::CallRuntimeHelperImmReg(ThreadOffset<pointer_size> helper_offset, int arg0, RegStorage arg1, bool safepoint_pc) { RegStorage r_tgt = CallHelperSetup(helper_offset); - OpRegCopy(TargetReg(kArg1), arg1); + OpRegCopy(TargetReg(kArg1, arg1.Is64Bit()), arg1); LoadConstant(TargetReg(kArg0), arg0); ClobberCallerSave(); CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc); @@ -276,7 +277,7 @@ void Mir2Lir::CallRuntimeHelperRegMethodRegLocation(ThreadOffset<pointer_size> h OpRegCopy(TargetReg(kArg0), arg0); } LoadCurrMethodDirect(TargetReg(kArg1)); - LoadValueDirectFixed(arg2, TargetReg(kArg2)); + LoadValueDirectFixed(arg2, TargetReg(kArg2, arg2)); ClobberCallerSave(); CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc); } @@ -288,80 +289,103 @@ void Mir2Lir::CallRuntimeHelperRegLocationRegLocation(ThreadOffset<pointer_size> RegLocation arg0, RegLocation arg1, bool safepoint_pc) { RegStorage r_tgt = CallHelperSetup(helper_offset); - if (arg0.wide == 0) { - LoadValueDirectFixed(arg0, arg0.fp ? TargetReg(kFArg0) : TargetReg(kArg0)); + if (cu_->instruction_set == kArm64) { + RegStorage arg0_reg = TargetReg((arg0.fp) ? kFArg0 : kArg0, arg0); + + RegStorage arg1_reg; + if (arg1.fp == arg0.fp) { + arg1_reg = TargetReg((arg1.fp) ? kFArg1 : kArg1, arg1); + } else { + arg1_reg = TargetReg((arg1.fp) ? kFArg0 : kArg0, arg1); + } + + if (arg0.wide == 0) { + LoadValueDirectFixed(arg0, arg0_reg); + } else { + LoadValueDirectWideFixed(arg0, arg0_reg); + } + if (arg1.wide == 0) { - if (cu_->instruction_set == kMips) { - LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg1)); - } else if (cu_->instruction_set == kArm64) { - LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg1) : TargetReg(kArg1)); - } else if (cu_->instruction_set == kX86_64) { - if (arg0.fp) { - LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg1) : TargetReg(kArg0)); + LoadValueDirectFixed(arg1, arg1_reg); + } else { + LoadValueDirectWideFixed(arg1, arg1_reg); + } + } else { + if (arg0.wide == 0) { + LoadValueDirectFixed(arg0, arg0.fp ? TargetReg(kFArg0) : TargetReg(kArg0)); + if (arg1.wide == 0) { + if (cu_->instruction_set == kMips) { + LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg1)); + } else if (cu_->instruction_set == kArm64) { + LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg1) : TargetReg(kArg1)); + } else if (cu_->instruction_set == kX86_64) { + if (arg0.fp) { + LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg1) : TargetReg(kArg0)); + } else { + LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg0) : TargetReg(kArg1)); + } } else { - LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg0) : TargetReg(kArg1)); + LoadValueDirectFixed(arg1, TargetReg(kArg1)); } } else { - LoadValueDirectFixed(arg1, TargetReg(kArg1)); + if (cu_->instruction_set == kMips) { + RegStorage r_tmp; + if (arg1.fp) { + r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg2), TargetReg(kFArg3)); + } else { + // skip kArg1 for stack alignment. 
+ r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3)); + } + LoadValueDirectWideFixed(arg1, r_tmp); + } else { + RegStorage r_tmp; + if (cu_->target64) { + r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg()); + } else { + r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2)); + } + LoadValueDirectWideFixed(arg1, r_tmp); + } } } else { - if (cu_->instruction_set == kMips) { - RegStorage r_tmp; - if (arg1.fp) { - r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg2), TargetReg(kFArg3)); + RegStorage r_tmp; + if (arg0.fp) { + if (cu_->target64) { + r_tmp = RegStorage::FloatSolo64(TargetReg(kFArg0).GetReg()); } else { - // skip kArg1 for stack alignment. - r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3)); + r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg0), TargetReg(kFArg1)); } - LoadValueDirectWideFixed(arg1, r_tmp); } else { - RegStorage r_tmp; if (cu_->target64) { - r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg()); + r_tmp = RegStorage::Solo64(TargetReg(kArg0).GetReg()); } else { - r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2)); + r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1)); } - LoadValueDirectWideFixed(arg1, r_tmp); - } - } - } else { - RegStorage r_tmp; - if (arg0.fp) { - if (cu_->target64) { - r_tmp = RegStorage::FloatSolo64(TargetReg(kFArg0).GetReg()); - } else { - r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg0), TargetReg(kFArg1)); - } - } else { - if (cu_->target64) { - r_tmp = RegStorage::Solo64(TargetReg(kArg0).GetReg()); - } else { - r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1)); - } - } - LoadValueDirectWideFixed(arg0, r_tmp); - if (arg1.wide == 0) { - if (cu_->target64) { - LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg1) : TargetReg(kArg1)); - } else { - LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg2)); } - } else { - RegStorage r_tmp; - if (arg1.fp) { + LoadValueDirectWideFixed(arg0, r_tmp); + if (arg1.wide == 0) { if (cu_->target64) { - r_tmp = RegStorage::FloatSolo64(TargetReg(kFArg1).GetReg()); + LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg1) : TargetReg(kArg1)); } else { - r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg2), TargetReg(kFArg3)); + LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg2)); } } else { - if (cu_->target64) { - r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg()); + RegStorage r_tmp; + if (arg1.fp) { + if (cu_->target64) { + r_tmp = RegStorage::FloatSolo64(TargetReg(kFArg1).GetReg()); + } else { + r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg2), TargetReg(kFArg3)); + } } else { - r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3)); + if (cu_->target64) { + r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg()); + } else { + r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3)); + } } + LoadValueDirectWideFixed(arg1, r_tmp); } - LoadValueDirectWideFixed(arg1, r_tmp); } } ClobberCallerSave(); @@ -381,16 +405,16 @@ void Mir2Lir::CopyToArgumentRegs(RegStorage arg0, RegStorage arg1) { if (IsSameReg(arg1, TargetReg(kArg0))) { if (IsSameReg(arg0, TargetReg(kArg1))) { // Swap kArg0 and kArg1 with kArg2 as temp. 
- OpRegCopy(TargetArgReg(kArg2, arg1.Is64Bit()), arg1); - OpRegCopy(TargetArgReg(kArg0, arg0.Is64Bit()), arg0); - OpRegCopy(TargetArgReg(kArg1, arg1.Is64Bit()), TargetReg(kArg2)); + OpRegCopy(TargetReg(kArg2, arg1.Is64Bit()), arg1); + OpRegCopy(TargetReg(kArg0, arg0.Is64Bit()), arg0); + OpRegCopy(TargetReg(kArg1, arg1.Is64Bit()), TargetReg(kArg2, arg1.Is64Bit())); } else { - OpRegCopy(TargetArgReg(kArg1, arg1.Is64Bit()), arg1); - OpRegCopy(TargetArgReg(kArg0, arg0.Is64Bit()), arg0); + OpRegCopy(TargetReg(kArg1, arg1.Is64Bit()), arg1); + OpRegCopy(TargetReg(kArg0, arg0.Is64Bit()), arg0); } } else { - OpRegCopy(TargetArgReg(kArg0, arg0.Is64Bit()), arg0); - OpRegCopy(TargetArgReg(kArg1, arg1.Is64Bit()), arg1); + OpRegCopy(TargetReg(kArg0, arg0.Is64Bit()), arg0); + OpRegCopy(TargetReg(kArg1, arg1.Is64Bit()), arg1); } } @@ -421,9 +445,9 @@ template <size_t pointer_size> void Mir2Lir::CallRuntimeHelperImmMethodRegLocation(ThreadOffset<pointer_size> helper_offset, int arg0, RegLocation arg2, bool safepoint_pc) { RegStorage r_tgt = CallHelperSetup(helper_offset); - LoadValueDirectFixed(arg2, TargetReg(kArg2)); + LoadValueDirectFixed(arg2, TargetReg(kArg2, arg2)); LoadCurrMethodDirect(TargetReg(kArg1)); - LoadConstant(TargetReg(kArg0), arg0); + LoadConstant(TargetReg(kArg0, arg0), arg0); ClobberCallerSave(); CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc); } @@ -449,13 +473,13 @@ void Mir2Lir::CallRuntimeHelperImmRegLocationRegLocation(ThreadOffset<pointer_si RegStorage r_tgt = CallHelperSetup(helper_offset); DCHECK_EQ(static_cast<unsigned int>(arg1.wide), 0U); // The static_cast works around an // instantiation bug in GCC. - LoadValueDirectFixed(arg1, TargetReg(kArg1)); + LoadValueDirectFixed(arg1, TargetReg(kArg1, arg1)); if (arg2.wide == 0) { - LoadValueDirectFixed(arg2, TargetReg(kArg2)); + LoadValueDirectFixed(arg2, TargetReg(kArg2, arg2)); } else { RegStorage r_tmp; if (cu_->target64) { - r_tmp = RegStorage::Solo64(TargetReg(kArg2).GetReg()); + r_tmp = TargetReg(kArg2, true); } else { r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3)); } @@ -474,12 +498,9 @@ void Mir2Lir::CallRuntimeHelperRegLocationRegLocationRegLocation(ThreadOffset<po RegLocation arg2, bool safepoint_pc) { RegStorage r_tgt = CallHelperSetup(helper_offset); - DCHECK_EQ(static_cast<unsigned int>(arg0.wide), 0U); - LoadValueDirectFixed(arg0, TargetReg(kArg0)); - DCHECK_EQ(static_cast<unsigned int>(arg1.wide), 0U); - LoadValueDirectFixed(arg1, TargetReg(kArg1)); - DCHECK_EQ(static_cast<unsigned int>(arg1.wide), 0U); - LoadValueDirectFixed(arg2, TargetReg(kArg2)); + LoadValueDirectFixed(arg0, TargetReg(kArg0, arg0)); + LoadValueDirectFixed(arg1, TargetReg(kArg1, arg1)); + LoadValueDirectFixed(arg2, TargetReg(kArg2, arg2)); ClobberCallerSave(); CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc); } @@ -502,13 +523,13 @@ void Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { */ RegLocation rl_src = rl_method; rl_src.location = kLocPhysReg; - rl_src.reg = TargetReg(kArg0); + rl_src.reg = TargetRefReg(kArg0); rl_src.home = false; MarkLive(rl_src); StoreValue(rl_method, rl_src); // If Method* has been promoted, explicitly flush if (rl_method.location == kLocPhysReg) { - StoreRefDisp(TargetReg(kSp), 0, TargetReg(kArg0), kNotVolatile); + StoreRefDisp(TargetReg(kSp), 0, rl_src.reg, kNotVolatile); } if (cu_->num_ins == 0) { @@ -615,15 +636,16 @@ static int NextSDCallInsn(CompilationUnit* cu, CallInfo* info, return -1; } } else { + RegStorage arg0_ref = cg->TargetRefReg(kArg0); switch 
(state) { case 0: // Get the current Method* [sets kArg0] // TUNING: we can save a reg copy if Method* has been promoted. - cg->LoadCurrMethodDirect(cg->TargetReg(kArg0)); + cg->LoadCurrMethodDirect(arg0_ref); break; case 1: // Get method->dex_cache_resolved_methods_ - cg->LoadRefDisp(cg->TargetReg(kArg0), + cg->LoadRefDisp(arg0_ref, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(), - cg->TargetReg(kArg0), + arg0_ref, kNotVolatile); // Set up direct code if known. if (direct_code != 0) { @@ -637,15 +659,15 @@ static int NextSDCallInsn(CompilationUnit* cu, CallInfo* info, break; case 2: // Grab target method* CHECK_EQ(cu->dex_file, target_method.dex_file); - cg->LoadRefDisp(cg->TargetReg(kArg0), + cg->LoadRefDisp(arg0_ref, ObjArray::OffsetOfElement(target_method.dex_method_index).Int32Value(), - cg->TargetReg(kArg0), + arg0_ref, kNotVolatile); break; case 3: // Grab the code from the method* if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) { if (direct_code == 0) { - cg->LoadWordDisp(cg->TargetReg(kArg0), + cg->LoadWordDisp(arg0_ref, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value(), cg->TargetReg(kInvokeTgt)); } @@ -678,13 +700,13 @@ static int NextVCallInsn(CompilationUnit* cu, CallInfo* info, switch (state) { case 0: { // Get "this" [set kArg1] RegLocation rl_arg = info->args[0]; - cg->LoadValueDirectFixed(rl_arg, cg->TargetReg(kArg1)); + cg->LoadValueDirectFixed(rl_arg, cg->TargetRefReg(kArg1)); break; } case 1: // Is "this" null? [use kArg1] - cg->GenNullCheck(cg->TargetReg(kArg1), info->opt_flags); + cg->GenNullCheck(cg->TargetRefReg(kArg1), info->opt_flags); // get this->klass_ [use kArg1, set kInvokeTgt] - cg->LoadRefDisp(cg->TargetReg(kArg1), mirror::Object::ClassOffset().Int32Value(), + cg->LoadRefDisp(cg->TargetRefReg(kArg1), mirror::Object::ClassOffset().Int32Value(), cg->TargetReg(kInvokeTgt), kNotVolatile); cg->MarkPossibleNullPointerException(info->opt_flags); @@ -697,12 +719,12 @@ static int NextVCallInsn(CompilationUnit* cu, CallInfo* info, case 3: // Get target method [use kInvokeTgt, set kArg0] cg->LoadRefDisp(cg->TargetReg(kInvokeTgt), ObjArray::OffsetOfElement(method_idx).Int32Value(), - cg->TargetReg(kArg0), + cg->TargetRefReg(kArg0), kNotVolatile); break; case 4: // Get the compiled code address [uses kArg0, sets kInvokeTgt] if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) { - cg->LoadWordDisp(cg->TargetReg(kArg0), + cg->LoadWordDisp(cg->TargetRefReg(kArg0), mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value(), cg->TargetReg(kInvokeTgt)); break; @@ -736,13 +758,13 @@ static int NextInterfaceCallInsn(CompilationUnit* cu, CallInfo* info, int state, break; case 1: { // Get "this" [set kArg1] RegLocation rl_arg = info->args[0]; - cg->LoadValueDirectFixed(rl_arg, cg->TargetReg(kArg1)); + cg->LoadValueDirectFixed(rl_arg, cg->TargetRefReg(kArg1)); break; } case 2: // Is "this" null? [use kArg1] - cg->GenNullCheck(cg->TargetReg(kArg1), info->opt_flags); + cg->GenNullCheck(cg->TargetRefReg(kArg1), info->opt_flags); // Get this->klass_ [use kArg1, set kInvokeTgt] - cg->LoadRefDisp(cg->TargetReg(kArg1), mirror::Object::ClassOffset().Int32Value(), + cg->LoadRefDisp(cg->TargetRefReg(kArg1), mirror::Object::ClassOffset().Int32Value(), cg->TargetReg(kInvokeTgt), kNotVolatile); cg->MarkPossibleNullPointerException(info->opt_flags); @@ -757,12 +779,12 @@ static int NextInterfaceCallInsn(CompilationUnit* cu, CallInfo* info, int state, // NOTE: native pointer. 
cg->LoadRefDisp(cg->TargetReg(kInvokeTgt), ObjArray::OffsetOfElement(method_idx % ClassLinker::kImtSize).Int32Value(), - cg->TargetReg(kArg0), + cg->TargetRefReg(kArg0), kNotVolatile); break; case 5: // Get the compiled code address [use kArg0, set kInvokeTgt] if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) { - cg->LoadWordDisp(cg->TargetReg(kArg0), + cg->LoadWordDisp(cg->TargetRefReg(kArg0), mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value(), cg->TargetReg(kInvokeTgt)); break; diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc index bfb77fc222..1cddeb9771 100644 --- a/compiler/dex/quick/gen_loadstore.cc +++ b/compiler/dex/quick/gen_loadstore.cc @@ -192,7 +192,7 @@ void Mir2Lir::StoreValue(RegLocation rl_dest, RegLocation rl_src) { IsPromoted(rl_src.reg) || (rl_dest.location == kLocPhysReg)) { // Src is live/promoted or Dest has assigned reg. - rl_dest = EvalLoc(rl_dest, kAnyReg, false); + rl_dest = EvalLoc(rl_dest, rl_dest.ref || rl_src.ref ? kRefReg : kAnyReg, false); OpRegCopy(rl_dest.reg, rl_src.reg); } else { // Just re-assign the registers. Dest gets Src's regs @@ -201,7 +201,7 @@ void Mir2Lir::StoreValue(RegLocation rl_dest, RegLocation rl_src) { } } else { // Load Src either into promoted Dest or temps allocated for Dest - rl_dest = EvalLoc(rl_dest, kAnyReg, false); + rl_dest = EvalLoc(rl_dest, rl_dest.ref ? kRefReg : kAnyReg, false); LoadValueDirect(rl_src, rl_dest.reg); } diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h index 9912101eb1..9a62255f5d 100644 --- a/compiler/dex/quick/mir_to_lir-inl.h +++ b/compiler/dex/quick/mir_to_lir-inl.h @@ -253,6 +253,19 @@ inline art::Mir2Lir::RegisterInfo* Mir2Lir::GetRegInfo(RegStorage reg) { return res; } +inline void Mir2Lir::CheckRegLocation(RegLocation rl) const { + if (kFailOnSizeError || kReportSizeError) { + CheckRegLocationImpl(rl, kFailOnSizeError, kReportSizeError); + } +} + +inline void Mir2Lir::CheckRegStorage(RegStorage rs, WidenessCheck wide, RefCheck ref, FPCheck fp) + const { + if (kFailOnSizeError || kReportSizeError) { + CheckRegStorageImpl(rs, wide, ref, fp, kFailOnSizeError, kReportSizeError); + } +} + } // namespace art #endif // ART_COMPILER_DEX_QUICK_MIR_TO_LIR_INL_H_ diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index 5d68187d8b..984e8ea5f8 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -1267,4 +1267,55 @@ LIR* Mir2Lir::LIRSlowPath::GenerateTargetLabel(int opcode) { return target; } + +void Mir2Lir::CheckRegStorageImpl(RegStorage rs, WidenessCheck wide, RefCheck ref, FPCheck fp, + bool fail, bool report) + const { + if (rs.Valid()) { + if (ref == RefCheck::kCheckRef) { + if (cu_->target64 && !rs.Is64Bit()) { + if (fail) { + CHECK(false) << "Reg storage not 64b for ref."; + } else if (report) { + LOG(WARNING) << "Reg storage not 64b for ref."; + } + } + } + if (wide == WidenessCheck::kCheckWide) { + if (!rs.Is64Bit()) { + if (fail) { + CHECK(false) << "Reg storage not 64b for wide."; + } else if (report) { + LOG(WARNING) << "Reg storage not 64b for wide."; + } + } + } + // A tighter check would be nice, but for now soft-float will not check float at all. 
+ if (fp == FPCheck::kCheckFP && cu_->instruction_set != kArm) { + if (!rs.IsFloat()) { + if (fail) { + CHECK(false) << "Reg storage not float for fp."; + } else if (report) { + LOG(WARNING) << "Reg storage not float for fp."; + } + } + } else if (fp == FPCheck::kCheckNotFP) { + if (rs.IsFloat()) { + if (fail) { + CHECK(false) << "Reg storage float for not-fp."; + } else if (report) { + LOG(WARNING) << "Reg storage float for not-fp."; + } + } + } + } +} + +void Mir2Lir::CheckRegLocationImpl(RegLocation rl, bool fail, bool report) const { + // Regrettably can't use the fp part of rl, as that is not really indicative of where a value + // will be stored. + CheckRegStorageImpl(rl.reg, rl.wide ? WidenessCheck::kCheckWide : WidenessCheck::kCheckNotWide, + rl.ref ? RefCheck::kCheckRef : RefCheck::kCheckNotRef, FPCheck::kIgnoreFP, fail, report); +} + } // namespace art diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index 171e871393..0c00df39f8 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -21,6 +21,7 @@ #include "compiled_method.h" #include "dex/compiler_enums.h" #include "dex/compiler_ir.h" +#include "dex/reg_location.h" #include "dex/reg_storage.h" #include "dex/backend.h" #include "dex/quick/resource_mask.h" @@ -124,7 +125,6 @@ struct CompilationUnit; struct InlineMethod; struct MIR; struct LIR; -struct RegLocation; struct RegisterInfo; class DexFileMethodInliner; class MIRGraph; @@ -237,6 +237,9 @@ COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kX86_64), class Mir2Lir : public Backend { public: + static constexpr bool kFailOnSizeError = true && kIsDebugBuild; + static constexpr bool kReportSizeError = true && kIsDebugBuild; + /* * Auxiliary information describing the location of data embedded in the Dalvik * byte code stream. @@ -1171,7 +1174,43 @@ class Mir2Lir : public Backend { virtual void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) = 0; // Required for target - register utilities. + + /** + * @brief Portable way of getting special registers from the backend. + * @param reg Enumeration describing the purpose of the register. + * @return Return the #RegStorage corresponding to the given purpose @p reg. + * @note This function is currently allowed to return any suitable view of the registers + * (e.g. this could be 64-bit solo or 32-bit solo for 64-bit backends). + */ virtual RegStorage TargetReg(SpecialTargetRegister reg) = 0; + + /** + * @brief Portable way of getting special registers from the backend. + * @param reg Enumeration describing the purpose of the register. + * @param is_wide Whether the view should be 64-bit (rather than 32-bit). + * @return Return the #RegStorage corresponding to the given purpose @p reg. + */ + virtual RegStorage TargetReg(SpecialTargetRegister reg, bool is_wide) { + return TargetReg(reg); + } + + /** + * @brief Portable way of getting a special register for storing a reference. + * @see TargetReg() + */ + virtual RegStorage TargetRefReg(SpecialTargetRegister reg) { + return TargetReg(reg); + } + + // Get a reg storage corresponding to the wide & ref flags of the reg location. 
+ virtual RegStorage TargetReg(SpecialTargetRegister reg, RegLocation loc) { + if (loc.ref) { + return TargetRefReg(reg); + } else { + return TargetReg(reg, loc.wide); + } + } + virtual RegStorage GetArgMappingToPhysicalReg(int arg_num) = 0; virtual RegLocation GetReturnAlt() = 0; virtual RegLocation GetReturnWideAlt() = 0; @@ -1567,6 +1606,45 @@ class Mir2Lir : public Backend { */ virtual void GenConst(RegLocation rl_dest, int value); + enum class WidenessCheck { // private + kIgnoreWide, + kCheckWide, + kCheckNotWide + }; + + enum class RefCheck { // private + kIgnoreRef, + kCheckRef, + kCheckNotRef + }; + + enum class FPCheck { // private + kIgnoreFP, + kCheckFP, + kCheckNotFP + }; + + /** + * Check whether a reg storage seems well-formed, that is, if a reg storage is valid, + * that it has the expected form for the flags. + * A flag value of 0 means ignore. A flag value of -1 means false. A flag value of 1 means true. + */ + void CheckRegStorageImpl(RegStorage rs, WidenessCheck wide, RefCheck ref, FPCheck fp, bool fail, + bool report) + const; + + /** + * Check whether a reg location seems well-formed, that is, if a reg storage is encoded, + * that it has the expected size. + */ + void CheckRegLocationImpl(RegLocation rl, bool fail, bool report) const; + + // See CheckRegStorageImpl. Will print or fail depending on kFailOnSizeError and + // kReportSizeError. + void CheckRegStorage(RegStorage rs, WidenessCheck wide, RefCheck ref, FPCheck fp) const; + // See CheckRegLocationImpl. + void CheckRegLocation(RegLocation rl) const; + public: // TODO: add accessors for these. LIR* literal_list_; // Constants. diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc index 81dabd448e..38370ad889 100644 --- a/compiler/dex/quick/ralloc_util.cc +++ b/compiler/dex/quick/ralloc_util.cc @@ -420,24 +420,28 @@ RegStorage Mir2Lir::AllocTempWide() { RegStorage high_reg = AllocTemp(); res = RegStorage::MakeRegPair(low_reg, high_reg); } + CheckRegStorage(res, WidenessCheck::kCheckWide, RefCheck::kIgnoreRef, FPCheck::kCheckNotFP); return res; } RegStorage Mir2Lir::AllocTempRef() { RegStorage res = AllocTempBody(*reg_pool_->ref_regs_, reg_pool_->next_ref_reg_, true); DCHECK(!res.IsPair()); + CheckRegStorage(res, WidenessCheck::kCheckNotWide, RefCheck::kCheckRef, FPCheck::kCheckNotFP); return res; } RegStorage Mir2Lir::AllocTempSingle() { RegStorage res = AllocTempBody(reg_pool_->sp_regs_, ®_pool_->next_sp_reg_, true); DCHECK(res.IsSingle()) << "Reg: 0x" << std::hex << res.GetRawBits(); + CheckRegStorage(res, WidenessCheck::kCheckNotWide, RefCheck::kCheckNotRef, FPCheck::kIgnoreFP); return res; } RegStorage Mir2Lir::AllocTempDouble() { RegStorage res = AllocTempBody(reg_pool_->dp_regs_, ®_pool_->next_dp_reg_, true); DCHECK(res.IsDouble()) << "Reg: 0x" << std::hex << res.GetRawBits(); + CheckRegStorage(res, WidenessCheck::kCheckWide, RefCheck::kCheckNotRef, FPCheck::kIgnoreFP); return res; } @@ -474,13 +478,15 @@ RegStorage Mir2Lir::AllocLiveReg(int s_reg, int reg_class, bool wide) { RegStorage reg; if (reg_class == kRefReg) { reg = FindLiveReg(*reg_pool_->ref_regs_, s_reg); + CheckRegStorage(reg, WidenessCheck::kCheckNotWide, RefCheck::kCheckRef, FPCheck::kCheckNotFP); } if (!reg.Valid() && ((reg_class == kAnyReg) || (reg_class == kFPReg))) { reg = FindLiveReg(wide ? reg_pool_->dp_regs_ : reg_pool_->sp_regs_, s_reg); } if (!reg.Valid() && (reg_class != kFPReg)) { if (cu_->target64) { - reg = FindLiveReg(wide ? 
reg_pool_->core64_regs_ : reg_pool_->core_regs_, s_reg); + reg = FindLiveReg(wide || reg_class == kRefReg ? reg_pool_->core64_regs_ : + reg_pool_->core_regs_, s_reg); } else { reg = FindLiveReg(reg_pool_->core_regs_, s_reg); } @@ -525,6 +531,9 @@ RegStorage Mir2Lir::AllocLiveReg(int s_reg, int reg_class, bool wide) { ClobberSReg(s_reg + 1); } } + CheckRegStorage(reg, WidenessCheck::kIgnoreWide, + reg_class == kRefReg ? RefCheck::kCheckRef : RefCheck::kIgnoreRef, + FPCheck::kIgnoreFP); return reg; } @@ -996,7 +1005,7 @@ RegLocation Mir2Lir::UpdateLoc(RegLocation loc) { if (loc.location != kLocPhysReg) { DCHECK((loc.location == kLocDalvikFrame) || (loc.location == kLocCompilerTemp)); - RegStorage reg = AllocLiveReg(loc.s_reg_low, kAnyReg, false); + RegStorage reg = AllocLiveReg(loc.s_reg_low, loc.ref ? kRefReg : kAnyReg, false); if (reg.Valid()) { bool match = true; RegisterInfo* info = GetRegInfo(reg); @@ -1010,6 +1019,7 @@ RegLocation Mir2Lir::UpdateLoc(RegLocation loc) { FreeTemp(reg); } } + CheckRegLocation(loc); } return loc; } @@ -1044,6 +1054,7 @@ RegLocation Mir2Lir::UpdateLocWide(RegLocation loc) { FreeTemp(reg); } } + CheckRegLocation(loc); } return loc; } @@ -1073,6 +1084,7 @@ RegLocation Mir2Lir::EvalLocWide(RegLocation loc, int reg_class, bool update) { MarkWide(loc.reg); MarkLive(loc); } + CheckRegLocation(loc); return loc; } @@ -1086,10 +1098,16 @@ RegLocation Mir2Lir::EvalLocWide(RegLocation loc, int reg_class, bool update) { loc.location = kLocPhysReg; MarkLive(loc); } + CheckRegLocation(loc); return loc; } RegLocation Mir2Lir::EvalLoc(RegLocation loc, int reg_class, bool update) { + // Narrow reg_class if the loc is a ref. + if (loc.ref && reg_class == kAnyReg) { + reg_class = kRefReg; + } + if (loc.wide) { return EvalLocWide(loc, reg_class, update); } @@ -1106,17 +1124,20 @@ RegLocation Mir2Lir::EvalLoc(RegLocation loc, int reg_class, bool update) { loc.reg = new_reg; MarkLive(loc); } + CheckRegLocation(loc); return loc; } DCHECK_NE(loc.s_reg_low, INVALID_SREG); loc.reg = AllocTypedTemp(loc.fp, reg_class); + CheckRegLocation(loc); if (update) { loc.location = kLocPhysReg; MarkLive(loc); } + CheckRegLocation(loc); return loc; } @@ -1338,6 +1359,7 @@ RegLocation Mir2Lir::GetReturnWide(RegisterClass reg_class) { Clobber(res.reg); LockTemp(res.reg); MarkWide(res.reg); + CheckRegLocation(res); return res; } @@ -1354,6 +1376,7 @@ RegLocation Mir2Lir::GetReturn(RegisterClass reg_class) { } else { LockTemp(res.reg); } + CheckRegLocation(res); return res; } diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index b15591b413..64b4af86a2 100644 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -1883,54 +1883,42 @@ void X86Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { */ ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); for (int i = 0; i < cu_->num_ins; i++) { - PromotionMap* v_map = &promotion_map_[start_vreg + i]; - RegStorage reg = RegStorage::InvalidReg(); // get reg corresponding to input - reg = GetArgMappingToPhysicalReg(i); + RegStorage reg = GetArgMappingToPhysicalReg(i); + RegLocation* t_loc = &ArgLocs[i]; if (reg.Valid()) { - // If arriving in register - bool need_flush = true; - RegLocation* t_loc = &ArgLocs[i]; - if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) { - OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg); - need_flush = false; - } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) { - OpRegCopy(RegStorage::Solo32(v_map->FpReg), reg); - 
need_flush = false; - } else { - need_flush = true; - } + // If arriving in register. - // For wide args, force flush if not fully promoted - if (t_loc->wide) { - PromotionMap* p_map = v_map + (t_loc->high_word ? -1 : +1); - // Is only half promoted? - need_flush |= (p_map->core_location != v_map->core_location) || - (p_map->fp_location != v_map->fp_location); - } - if (need_flush) { - if (t_loc->wide && t_loc->fp) { - StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, k64, kNotVolatile); - // Increment i to skip the next one - i++; - } else if (t_loc->wide && !t_loc->fp) { - StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, k64, kNotVolatile); - // Increment i to skip the next one - i++; + // We have already updated the arg location with promoted info + // so we can be based on it. + if (t_loc->location == kLocPhysReg) { + // Just copy it. + OpRegCopy(t_loc->reg, reg); + } else { + // Needs flush. + if (t_loc->ref) { + StoreRefDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, kNotVolatile); } else { - Store32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), reg); + StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, t_loc->wide ? k64 : k32, + kNotVolatile); } } } else { - // If arriving in frame & promoted - if (v_map->core_location == kLocPhysReg) { - Load32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->core_reg)); - } - if (v_map->fp_location == kLocPhysReg) { - Load32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->FpReg)); + // If arriving in frame & promoted. + if (t_loc->location == kLocPhysReg) { + if (t_loc->ref) { + LoadRefDisp(TargetReg(kSp), SRegOffset(start_vreg + i), t_loc->reg, kNotVolatile); + } else { + LoadBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), t_loc->reg, + t_loc->wide ? k64 : k32, kNotVolatile); + } } } + if (t_loc->wide) { + // Increment i to skip the next one. + i++; + } } } diff --git a/compiler/dex/reg_location.h b/compiler/dex/reg_location.h new file mode 100644 index 0000000000..38f59dac5f --- /dev/null +++ b/compiler/dex/reg_location.h @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DEX_REG_LOCATION_H_ +#define ART_COMPILER_DEX_REG_LOCATION_H_ + +#include "reg_storage.h" + +namespace art { + + +/* + * Whereas a SSA name describes a definition of a Dalvik vreg, the RegLocation describes + * the type of an SSA name (and, can also be used by code generators to record where the + * value is located (i.e. - physical register, frame, spill, etc.). For each SSA name (SReg) + * there is a RegLocation. + * A note on SSA names: + * o SSA names for Dalvik vRegs v0..vN will be assigned 0..N. These represent the "vN_0" + * names. Negative SSA names represent special values not present in the Dalvik byte code. + * For example, SSA name -1 represents an invalid SSA name, and SSA name -2 represents the + * the Method pointer. 
SSA names < -2 are reserved for future use. + * o The vN_0 names for non-argument Dalvik should in practice never be used (as they would + * represent the read of an undefined local variable). The first definition of the + * underlying Dalvik vReg will result in a vN_1 name. + * + * FIXME: The orig_sreg field was added as a workaround for llvm bitcode generation. With + * the latest restructuring, we should be able to remove it and rely on s_reg_low throughout. + */ +struct RegLocation { + RegLocationType location:3; + unsigned wide:1; + unsigned defined:1; // Do we know the type? + unsigned is_const:1; // Constant, value in mir_graph->constant_values[]. + unsigned fp:1; // Floating point? + unsigned core:1; // Non-floating point? + unsigned ref:1; // Something GC cares about. + unsigned high_word:1; // High word of pair? + unsigned home:1; // Does this represent the home location? + RegStorage reg; // Encoded physical registers. + int16_t s_reg_low; // SSA name for low Dalvik word. + int16_t orig_sreg; // TODO: remove after Bitcode gen complete + // and consolidate usage w/ s_reg_low. +}; + +} // namespace art + +#endif // ART_COMPILER_DEX_REG_LOCATION_H_ diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc index 8f4eddbea3..25b489ba79 100644 --- a/compiler/jni/jni_compiler_test.cc +++ b/compiler/jni/jni_compiler_test.cc @@ -1284,13 +1284,6 @@ TEST_F(JniCompilerTest, WithoutImplementation) { EXPECT_TRUE(env_->ExceptionCheck() == JNI_TRUE); } -template <typename U, typename V> V convert(U in) { - DCHECK_LE(sizeof(U), sizeof(V)); - union { U u; V v; } tmp; - tmp.u = in; - return tmp.v; -} - void Java_MyClassNatives_stackArgsIntsFirst(JNIEnv* env, jclass klass, jint i1, jint i2, jint i3, jint i4, jint i5, jint i6, jint i7, jint i8, jint i9, jint i10, jfloat f1, jfloat f2, jfloat f3, jfloat f4, @@ -1307,25 +1300,25 @@ void Java_MyClassNatives_stackArgsIntsFirst(JNIEnv* env, jclass klass, jint i1, EXPECT_EQ(i9, 9); EXPECT_EQ(i10, 10); - jint i11 = convert<jfloat, jint>(f1); + jint i11 = bit_cast<jfloat, jint>(f1); EXPECT_EQ(i11, 11); - jint i12 = convert<jfloat, jint>(f2); + jint i12 = bit_cast<jfloat, jint>(f2); EXPECT_EQ(i12, 12); - jint i13 = convert<jfloat, jint>(f3); + jint i13 = bit_cast<jfloat, jint>(f3); EXPECT_EQ(i13, 13); - jint i14 = convert<jfloat, jint>(f4); + jint i14 = bit_cast<jfloat, jint>(f4); EXPECT_EQ(i14, 14); - jint i15 = convert<jfloat, jint>(f5); + jint i15 = bit_cast<jfloat, jint>(f5); EXPECT_EQ(i15, 15); - jint i16 = convert<jfloat, jint>(f6); + jint i16 = bit_cast<jfloat, jint>(f6); EXPECT_EQ(i16, 16); - jint i17 = convert<jfloat, jint>(f7); + jint i17 = bit_cast<jfloat, jint>(f7); EXPECT_EQ(i17, 17); - jint i18 = convert<jfloat, jint>(f8); + jint i18 = bit_cast<jfloat, jint>(f8); EXPECT_EQ(i18, 18); - jint i19 = convert<jfloat, jint>(f9); + jint i19 = bit_cast<jfloat, jint>(f9); EXPECT_EQ(i19, 19); - jint i20 = convert<jfloat, jint>(f10); + jint i20 = bit_cast<jfloat, jint>(f10); EXPECT_EQ(i20, 20); } @@ -1345,16 +1338,16 @@ TEST_F(JniCompilerTest, StackArgsIntsFirst) { jint i9 = 9; jint i10 = 10; - jfloat f1 = convert<jint, jfloat>(11); - jfloat f2 = convert<jint, jfloat>(12); - jfloat f3 = convert<jint, jfloat>(13); - jfloat f4 = convert<jint, jfloat>(14); - jfloat f5 = convert<jint, jfloat>(15); - jfloat f6 = convert<jint, jfloat>(16); - jfloat f7 = convert<jint, jfloat>(17); - jfloat f8 = convert<jint, jfloat>(18); - jfloat f9 = convert<jint, jfloat>(19); - jfloat f10 = convert<jint, jfloat>(20); + jfloat f1 = bit_cast<jint, 
jfloat>(11); + jfloat f2 = bit_cast<jint, jfloat>(12); + jfloat f3 = bit_cast<jint, jfloat>(13); + jfloat f4 = bit_cast<jint, jfloat>(14); + jfloat f5 = bit_cast<jint, jfloat>(15); + jfloat f6 = bit_cast<jint, jfloat>(16); + jfloat f7 = bit_cast<jint, jfloat>(17); + jfloat f8 = bit_cast<jint, jfloat>(18); + jfloat f9 = bit_cast<jint, jfloat>(19); + jfloat f10 = bit_cast<jint, jfloat>(20); env_->CallStaticVoidMethod(jklass_, jmethod_, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10); @@ -1376,25 +1369,25 @@ void Java_MyClassNatives_stackArgsFloatsFirst(JNIEnv* env, jclass klass, jfloat EXPECT_EQ(i9, 9); EXPECT_EQ(i10, 10); - jint i11 = convert<jfloat, jint>(f1); + jint i11 = bit_cast<jfloat, jint>(f1); EXPECT_EQ(i11, 11); - jint i12 = convert<jfloat, jint>(f2); + jint i12 = bit_cast<jfloat, jint>(f2); EXPECT_EQ(i12, 12); - jint i13 = convert<jfloat, jint>(f3); + jint i13 = bit_cast<jfloat, jint>(f3); EXPECT_EQ(i13, 13); - jint i14 = convert<jfloat, jint>(f4); + jint i14 = bit_cast<jfloat, jint>(f4); EXPECT_EQ(i14, 14); - jint i15 = convert<jfloat, jint>(f5); + jint i15 = bit_cast<jfloat, jint>(f5); EXPECT_EQ(i15, 15); - jint i16 = convert<jfloat, jint>(f6); + jint i16 = bit_cast<jfloat, jint>(f6); EXPECT_EQ(i16, 16); - jint i17 = convert<jfloat, jint>(f7); + jint i17 = bit_cast<jfloat, jint>(f7); EXPECT_EQ(i17, 17); - jint i18 = convert<jfloat, jint>(f8); + jint i18 = bit_cast<jfloat, jint>(f8); EXPECT_EQ(i18, 18); - jint i19 = convert<jfloat, jint>(f9); + jint i19 = bit_cast<jfloat, jint>(f9); EXPECT_EQ(i19, 19); - jint i20 = convert<jfloat, jint>(f10); + jint i20 = bit_cast<jfloat, jint>(f10); EXPECT_EQ(i20, 20); } @@ -1414,16 +1407,16 @@ TEST_F(JniCompilerTest, StackArgsFloatsFirst) { jint i9 = 9; jint i10 = 10; - jfloat f1 = convert<jint, jfloat>(11); - jfloat f2 = convert<jint, jfloat>(12); - jfloat f3 = convert<jint, jfloat>(13); - jfloat f4 = convert<jint, jfloat>(14); - jfloat f5 = convert<jint, jfloat>(15); - jfloat f6 = convert<jint, jfloat>(16); - jfloat f7 = convert<jint, jfloat>(17); - jfloat f8 = convert<jint, jfloat>(18); - jfloat f9 = convert<jint, jfloat>(19); - jfloat f10 = convert<jint, jfloat>(20); + jfloat f1 = bit_cast<jint, jfloat>(11); + jfloat f2 = bit_cast<jint, jfloat>(12); + jfloat f3 = bit_cast<jint, jfloat>(13); + jfloat f4 = bit_cast<jint, jfloat>(14); + jfloat f5 = bit_cast<jint, jfloat>(15); + jfloat f6 = bit_cast<jint, jfloat>(16); + jfloat f7 = bit_cast<jint, jfloat>(17); + jfloat f8 = bit_cast<jint, jfloat>(18); + jfloat f9 = bit_cast<jint, jfloat>(19); + jfloat f10 = bit_cast<jint, jfloat>(20); env_->CallStaticVoidMethod(jklass_, jmethod_, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10); @@ -1444,25 +1437,25 @@ void Java_MyClassNatives_stackArgsMixed(JNIEnv* env, jclass klass, jint i1, jflo EXPECT_EQ(i9, 9); EXPECT_EQ(i10, 10); - jint i11 = convert<jfloat, jint>(f1); + jint i11 = bit_cast<jfloat, jint>(f1); EXPECT_EQ(i11, 11); - jint i12 = convert<jfloat, jint>(f2); + jint i12 = bit_cast<jfloat, jint>(f2); EXPECT_EQ(i12, 12); - jint i13 = convert<jfloat, jint>(f3); + jint i13 = bit_cast<jfloat, jint>(f3); EXPECT_EQ(i13, 13); - jint i14 = convert<jfloat, jint>(f4); + jint i14 = bit_cast<jfloat, jint>(f4); EXPECT_EQ(i14, 14); - jint i15 = convert<jfloat, jint>(f5); + jint i15 = bit_cast<jfloat, jint>(f5); EXPECT_EQ(i15, 15); - jint i16 = convert<jfloat, jint>(f6); + jint i16 = bit_cast<jfloat, jint>(f6); EXPECT_EQ(i16, 16); - jint i17 = convert<jfloat, jint>(f7); + jint i17 = 
bit_cast<jfloat, jint>(f7); EXPECT_EQ(i17, 17); - jint i18 = convert<jfloat, jint>(f8); + jint i18 = bit_cast<jfloat, jint>(f8); EXPECT_EQ(i18, 18); - jint i19 = convert<jfloat, jint>(f9); + jint i19 = bit_cast<jfloat, jint>(f9); EXPECT_EQ(i19, 19); - jint i20 = convert<jfloat, jint>(f10); + jint i20 = bit_cast<jfloat, jint>(f10); EXPECT_EQ(i20, 20); } @@ -1482,16 +1475,16 @@ TEST_F(JniCompilerTest, StackArgsMixed) { jint i9 = 9; jint i10 = 10; - jfloat f1 = convert<jint, jfloat>(11); - jfloat f2 = convert<jint, jfloat>(12); - jfloat f3 = convert<jint, jfloat>(13); - jfloat f4 = convert<jint, jfloat>(14); - jfloat f5 = convert<jint, jfloat>(15); - jfloat f6 = convert<jint, jfloat>(16); - jfloat f7 = convert<jint, jfloat>(17); - jfloat f8 = convert<jint, jfloat>(18); - jfloat f9 = convert<jint, jfloat>(19); - jfloat f10 = convert<jint, jfloat>(20); + jfloat f1 = bit_cast<jint, jfloat>(11); + jfloat f2 = bit_cast<jint, jfloat>(12); + jfloat f3 = bit_cast<jint, jfloat>(13); + jfloat f4 = bit_cast<jint, jfloat>(14); + jfloat f5 = bit_cast<jint, jfloat>(15); + jfloat f6 = bit_cast<jint, jfloat>(16); + jfloat f7 = bit_cast<jint, jfloat>(17); + jfloat f8 = bit_cast<jint, jfloat>(18); + jfloat f9 = bit_cast<jint, jfloat>(19); + jfloat f10 = bit_cast<jint, jfloat>(20); env_->CallStaticVoidMethod(jklass_, jmethod_, i1, f1, i2, f2, i3, f3, i4, f4, i5, f5, i6, f6, i7, f7, i8, f8, i9, f9, i10, f10); diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index c3a322caee..cc995f72a1 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -226,7 +226,7 @@ HBasicBlock* HGraphBuilder::FindBlockStartingAt(int32_t index) const { } template<typename T> -void HGraphBuilder::Binop_32x(const Instruction& instruction, Primitive::Type type) { +void HGraphBuilder::Binop_23x(const Instruction& instruction, Primitive::Type type) { HInstruction* first = LoadLocal(instruction.VRegB(), type); HInstruction* second = LoadLocal(instruction.VRegC(), type); current_block_->AddInstruction(new (arena_) T(type, first, second)); @@ -501,22 +501,22 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, int32_ } case Instruction::ADD_INT: { - Binop_32x<HAdd>(instruction, Primitive::kPrimInt); + Binop_23x<HAdd>(instruction, Primitive::kPrimInt); break; } case Instruction::ADD_LONG: { - Binop_32x<HAdd>(instruction, Primitive::kPrimLong); + Binop_23x<HAdd>(instruction, Primitive::kPrimLong); break; } case Instruction::SUB_INT: { - Binop_32x<HSub>(instruction, Primitive::kPrimInt); + Binop_23x<HSub>(instruction, Primitive::kPrimInt); break; } case Instruction::SUB_LONG: { - Binop_32x<HSub>(instruction, Primitive::kPrimLong); + Binop_23x<HSub>(instruction, Primitive::kPrimLong); break; } @@ -573,6 +573,11 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, int32_ UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); break; + case Instruction::CMP_LONG: { + Binop_23x<HCompare>(instruction, Primitive::kPrimLong); + break; + } + case Instruction::NOP: break; diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index 0852a26c55..ee32ca80ac 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -73,7 +73,7 @@ class HGraphBuilder : public ValueObject { bool InitializeParameters(uint16_t number_of_parameters); template<typename T> - void Binop_32x(const Instruction& instruction, Primitive::Type type); + void Binop_23x(const Instruction& instruction, Primitive::Type type); 
template<typename T> void Binop_12x(const Instruction& instruction, Primitive::Type type); @@ -84,11 +84,8 @@ class HGraphBuilder : public ValueObject { template<typename T> void Binop_22s(const Instruction& instruction, bool reverse); - template<typename T> - void If_22t(const Instruction& instruction, int32_t dex_offset); - - template<typename T> - void If_21t(const Instruction& instruction, int32_t dex_offset); + template<typename T> void If_21t(const Instruction& instruction, int32_t dex_offset); + template<typename T> void If_22t(const Instruction& instruction, int32_t dex_offset); void BuildReturn(const Instruction& instruction, Primitive::Type type); diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 83621e0f72..ae2f03080e 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -90,6 +90,7 @@ class CodeGenerator : public ArenaObject { virtual void SetupBlockedRegisters(bool* blocked_registers) const = 0; virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0; virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0; + virtual InstructionSet GetInstructionSet() const = 0; void RecordPcInfo(uint32_t dex_pc) { struct PcInfo pc_info; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index ec3c81533f..d87c14b4db 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -905,6 +905,48 @@ void InstructionCodeGeneratorARM::VisitNot(HNot* instruction) { locations->InAt(0).AsArm().AsCoreRegister(), ShifterOperand(1)); } +void LocationsBuilderARM::VisitCompare(HCompare* compare) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); + compare->SetLocations(locations); +} + +void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) { + Label greater, done; + LocationSummary* locations = compare->GetLocations(); + switch (compare->InputAt(0)->GetType()) { + case Primitive::kPrimLong: { + Register output = locations->Out().AsArm().AsCoreRegister(); + ArmManagedRegister left = locations->InAt(0).AsArm(); + ArmManagedRegister right = locations->InAt(1).AsArm(); + Label less, greater, done; + __ cmp(left.AsRegisterPairHigh(), + ShifterOperand(right.AsRegisterPairHigh())); // Signed compare. + __ b(&less, LT); + __ b(&greater, GT); + __ cmp(left.AsRegisterPairLow(), + ShifterOperand(right.AsRegisterPairLow())); // Unsigned compare. 
+ __ LoadImmediate(output, 0); + __ b(&done, EQ); + __ b(&less, CC); + + __ Bind(&greater); + __ LoadImmediate(output, 1); + __ b(&done); + + __ Bind(&less); + __ LoadImmediate(output, -1); + + __ Bind(&done); + break; + } + default: + LOG(FATAL) << "Unimplemented compare type " << compare->InputAt(0)->GetType(); + } +} + void LocationsBuilderARM::VisitPhi(HPhi* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) { diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 712a24cf67..c46c1b131c 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -171,6 +171,10 @@ class CodeGeneratorARM : public CodeGenerator { return &move_resolver_; } + virtual InstructionSet GetInstructionSet() const OVERRIDE { + return InstructionSet::kArm; + } + private: // Helper method to move a 32bits value between two locations. void Move32(Location destination, Location source); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index f624f3ce90..572d494719 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -81,12 +81,23 @@ ManagedRegister CodeGeneratorX86::AllocateFreeRegister(Primitive::Type type, bool* blocked_registers) const { switch (type) { case Primitive::kPrimLong: { - size_t reg = AllocateFreeRegisterInternal( - GetBlockedRegisterPairs(blocked_registers), kNumberOfRegisterPairs); + bool* blocked_register_pairs = GetBlockedRegisterPairs(blocked_registers); + size_t reg = AllocateFreeRegisterInternal(blocked_register_pairs, kNumberOfRegisterPairs); X86ManagedRegister pair = X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg)); blocked_registers[pair.AsRegisterPairLow()] = true; blocked_registers[pair.AsRegisterPairHigh()] = true; + // Block all other register pairs that share a register with `pair`. + for (int i = 0; i < kNumberOfRegisterPairs; i++) { + X86ManagedRegister current = + X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i)); + if (current.AsRegisterPairLow() == pair.AsRegisterPairLow() + || current.AsRegisterPairLow() == pair.AsRegisterPairHigh() + || current.AsRegisterPairHigh() == pair.AsRegisterPairLow() + || current.AsRegisterPairHigh() == pair.AsRegisterPairHigh()) { + blocked_register_pairs[i] = true; + } + } return pair; } @@ -901,6 +912,46 @@ void InstructionCodeGeneratorX86::VisitNot(HNot* instruction) { __ xorl(out.AsX86().AsCpuRegister(), Immediate(1)); } +void LocationsBuilderX86::VisitCompare(HCompare* compare) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); + compare->SetLocations(locations); +} + +void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) { + Label greater, done; + LocationSummary* locations = compare->GetLocations(); + switch (compare->InputAt(0)->GetType()) { + case Primitive::kPrimLong: { + Label less, greater, done; + Register output = locations->Out().AsX86().AsCpuRegister(); + X86ManagedRegister left = locations->InAt(0).AsX86(); + X86ManagedRegister right = locations->InAt(1).AsX86(); + __ cmpl(left.AsRegisterPairHigh(), right.AsRegisterPairHigh()); + __ j(kLess, &less); // Signed compare. 
+ __ j(kGreater, &greater); // Signed compare. + __ cmpl(left.AsRegisterPairLow(), right.AsRegisterPairLow()); + __ movl(output, Immediate(0)); + __ j(kEqual, &done); + __ j(kBelow, &less); // Unsigned compare. + + __ Bind(&greater); + __ movl(output, Immediate(1)); + __ jmp(&done); + + __ Bind(&less); + __ movl(output, Immediate(-1)); + + __ Bind(&done); + break; + } + default: + LOG(FATAL) << "Unimplemented compare type " << compare->InputAt(0)->GetType(); + } +} + void LocationsBuilderX86::VisitPhi(HPhi* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) { diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index acc670e09b..8a8216a56d 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -173,6 +173,10 @@ class CodeGeneratorX86 : public CodeGenerator { return &move_resolver_; } + virtual InstructionSet GetInstructionSet() const OVERRIDE { + return InstructionSet::kX86; + } + private: // Helper method to move a 32bits value between two locations. void Move32(Location destination, Location source); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 283f1f5e57..dc1d6164b1 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -228,7 +228,9 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) { } } -void CodeGeneratorX86_64::Move(HInstruction* instruction, Location location, HInstruction* move_for) { +void CodeGeneratorX86_64::Move(HInstruction* instruction, + Location location, + HInstruction* move_for) { if (instruction->AsIntConstant() != nullptr) { Immediate imm(instruction->AsIntConstant()->GetValue()); if (location.IsRegister()) { @@ -383,7 +385,7 @@ void LocationsBuilderX86_64::VisitCondition(HCondition* comp) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(comp); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); + locations->SetOut(Location::RequiresRegister()); comp->SetLocations(locations); } @@ -444,6 +446,39 @@ void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual VisitCondition(comp); } +void LocationsBuilderX86_64::VisitCompare(HCompare* compare) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); + compare->SetLocations(locations); +} + +void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) { + Label greater, done; + LocationSummary* locations = compare->GetLocations(); + switch (compare->InputAt(0)->GetType()) { + case Primitive::kPrimLong: + __ cmpq(locations->InAt(0).AsX86_64().AsCpuRegister(), + locations->InAt(1).AsX86_64().AsCpuRegister()); + break; + default: + LOG(FATAL) << "Unimplemented compare type " << compare->InputAt(0)->GetType(); + } + + __ movl(locations->Out().AsX86_64().AsCpuRegister(), Immediate(0)); + __ j(kEqual, &done); + __ j(kGreater, &greater); + + __ movl(locations->Out().AsX86_64().AsCpuRegister(), Immediate(-1)); + __ jmp(&done); + + __ Bind(&greater); + __ movl(locations->Out().AsX86_64().AsCpuRegister(), Immediate(1)); + + __ Bind(&done); 
+} + void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) { // TODO: Support constant locations. LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant); @@ -463,7 +498,7 @@ void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) { } void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant) { - // Will be generated at use site. + codegen_->Move(constant, constant->GetLocations()->Out(), nullptr); } void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) { @@ -812,10 +847,13 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { if (source.IsRegister()) { if (destination.IsRegister()) { __ movq(destination.AsX86_64().AsCpuRegister(), source.AsX86_64().AsCpuRegister()); - } else { - DCHECK(destination.IsStackSlot()); + } else if (destination.IsStackSlot()) { __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), source.AsX86_64().AsCpuRegister()); + } else { + DCHECK(destination.IsDoubleStackSlot()); + __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), + source.AsX86_64().AsCpuRegister()); } } else if (source.IsStackSlot()) { if (destination.IsRegister()) { @@ -826,18 +864,27 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); } + } else if (source.IsDoubleStackSlot()) { + if (destination.IsRegister()) { + __ movq(destination.AsX86_64().AsX86_64().AsCpuRegister(), + Address(CpuRegister(RSP), source.GetStackIndex())); + } else { + DCHECK(destination.IsDoubleStackSlot()); + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); + __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); + } } else { LOG(FATAL) << "Unimplemented"; } } -void ParallelMoveResolverX86_64::Exchange(CpuRegister reg, int mem) { +void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) { __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); - __ movl(Address(CpuRegister(RSP), mem), CpuRegister(reg)); - __ movl(CpuRegister(reg), CpuRegister(TMP)); + __ movl(Address(CpuRegister(RSP), mem), reg); + __ movl(reg, CpuRegister(TMP)); } -void ParallelMoveResolverX86_64::Exchange(int mem1, int mem2) { +void ParallelMoveResolverX86_64::Exchange32(int mem1, int mem2) { ScratchRegisterScope ensure_scratch( this, TMP, RAX, codegen_->GetNumberOfCoreRegisters()); @@ -850,6 +897,25 @@ void ParallelMoveResolverX86_64::Exchange(int mem1, int mem2) { CpuRegister(ensure_scratch.GetRegister())); } +void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) { + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); + __ movq(Address(CpuRegister(RSP), mem), reg); + __ movq(reg, CpuRegister(TMP)); +} + +void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) { + ScratchRegisterScope ensure_scratch( + this, TMP, RAX, codegen_->GetNumberOfCoreRegisters()); + + int stack_offset = ensure_scratch.IsSpilled() ? 
kX86_64WordSize : 0; + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset)); + __ movq(CpuRegister(ensure_scratch.GetRegister()), + Address(CpuRegister(RSP), mem2 + stack_offset)); + __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP)); + __ movq(Address(CpuRegister(RSP), mem1 + stack_offset), + CpuRegister(ensure_scratch.GetRegister())); +} + void ParallelMoveResolverX86_64::EmitSwap(size_t index) { MoveOperands* move = moves_.Get(index); Location source = move->GetSource(); @@ -858,11 +924,17 @@ void ParallelMoveResolverX86_64::EmitSwap(size_t index) { if (source.IsRegister() && destination.IsRegister()) { __ xchgq(destination.AsX86_64().AsCpuRegister(), source.AsX86_64().AsCpuRegister()); } else if (source.IsRegister() && destination.IsStackSlot()) { - Exchange(source.AsX86_64().AsCpuRegister(), destination.GetStackIndex()); + Exchange32(source.AsX86_64().AsCpuRegister(), destination.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsRegister()) { - Exchange(destination.AsX86_64().AsCpuRegister(), source.GetStackIndex()); + Exchange32(destination.AsX86_64().AsCpuRegister(), source.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsStackSlot()) { - Exchange(destination.GetStackIndex(), source.GetStackIndex()); + Exchange32(destination.GetStackIndex(), source.GetStackIndex()); + } else if (source.IsRegister() && destination.IsDoubleStackSlot()) { + Exchange64(source.AsX86_64().AsCpuRegister(), destination.GetStackIndex()); + } else if (source.IsDoubleStackSlot() && destination.IsRegister()) { + Exchange64(destination.AsX86_64().AsCpuRegister(), source.GetStackIndex()); + } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) { + Exchange64(destination.GetStackIndex(), source.GetStackIndex()); } else { LOG(FATAL) << "Unimplemented"; } diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index f07df292e0..d347a4f121 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -69,8 +69,10 @@ class ParallelMoveResolverX86_64 : public ParallelMoveResolver { X86_64Assembler* GetAssembler() const; private: - void Exchange(CpuRegister reg, int mem); - void Exchange(int mem1, int mem2); + void Exchange32(CpuRegister reg, int mem); + void Exchange32(int mem1, int mem2); + void Exchange64(CpuRegister reg, int mem); + void Exchange64(int mem1, int mem2); CodeGeneratorX86_64* const codegen_; @@ -170,6 +172,10 @@ class CodeGeneratorX86_64 : public CodeGenerator { virtual void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; + virtual InstructionSet GetInstructionSet() const OVERRIDE { + return InstructionSet::kX86_64; + } + private: // Helper method to move a value between two locations. 
void Move(Location destination, Location source); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index a49ce64a2d..f033e2e22b 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -108,9 +108,11 @@ class HGraphVisualizerPrinter : public HGraphVisitor { } else { codegen_.DumpCoreRegister(output_, location.reg().RegId()); } - } else { - DCHECK(location.IsStackSlot()); + } else if (location.IsStackSlot()) { output_ << location.GetStackIndex() << "(sp)"; + } else { + DCHECK(location.IsDoubleStackSlot()); + output_ << "2x" << location.GetStackIndex() << "(sp)"; } } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 503f31d990..92920845c3 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -414,6 +414,7 @@ class HBasicBlock : public ArenaObject { M(ReturnVoid) \ M(StoreLocal) \ M(Sub) \ + M(Compare) \ #define FORWARD_DECLARATION(type) class H##type; @@ -986,6 +987,22 @@ class HGreaterThanOrEqual : public HCondition { }; +// Instruction to check how two inputs compare to each other. +// Result is 0 if input0 == input1, 1 if input0 > input1, or -1 if input0 < input1. +class HCompare : public HBinaryOperation { + public: + HCompare(Primitive::Type type, HInstruction* first, HInstruction* second) + : HBinaryOperation(Primitive::kPrimInt, first, second) { + DCHECK_EQ(type, first->GetType()); + DCHECK_EQ(type, second->GetType()); + } + + DECLARE_INSTRUCTION(Compare); + + private: + DISALLOW_COPY_AND_ASSIGN(HCompare); +}; + // A local in the graph. Corresponds to a Dex register. class HLocal : public HTemplateInstruction<0> { public: diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 1f4cb41582..68130dd5fc 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -55,7 +55,7 @@ bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph, it.Advance()) { HInstruction* current = it.Current(); if (current->NeedsEnvironment()) return false; - if (current->GetType() == Primitive::kPrimLong) return false; + if (current->GetType() == Primitive::kPrimLong && instruction_set != kX86_64) return false; if (current->GetType() == Primitive::kPrimFloat) return false; if (current->GetType() == Primitive::kPrimDouble) return false; } @@ -139,7 +139,7 @@ void RegisterAllocator::AllocateRegistersInternal() { current->SetFrom(position + 1); current->SetRegister(output.reg().RegId()); BlockRegister(output, position, position + 1, instruction->GetType()); - } else if (output.IsStackSlot()) { + } else if (output.IsStackSlot() || output.IsDoubleStackSlot()) { current->SetSpillSlot(output.GetStackIndex()); } for (size_t i = 0; i < instruction->InputCount(); ++i) { @@ -430,7 +430,7 @@ bool RegisterAllocator::IsBlocked(int reg) const { // we spill `current` instead. 
bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { size_t first_register_use = current->FirstRegisterUse(); - if (current->FirstRegisterUse() == kNoLifetime) { + if (first_register_use == kNoLifetime) { AllocateSpillSlotFor(current); return false; } @@ -559,6 +559,10 @@ LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) } } +static bool NeedTwoSpillSlot(Primitive::Type type) { + return type == Primitive::kPrimLong || type == Primitive::kPrimDouble; +} + void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { LiveInterval* parent = interval->GetParent(); @@ -581,6 +585,43 @@ void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { } size_t end = last_sibling->GetEnd(); + if (NeedTwoSpillSlot(parent->GetType())) { + AllocateTwoSpillSlots(parent, end); + } else { + AllocateOneSpillSlot(parent, end); + } +} + +void RegisterAllocator::AllocateTwoSpillSlots(LiveInterval* parent, size_t end) { + // Find an available spill slot. + size_t slot = 0; + for (size_t e = spill_slots_.Size(); slot < e; ++slot) { + // We check if it is less rather than less or equal because the parallel move + // resolver does not work when a single spill slot needs to be exchanged with + // a double spill slot. The strict comparison avoids needing to exchange these + // locations at the same lifetime position. + if (spill_slots_.Get(slot) < parent->GetStart() + && (slot == (e - 1) || spill_slots_.Get(slot + 1) < parent->GetStart())) { + break; + } + } + + if (slot == spill_slots_.Size()) { + // We need a new spill slot. + spill_slots_.Add(end); + spill_slots_.Add(end); + } else if (slot == spill_slots_.Size() - 1) { + spill_slots_.Put(slot, end); + spill_slots_.Add(end); + } else { + spill_slots_.Put(slot, end); + spill_slots_.Put(slot + 1, end); + } + + parent->SetSpillSlot(slot * kVRegSize); +} + +void RegisterAllocator::AllocateOneSpillSlot(LiveInterval* parent, size_t end) { // Find an available spill slot. size_t slot = 0; for (size_t e = spill_slots_.Size(); slot < e; ++slot) { @@ -604,7 +645,11 @@ static Location ConvertToLocation(LiveInterval* interval) { return Location::RegisterLocation(ManagedRegister(interval->GetRegister())); } else { DCHECK(interval->GetParent()->HasSpillSlot()); - return Location::StackSlot(interval->GetParent()->GetSpillSlot()); + if (NeedTwoSpillSlot(interval->GetType())) { + return Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot()); + } else { + return Location::StackSlot(interval->GetParent()->GetSpillSlot()); + } } } @@ -750,7 +795,9 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { // We spill eagerly, so move must be at definition. InsertMoveAfter(interval->GetDefinedBy(), Location::RegisterLocation(ManagedRegister(interval->GetRegister())), - Location::StackSlot(interval->GetParent()->GetSpillSlot())); + NeedTwoSpillSlot(interval->GetType()) + ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot()) + : Location::StackSlot(interval->GetParent()->GetSpillSlot())); } UsePosition* use = current->GetFirstUse(); diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index e63122ffed..7d4cd1a862 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -93,6 +93,8 @@ class RegisterAllocator { // Allocate a spill slot for the given interval. 
void AllocateSpillSlotFor(LiveInterval* interval); + void AllocateOneSpillSlot(LiveInterval* interval, size_t end); + void AllocateTwoSpillSlots(LiveInterval* interval, size_t end); // Connect adjacent siblings within blocks. void ConnectSiblings(LiveInterval* interval); diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 41d1529ef5..4d5d613015 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -949,6 +949,14 @@ void X86_64Assembler::andl(CpuRegister dst, const Immediate& imm) { } +void X86_64Assembler::andq(CpuRegister reg, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + CHECK(imm.is_int32()); // andq only supports 32b immediate. + EmitRex64(reg); + EmitComplex(4, Operand(reg), imm); +} + + void X86_64Assembler::orl(CpuRegister dst, CpuRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitOptionalRex32(dst, src); @@ -972,6 +980,14 @@ void X86_64Assembler::xorl(CpuRegister dst, CpuRegister src) { } +void X86_64Assembler::xorq(CpuRegister dst, CpuRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitRex64(dst, src); + EmitUint8(0x33); + EmitOperand(dst.LowBits(), Operand(src)); +} + + void X86_64Assembler::xorq(CpuRegister dst, const Immediate& imm) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); CHECK(imm.is_int32()); // xorq only supports 32b immediate. diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 9aa5a54df4..7514854829 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -391,12 +391,14 @@ class X86_64Assembler FINAL : public Assembler { void andl(CpuRegister dst, const Immediate& imm); void andl(CpuRegister dst, CpuRegister src); + void andq(CpuRegister dst, const Immediate& imm); void orl(CpuRegister dst, const Immediate& imm); void orl(CpuRegister dst, CpuRegister src); void xorl(CpuRegister dst, CpuRegister src); void xorq(CpuRegister dst, const Immediate& imm); + void xorq(CpuRegister dst, CpuRegister src); void addl(CpuRegister dst, CpuRegister src); void addl(CpuRegister reg, const Immediate& imm); |
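
For context on the HCompare lowering introduced above: for CMP_LONG the ARM and x86 backends compare the high words with a signed comparison and the low words with an unsigned comparison, then materialize -1, 0, or 1 into the output register. A minimal C++ sketch of the value those instruction sequences compute (CompareLongs is an illustrative name, not part of this change):

    #include <cstdint>

    // Mirrors the generated code: signed compare on the high 32 bits,
    // unsigned compare on the low 32 bits, result in {-1, 0, 1}.
    int CompareLongs(int64_t lhs, int64_t rhs) {
      int32_t lhs_hi = static_cast<int32_t>(lhs >> 32);
      int32_t rhs_hi = static_cast<int32_t>(rhs >> 32);
      if (lhs_hi != rhs_hi) {
        return lhs_hi < rhs_hi ? -1 : 1;   // Signed compare of high words.
      }
      uint32_t lhs_lo = static_cast<uint32_t>(lhs);
      uint32_t rhs_lo = static_cast<uint32_t>(rhs);
      if (lhs_lo == rhs_lo) {
        return 0;
      }
      return lhs_lo < rhs_lo ? -1 : 1;     // Unsigned compare of low words.
    }

The x86-64 backend can instead issue a single 64-bit cmpq and branch on the flags, which is also why CanAllocateRegistersFor now admits kPrimLong only when the instruction set is kX86_64. The jni_compiler_test changes are a rename only: the local union-based convert<U, V> helper is dropped in favor of an existing bit_cast helper, which reinterprets the bits of a jint as a jfloat (and back) without altering them.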