Quick compiler: reference cleanup

For 32-bit targets, object references are 32 bits wide both in
Dalvik virtual registers and in core physical registers.  Because of
this, object references and non-floating point values were both
handled as if they had the same register class (kCoreReg).

However, for 64-bit systems, references are 32 bits in Dalvik vregs, but
64 bits in physical registers.  Although the same underlying physical
core registers will still be used for object reference and non-float
values, different register class views will be used to represent them.
For example, an object reference in arm64 might be held in x3 at some
point, while the same underlying physical register, w3, would be used
to hold a 32-bit int.

This CL breaks apart the handling of object references and non-float values
to allow the proper register class (or register view) to be used.  A
new register class, kRefReg, is introduced which will map to a 32-bit
core register on 32-bit targets, and to a 64-bit core register on 64-bit
targets.  From this point on, object references should be allocated
registers in the kRefReg class rather than kCoreReg.

Change-Id: I6166827daa8a0ea3af326940d56a6a14874f5810
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 5d74b8d..9f9e618 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -313,11 +313,11 @@
 
 void ArmMir2Lir::GenMoveException(RegLocation rl_dest) {
   int ex_offset = Thread::ExceptionOffset<4>().Int32Value();
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  RegStorage reset_reg = AllocTemp();
-  Load32Disp(rs_rARM_SELF, ex_offset, rl_result.reg);
+  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
+  RegStorage reset_reg = AllocTempRef();
+  LoadRefDisp(rs_rARM_SELF, ex_offset, rl_result.reg);
   LoadConstant(reset_reg, 0);
-  Store32Disp(rs_rARM_SELF, ex_offset, reset_reg);
+  StoreRefDisp(rs_rARM_SELF, ex_offset, reset_reg);
   FreeTemp(reset_reg);
   StoreValue(rl_dest, rl_result);
 }
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index f0a9ca4..9c801a5 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -59,6 +59,7 @@
     RegLocation GetReturnAlt();
     RegLocation GetReturnWideAlt();
     RegLocation LocCReturn();
+    RegLocation LocCReturnRef();
     RegLocation LocCReturnDouble();
     RegLocation LocCReturnFloat();
     RegLocation LocCReturnWide();
diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc
index dde8ff0..e06d814 100644
--- a/compiler/dex/quick/arm/fp_arm.cc
+++ b/compiler/dex/quick/arm/fp_arm.cc
@@ -51,7 +51,7 @@
       FlushAllRegs();   // Send everything to home location
       CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2,
                                               false);
-      rl_result = GetReturn(true);
+      rl_result = GetReturn(kFPReg);
       StoreValue(rl_dest, rl_result);
       return;
     case Instruction::NEG_FLOAT:
@@ -94,7 +94,7 @@
       FlushAllRegs();   // Send everything to home location
       CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2,
                                               false);
-      rl_result = GetReturnWide(true);
+      rl_result = GetReturnWide(kFPReg);
       StoreValueWide(rl_dest, rl_result);
       return;
     case Instruction::NEG_DOUBLE:
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 2556788..769122d 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -206,13 +206,16 @@
   RegLocation rl_result;
   RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
   RegLocation rl_dest = mir_graph_->GetDest(mir);
-  rl_src = LoadValue(rl_src, kCoreReg);
+  // Avoid using float regs here.
+  RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
+  RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
+  rl_src = LoadValue(rl_src, src_reg_class);
   ConditionCode ccode = mir->meta.ccode;
   if (mir->ssa_rep->num_uses == 1) {
     // CONST case
     int true_val = mir->dalvikInsn.vB;
     int false_val = mir->dalvikInsn.vC;
-    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    rl_result = EvalLoc(rl_dest, result_reg_class, true);
     // Change kCondNe to kCondEq for the special cases below.
     if (ccode == kCondNe) {
       ccode = kCondEq;
@@ -239,8 +242,8 @@
       OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
     } else {
       // Unlikely case - could be tuned.
-      RegStorage t_reg1 = AllocTemp();
-      RegStorage t_reg2 = AllocTemp();
+      RegStorage t_reg1 = AllocTypedTemp(false, result_reg_class);
+      RegStorage t_reg2 = AllocTypedTemp(false, result_reg_class);
       LoadConstant(t_reg1, true_val);
       LoadConstant(t_reg2, false_val);
       OpRegImm(kOpCmp, rl_src.reg, 0);
@@ -253,9 +256,9 @@
     // MOVE case
     RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
     RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
-    rl_true = LoadValue(rl_true, kCoreReg);
-    rl_false = LoadValue(rl_false, kCoreReg);
-    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    rl_true = LoadValue(rl_true, result_reg_class);
+    rl_false = LoadValue(rl_false, result_reg_class);
+    rl_result = EvalLoc(rl_dest, result_reg_class, true);
     OpRegImm(kOpCmp, rl_src.reg, 0);
     LIR* it = nullptr;
     if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) {  // Is the "true" case already in place?
@@ -814,10 +817,10 @@
   // Release store semantics, get the barrier out of the way.  TODO: revisit
   GenMemBarrier(kStoreLoad);
 
-  RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
+  RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
   RegLocation rl_new_value;
   if (!is_long) {
-    rl_new_value = LoadValue(rl_src_new_value, kCoreReg);
+    rl_new_value = LoadValue(rl_src_new_value);
   } else if (load_early) {
     rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
   }
@@ -840,7 +843,7 @@
 
   RegLocation rl_expected;
   if (!is_long) {
-    rl_expected = LoadValue(rl_src_expected, kCoreReg);
+    rl_expected = LoadValue(rl_src_expected);
   } else if (load_early) {
     rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
   } else {
@@ -1047,7 +1050,7 @@
       ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pLmul);
       FlushAllRegs();
       CallRuntimeHelperRegLocationRegLocation(func_offset, rl_src1, rl_src2, false);
-      rl_result = GetReturnWide(false);
+      rl_result = GetReturnWide(kCoreReg);
       StoreValueWide(rl_dest, rl_result);
       return;
     }
@@ -1126,7 +1129,7 @@
     if (reg_status != 0) {
       // We had manually allocated registers for rl_result.
       // Now construct a RegLocation.
-      rl_result = GetReturnWide(false);  // Just using as a template.
+      rl_result = GetReturnWide(kCoreReg);  // Just using as a template.
       rl_result.reg = RegStorage::MakeRegPair(res_lo, res_hi);
     }
 
@@ -1168,7 +1171,7 @@
   int data_offset;
   RegLocation rl_result;
   bool constant_index = rl_index.is_const;
-  rl_array = LoadValue(rl_array, kCoreReg);
+  rl_array = LoadValue(rl_array, kRefReg);
   if (!constant_index) {
     rl_index = LoadValue(rl_index, kCoreReg);
   }
@@ -1203,7 +1206,7 @@
       reg_ptr = rl_array.reg;  // NOTE: must not alter reg_ptr in constant case.
     } else {
       // No special indexed operation, lea + load w/ displacement
-      reg_ptr = AllocTemp();
+      reg_ptr = AllocTempRef();
       OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kArmLsl, scale));
       FreeTemp(rl_index.reg);
     }
@@ -1229,7 +1232,7 @@
     }
   } else {
     // Offset base, then use indexed load
-    RegStorage reg_ptr = AllocTemp();
+    RegStorage reg_ptr = AllocTempRef();
     OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
     FreeTemp(rl_array.reg);
     rl_result = EvalLoc(rl_dest, reg_class, true);
@@ -1267,7 +1270,7 @@
     data_offset += mir_graph_->ConstantValue(rl_index) << scale;
   }
 
-  rl_array = LoadValue(rl_array, kCoreReg);
+  rl_array = LoadValue(rl_array, kRefReg);
   if (!constant_index) {
     rl_index = LoadValue(rl_index, kCoreReg);
   }
@@ -1281,7 +1284,7 @@
     reg_ptr = rl_array.reg;
   } else {
     allocated_reg_ptr_temp = true;
-    reg_ptr = AllocTemp();
+    reg_ptr = AllocTempRef();
   }
 
   /* null object? */
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 309f676..a50b90a 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -66,6 +66,10 @@
   return arm_loc_c_return;
 }
 
+RegLocation ArmMir2Lir::LocCReturnRef() {
+  return arm_loc_c_return;
+}
+
 RegLocation ArmMir2Lir::LocCReturnWide() {
   return arm_loc_c_return_wide;
 }
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 2e3ef86..d0f8e74 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -287,9 +287,9 @@
 
 void Arm64Mir2Lir::GenMoveException(RegLocation rl_dest) {
   int ex_offset = Thread::ExceptionOffset<8>().Int32Value();
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  Load32Disp(rs_rA64_SELF, ex_offset, rl_result.reg);
-  Store32Disp(rs_rA64_SELF, ex_offset, rs_xzr);
+  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
+  LoadRefDisp(rs_rA64_SELF, ex_offset, rl_result.reg);
+  StoreRefDisp(rs_rA64_SELF, ex_offset, rs_xzr);
   StoreValue(rl_dest, rl_result);
 }
 
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 16bb701..6251f4f 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -59,6 +59,7 @@
     RegLocation GetReturnAlt();
     RegLocation GetReturnWideAlt();
     RegLocation LocCReturn();
+    RegLocation LocCReturnRef();
     RegLocation LocCReturnDouble();
     RegLocation LocCReturnFloat();
     RegLocation LocCReturnWide();
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
index 882ee66..acc7d17 100644
--- a/compiler/dex/quick/arm64/fp_arm64.cc
+++ b/compiler/dex/quick/arm64/fp_arm64.cc
@@ -47,7 +47,7 @@
       FlushAllRegs();   // Send everything to home location
       CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmodf), rl_src1, rl_src2,
                                               false);
-      rl_result = GetReturn(true);
+      rl_result = GetReturn(kFPReg);
       StoreValue(rl_dest, rl_result);
       return;
     case Instruction::NEG_FLOAT:
@@ -90,7 +90,7 @@
       FlushAllRegs();   // Send everything to home location
       CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmod), rl_src1, rl_src2,
                                               false);
-      rl_result = GetReturnWide(true);
+      rl_result = GetReturnWide(kFPReg);
       StoreValueWide(rl_dest, rl_result);
       return;
     case Instruction::NEG_DOUBLE:
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index d9428f9..7ebb496 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -88,14 +88,16 @@
   RegLocation rl_result;
   RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
   RegLocation rl_dest = mir_graph_->GetDest(mir);
-  rl_src = LoadValue(rl_src, kCoreReg);
+  RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
+  RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
+  rl_src = LoadValue(rl_src, src_reg_class);
   ArmConditionCode code = ArmConditionEncoding(mir->meta.ccode);
 
   RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
   RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
-  rl_true = LoadValue(rl_true, kCoreReg);
-  rl_false = LoadValue(rl_false, kCoreReg);
-  rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  rl_true = LoadValue(rl_true, result_reg_class);
+  rl_false = LoadValue(rl_false, result_reg_class);
+  rl_result = EvalLoc(rl_dest, result_reg_class, true);
   OpRegImm(kOpCmp, rl_src.reg, 0);
   NewLIR4(kA64Csel4rrrc, rl_result.reg.GetReg(), rl_true.reg.GetReg(),
           rl_false.reg.GetReg(), code);
@@ -501,10 +503,10 @@
   // Release store semantics, get the barrier out of the way.  TODO: revisit
   GenMemBarrier(kStoreLoad);
 
-  RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
+  RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
   RegLocation rl_new_value;
   if (!is_long) {
-    rl_new_value = LoadValue(rl_src_new_value, kCoreReg);
+    rl_new_value = LoadValue(rl_src_new_value);
   } else if (load_early) {
     rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
   }
@@ -527,7 +529,7 @@
 
   RegLocation rl_expected;
   if (!is_long) {
-    rl_expected = LoadValue(rl_src_expected, kCoreReg);
+    rl_expected = LoadValue(rl_src_expected);
   } else if (load_early) {
     rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
   } else {
@@ -769,7 +771,7 @@
   int data_offset;
   RegLocation rl_result;
   bool constant_index = rl_index.is_const;
-  rl_array = LoadValue(rl_array, kCoreReg);
+  rl_array = LoadValue(rl_array, kRefReg);
   if (!constant_index) {
     rl_index = LoadValue(rl_index, kCoreReg);
   }
@@ -804,7 +806,7 @@
       reg_ptr = rl_array.reg;  // NOTE: must not alter reg_ptr in constant case.
     } else {
       // No special indexed operation, lea + load w/ displacement
-      reg_ptr = AllocTemp();
+      reg_ptr = AllocTempRef();
       OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kA64Lsl, scale));
       FreeTemp(rl_index.reg);
     }
@@ -830,7 +832,7 @@
     }
   } else {
     // Offset base, then use indexed load
-    RegStorage reg_ptr = AllocTemp();
+    RegStorage reg_ptr = AllocTempRef();
     OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
     FreeTemp(rl_array.reg);
     rl_result = EvalLoc(rl_dest, reg_class, true);
@@ -871,7 +873,7 @@
     data_offset += mir_graph_->ConstantValue(rl_index) << scale;
   }
 
-  rl_array = LoadValue(rl_array, kCoreReg);
+  rl_array = LoadValue(rl_array, kRefReg);
   if (!constant_index) {
     rl_index = LoadValue(rl_index, kCoreReg);
   }
@@ -885,7 +887,7 @@
     reg_ptr = rl_array.reg;
   } else {
     allocated_reg_ptr_temp = true;
-    reg_ptr = AllocTemp();
+    reg_ptr = AllocTempRef();
   }
 
   /* null object? */
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index 598d05b..7539392 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -96,6 +96,10 @@
   return arm_loc_c_return;
 }
 
+RegLocation Arm64Mir2Lir::LocCReturnRef() {
+  return arm_loc_c_return;
+}
+
 RegLocation Arm64Mir2Lir::LocCReturnWide() {
   return arm_loc_c_return_wide;
 }
@@ -572,7 +576,7 @@
   if (UNLIKELY(is_volatile)) {
     // On arm64, fp register load/store is atomic only for single bytes.
     if (size != kSignedByte && size != kUnsignedByte) {
-      return kCoreReg;
+      return (size == kReference) ? kRefReg : kCoreReg;
     }
   }
   return RegClassBySize(size);
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 4f2a876..236128f 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -220,6 +220,8 @@
 void Mir2Lir::GenCompareAndBranch(Instruction::Code opcode, RegLocation rl_src1,
                                   RegLocation rl_src2, LIR* taken,
                                   LIR* fall_through) {
+  DCHECK(!rl_src1.fp);
+  DCHECK(!rl_src2.fp);
   ConditionCode cond;
   switch (opcode) {
     case Instruction::IF_EQ:
@@ -253,7 +255,7 @@
     cond = FlipComparisonOrder(cond);
   }
 
-  rl_src1 = LoadValue(rl_src1, kCoreReg);
+  rl_src1 = LoadValue(rl_src1);
   // Is this really an immediate comparison?
   if (rl_src2.is_const) {
     // If it's already live in a register or not easily materialized, just keep going
@@ -265,14 +267,15 @@
       return;
     }
   }
-  rl_src2 = LoadValue(rl_src2, kCoreReg);
+  rl_src2 = LoadValue(rl_src2);
   OpCmpBranch(cond, rl_src1.reg, rl_src2.reg, taken);
 }
 
 void Mir2Lir::GenCompareZeroAndBranch(Instruction::Code opcode, RegLocation rl_src, LIR* taken,
                                       LIR* fall_through) {
   ConditionCode cond;
-  rl_src = LoadValue(rl_src, kCoreReg);
+  DCHECK(!rl_src.fp);
+  rl_src = LoadValue(rl_src);
   switch (opcode) {
     case Instruction::IF_EQZ:
       cond = kCondEq;
@@ -371,7 +374,7 @@
     func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pAllocArrayWithAccessCheck);
     mir_to_lir->CallRuntimeHelperImmMethodRegLocation(func_offset, type_idx, rl_src, true);
   }
-  RegLocation rl_result = mir_to_lir->GetReturn(false);
+  RegLocation rl_result = mir_to_lir->GetReturn(kRefReg);
   mir_to_lir->StoreValue(rl_dest, rl_result);
 }
 
@@ -503,7 +506,7 @@
     }
   }
   if (info->result.location != kLocInvalid) {
-    StoreValue(info->result, GetReturn(false /* not fp */));
+    StoreValue(info->result, GetReturn(kRefReg));
   }
 }
 
@@ -563,7 +566,7 @@
     if (field_info.IsReferrersClass()) {
       // Fast path, static storage base is this method's class
       RegLocation rl_method = LoadCurrMethod();
-      r_base = AllocTempWord();
+      r_base = AllocTempRef();
       LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base);
       if (IsTemp(rl_method.reg)) {
         FreeTemp(rl_method.reg);
@@ -658,7 +661,7 @@
     if (field_info.IsReferrersClass()) {
       // Fast path, static storage base is this method's class
       RegLocation rl_method  = LoadCurrMethod();
-      r_base = AllocTempWord();
+      r_base = AllocTempRef();
       LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base);
     } else {
       // Medium path, static storage base in a different class which requires checks that the other
@@ -726,10 +729,10 @@
       GenSgetCall<4>(this, is_long_or_double, is_object, &field_info);
     }
     if (is_long_or_double) {
-      RegLocation rl_result = GetReturnWide(rl_dest.fp);
+      RegLocation rl_result = GetReturnWide(LocToRegClass(rl_dest));
       StoreValueWide(rl_dest, rl_result);
     } else {
-      RegLocation rl_result = GetReturn(rl_dest.fp);
+      RegLocation rl_result = GetReturn(LocToRegClass(rl_dest));
       StoreValue(rl_dest, rl_result);
     }
   }
@@ -766,7 +769,7 @@
       (!field_info.IsVolatile() || SupportsVolatileLoadStore(load_size))) {
     RegisterClass reg_class = RegClassForFieldLoadStore(load_size, field_info.IsVolatile());
     DCHECK_GE(field_info.FieldOffset().Int32Value(), 0);
-    rl_obj = LoadValue(rl_obj, kCoreReg);
+    rl_obj = LoadValue(rl_obj, kRefReg);
     GenNullCheck(rl_obj.reg, opt_flags);
     RegLocation rl_result = EvalLoc(rl_dest, reg_class, true);
     int field_offset = field_info.FieldOffset().Int32Value();
@@ -793,10 +796,10 @@
       GenIgetCall<4>(this, is_long_or_double, is_object, &field_info, rl_obj);
     }
     if (is_long_or_double) {
-      RegLocation rl_result = GetReturnWide(rl_dest.fp);
+      RegLocation rl_result = GetReturnWide(LocToRegClass(rl_dest));
       StoreValueWide(rl_dest, rl_result);
     } else {
-      RegLocation rl_result = GetReturn(rl_dest.fp);
+      RegLocation rl_result = GetReturn(LocToRegClass(rl_dest));
       StoreValue(rl_dest, rl_result);
     }
   }
@@ -824,7 +827,7 @@
       (!field_info.IsVolatile() || SupportsVolatileLoadStore(store_size))) {
     RegisterClass reg_class = RegClassForFieldLoadStore(store_size, field_info.IsVolatile());
     DCHECK_GE(field_info.FieldOffset().Int32Value(), 0);
-    rl_obj = LoadValue(rl_obj, kCoreReg);
+    rl_obj = LoadValue(rl_obj, kRefReg);
     if (is_long_or_double) {
       rl_src = LoadValueWide(rl_src, reg_class);
     } else {
@@ -881,7 +884,7 @@
 void Mir2Lir::GenConstClass(uint32_t type_idx, RegLocation rl_dest) {
   RegLocation rl_method = LoadCurrMethod();
   RegStorage res_reg = AllocTemp();
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
   if (!cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx,
                                                    *cu_->dex_file,
                                                    type_idx)) {
@@ -894,15 +897,15 @@
       CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess),
                               type_idx, rl_method.reg, true);
     }
-    RegLocation rl_result = GetReturn(false);
+    RegLocation rl_result = GetReturn(kRefReg);
     StoreValue(rl_dest, rl_result);
   } else {
     // We're don't need access checks, load type from dex cache
     int32_t dex_cache_offset =
         mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value();
-    Load32Disp(rl_method.reg, dex_cache_offset, res_reg);
+    LoadRefDisp(rl_method.reg, dex_cache_offset, res_reg);
     int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value();
-    Load32Disp(res_reg, offset_of_type, rl_result.reg);
+    LoadRefDisp(res_reg, offset_of_type, rl_result.reg);
     if (!cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file,
         type_idx) || SLOW_TYPE_PATH) {
       // Slow path, at runtime test if type is null and if so initialize
@@ -976,7 +979,7 @@
                 TargetReg(kArg0));
 
     // Might call out to helper, which will return resolved string in kRet0
-    Load32Disp(TargetReg(kArg0), offset_of_string, TargetReg(kRet0));
+    LoadRefDisp(TargetReg(kArg0), offset_of_string, TargetReg(kRet0));
     LIR* fromfast = OpCmpImmBranch(kCondEq, TargetReg(kRet0), 0, NULL);
     LIR* cont = NewLIR0(kPseudoTargetLabel);
 
@@ -1010,13 +1013,13 @@
     }
 
     GenBarrier();
-    StoreValue(rl_dest, GetReturn(false));
+    StoreValue(rl_dest, GetReturn(kRefReg));
   } else {
     RegLocation rl_method = LoadCurrMethod();
-    RegStorage res_reg = AllocTemp();
-    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    RegStorage res_reg = AllocTempRef();
+    RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
     LoadRefDisp(rl_method.reg, mirror::ArtMethod::DexCacheStringsOffset().Int32Value(), res_reg);
-    Load32Disp(res_reg, offset_of_string, rl_result.reg);
+    LoadRefDisp(res_reg, offset_of_string, rl_result.reg);
     StoreValue(rl_dest, rl_result);
   }
 }
@@ -1071,7 +1074,7 @@
     func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pAllocObjectWithAccessCheck);
     mir_to_lir->CallRuntimeHelperImmMethod(func_offset, type_idx, true);
   }
-  RegLocation rl_result = mir_to_lir->GetReturn(false);
+  RegLocation rl_result = mir_to_lir->GetReturn(kRefReg);
   mir_to_lir->StoreValue(rl_dest, rl_result);
 }
 
@@ -1103,7 +1106,7 @@
   // X86 has its own implementation.
   DCHECK(cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64);
 
-  RegLocation object = LoadValue(rl_src, kCoreReg);
+  RegLocation object = LoadValue(rl_src, kRefReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   RegStorage result_reg = rl_result.reg;
   if (result_reg == object.reg) {
@@ -1112,8 +1115,8 @@
   LoadConstant(result_reg, 0);     // assume false
   LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, NULL);
 
-  RegStorage check_class = AllocTypedTemp(false, kCoreReg);
-  RegStorage object_class = AllocTypedTemp(false, kCoreReg);
+  RegStorage check_class = AllocTypedTemp(false, kRefReg);
+  RegStorage object_class = AllocTypedTemp(false, kRefReg);
 
   LoadCurrMethodDirect(check_class);
   if (use_declaring_class) {
@@ -1206,7 +1209,7 @@
     }
   }
   /* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result */
-  RegLocation rl_result = GetReturn(false);
+  RegLocation rl_result = GetReturn(kRefReg);
   if (cu_->instruction_set == kMips) {
     // On MIPS rArg0 != rl_result, place false in result if branch is taken.
     LoadConstant(rl_result.reg, 0);
@@ -1511,7 +1514,7 @@
   } else {
     GenShiftOpLongCall<4>(this, opcode, rl_src1, rl_shift);
   }
-  RegLocation rl_result = GetReturnWide(false);
+  RegLocation rl_result = GetReturnWide(kCoreReg);
   StoreValueWide(rl_dest, rl_result);
 }
 
@@ -1653,7 +1656,7 @@
         CallHelper(r_tgt, QUICK_ENTRYPOINT_OFFSET(4, pIdivmod), false /* not a safepoint */);
       }
       if (op == kOpDiv)
-        rl_result = GetReturn(false);
+        rl_result = GetReturn(kCoreReg);
       else
         rl_result = GetReturnAlt();
     }
@@ -1918,7 +1921,7 @@
                                   false);
         }
         if (is_div)
-          rl_result = GetReturn(false);
+          rl_result = GetReturn(kCoreReg);
         else
           rl_result = GetReturnAlt();
       }
@@ -2081,7 +2084,7 @@
     }
     // Adjust return regs in to handle case of rem returning kArg2/kArg3
     if (ret_reg == mir_to_lir->TargetReg(kRet0).GetReg())
-      rl_result = mir_to_lir->GetReturnWide(false);
+      rl_result = mir_to_lir->GetReturnWide(kCoreReg);
     else
       rl_result = mir_to_lir->GetReturnWideAlt();
     mir_to_lir->StoreValueWide(rl_dest, rl_result);
@@ -2119,11 +2122,11 @@
   CallRuntimeHelperRegLocation(func_offset, rl_src, false);
   if (rl_dest.wide) {
     RegLocation rl_result;
-    rl_result = GetReturnWide(rl_dest.fp);
+    rl_result = GetReturnWide(LocToRegClass(rl_dest));
     StoreValueWide(rl_dest, rl_result);
   } else {
     RegLocation rl_result;
-    rl_result = GetReturn(rl_dest.fp);
+    rl_result = GetReturn(LocToRegClass(rl_dest));
     StoreValue(rl_dest, rl_result);
   }
 }
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index eef3294..1817fd3 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -1156,7 +1156,7 @@
 RegLocation Mir2Lir::InlineTarget(CallInfo* info) {
   RegLocation res;
   if (info->result.location == kLocInvalid) {
-    res = GetReturn(false);
+    res = GetReturn(LocToRegClass(info->result));
   } else {
     res = info->result;
   }
@@ -1166,7 +1166,7 @@
 RegLocation Mir2Lir::InlineTargetWide(CallInfo* info) {
   RegLocation res;
   if (info->result.location == kLocInvalid) {
-    res = GetReturnWide(false);
+    res = GetReturnWide(kCoreReg);
   } else {
     res = info->result;
   }
@@ -1189,7 +1189,7 @@
 
   RegLocation rl_obj = info->args[0];
   RegLocation rl_idx = info->args[1];
-  rl_obj = LoadValue(rl_obj, kCoreReg);
+  rl_obj = LoadValue(rl_obj, kRefReg);
   // X86 wants to avoid putting a constant index into a register.
   if (!((cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64)&& rl_idx.is_const)) {
     rl_idx = LoadValue(rl_idx, kCoreReg);
@@ -1202,7 +1202,7 @@
   RegStorage reg_ptr;
   if (cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64) {
     reg_off = AllocTemp();
-    reg_ptr = AllocTemp();
+    reg_ptr = AllocTempRef();
     if (range_check) {
       reg_max = AllocTemp();
       Load32Disp(rl_obj.reg, count_offset, reg_max);
@@ -1232,9 +1232,9 @@
       }
     }
     reg_off = AllocTemp();
-    reg_ptr = AllocTemp();
+    reg_ptr = AllocTempRef();
     Load32Disp(rl_obj.reg, offset_offset, reg_off);
-    Load32Disp(rl_obj.reg, value_offset, reg_ptr);
+    LoadRefDisp(rl_obj.reg, value_offset, reg_ptr);
   }
   if (rl_idx.is_const) {
     OpRegImm(kOpAdd, reg_off, mir_graph_->ConstantValue(rl_idx.orig_sreg));
@@ -1271,7 +1271,7 @@
   }
   // dst = src.length();
   RegLocation rl_obj = info->args[0];
-  rl_obj = LoadValue(rl_obj, kCoreReg);
+  rl_obj = LoadValue(rl_obj, kRefReg);
   RegLocation rl_dest = InlineTarget(info);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   GenNullCheck(rl_obj.reg, info->opt_flags);
@@ -1477,7 +1477,7 @@
     DCHECK_EQ(mir_graph_->ConstantValue(rl_char) & ~0xFFFF, 0);
     DCHECK(high_code_point_branch == nullptr);
   }
-  RegLocation rl_return = GetReturn(false);
+  RegLocation rl_return = GetReturn(kCoreReg);
   RegLocation rl_dest = InlineTarget(info);
   StoreValue(rl_dest, rl_return);
   return true;
@@ -1523,7 +1523,7 @@
       OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(4, pStringCompareTo));
     }
   }
-  RegLocation rl_return = GetReturn(false);
+  RegLocation rl_return = GetReturn(kCoreReg);
   RegLocation rl_dest = InlineTarget(info);
   StoreValue(rl_dest, rl_return);
   return true;
@@ -1575,7 +1575,7 @@
   rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
   RegLocation rl_dest = is_long ? InlineTargetWide(info) : InlineTarget(info);  // result reg
 
-  RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
+  RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
   RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   if (is_long) {
@@ -1621,7 +1621,7 @@
     // There might have been a store before this volatile one so insert StoreStore barrier.
     GenMemBarrier(kStoreStore);
   }
-  RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
+  RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
   RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
   RegLocation rl_value;
   if (is_long) {
@@ -1635,7 +1635,7 @@
       FreeTemp(rl_temp_offset);
     }
   } else {
-    rl_value = LoadValue(rl_src_value, kCoreReg);
+    rl_value = LoadValue(rl_src_value);
     StoreBaseIndexed(rl_object.reg, rl_offset.reg, rl_value.reg, 0, k32);
   }
 
@@ -1658,7 +1658,7 @@
     if (info->type != kStatic &&
         ((cu_->disable_opt & (1 << kNullCheckElimination)) != 0 ||
          (info->opt_flags & MIR_IGNORE_NULL_CHECK) == 0))  {
-      RegLocation rl_obj = LoadValue(info->args[0], kCoreReg);
+      RegLocation rl_obj = LoadValue(info->args[0], kRefReg);
       GenNullCheck(rl_obj.reg);
     }
     return;
@@ -1783,10 +1783,10 @@
   if (info->result.location != kLocInvalid) {
     // We have a following MOVE_RESULT - do it now.
     if (info->result.wide) {
-      RegLocation ret_loc = GetReturnWide(info->result.fp);
+      RegLocation ret_loc = GetReturnWide(LocToRegClass(info->result));
       StoreValueWide(info->result, ret_loc);
     } else {
-      RegLocation ret_loc = GetReturn(info->result.fp);
+      RegLocation ret_loc = GetReturn(LocToRegClass(info->result));
       StoreValue(info->result, ret_loc);
     }
   }
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index f5e7e63..2c8b9b9 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -139,6 +139,7 @@
 }
 
 RegLocation Mir2Lir::LoadValue(RegLocation rl_src, RegisterClass op_kind) {
+  DCHECK(!rl_src.ref || op_kind == kRefReg);
   rl_src = UpdateLoc(rl_src);
   if (rl_src.location == kLocPhysReg) {
     if (!RegClassMatches(op_kind, rl_src.reg)) {
@@ -162,6 +163,10 @@
   return rl_src;
 }
 
+RegLocation Mir2Lir::LoadValue(RegLocation rl_src) {
+  return LoadValue(rl_src, LocToRegClass(rl_src));
+}
+
 void Mir2Lir::StoreValue(RegLocation rl_dest, RegLocation rl_src) {
   /*
    * Sanity checking - should never try to store to the same
@@ -366,7 +371,7 @@
 }
 
 RegLocation Mir2Lir::LoadCurrMethod() {
-  return LoadValue(mir_graph_->GetMethodLoc(), kCoreReg);
+  return LoadValue(mir_graph_->GetMethodLoc(), kRefReg);
 }
 
 RegLocation Mir2Lir::ForceTemp(RegLocation loc) {
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index 3af3715..e1bdb2e 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -261,11 +261,11 @@
 
 void MipsMir2Lir::GenMoveException(RegLocation rl_dest) {
   int ex_offset = Thread::ExceptionOffset<4>().Int32Value();
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  RegStorage reset_reg = AllocTemp();
-  Load32Disp(rs_rMIPS_SELF, ex_offset, rl_result.reg);
+  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
+  RegStorage reset_reg = AllocTempRef();
+  LoadRefDisp(rs_rMIPS_SELF, ex_offset, rl_result.reg);
   LoadConstant(reset_reg, 0);
-  Store32Disp(rs_rMIPS_SELF, ex_offset, reset_reg);
+  StoreRefDisp(rs_rMIPS_SELF, ex_offset, reset_reg);
   FreeTemp(reset_reg);
   StoreValue(rl_dest, rl_result);
 }
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index e462173..ea3c901 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -59,6 +59,7 @@
     RegLocation GetReturnAlt();
     RegLocation GetReturnWideAlt();
     RegLocation LocCReturn();
+    RegLocation LocCReturnRef();
     RegLocation LocCReturnDouble();
     RegLocation LocCReturnFloat();
     RegLocation LocCReturnWide();
diff --git a/compiler/dex/quick/mips/fp_mips.cc b/compiler/dex/quick/mips/fp_mips.cc
index 9fffb2f..4e31477 100644
--- a/compiler/dex/quick/mips/fp_mips.cc
+++ b/compiler/dex/quick/mips/fp_mips.cc
@@ -52,7 +52,7 @@
       FlushAllRegs();   // Send everything to home location
       CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2,
                                               false);
-      rl_result = GetReturn(true);
+      rl_result = GetReturn(kFPReg);
       StoreValue(rl_dest, rl_result);
       return;
     case Instruction::NEG_FLOAT:
@@ -95,7 +95,7 @@
       FlushAllRegs();   // Send everything to home location
       CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2,
                                               false);
-      rl_result = GetReturnWide(true);
+      rl_result = GetReturnWide(kFPReg);
       StoreValueWide(rl_dest, rl_result);
       return;
     case Instruction::NEG_DOUBLE:
@@ -204,7 +204,7 @@
   RegStorage r_tgt = LoadHelper(offset);
   // NOTE: not a safepoint
   OpReg(kOpBlx, r_tgt);
-  RegLocation rl_result = GetReturn(false);
+  RegLocation rl_result = GetReturn(kCoreReg);
   StoreValue(rl_dest, rl_result);
 }
 
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index 55cf434..fcf5f94 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -66,6 +66,10 @@
   return mips_loc_c_return;
 }
 
+RegLocation MipsMir2Lir::LocCReturnRef() {
+  return mips_loc_c_return;
+}
+
 RegLocation MipsMir2Lir::LocCReturnWide() {
   return mips_loc_c_return_wide;
 }
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index df56820..9621995 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -23,6 +23,36 @@
 
 namespace art {
 
+RegisterClass Mir2Lir::ShortyToRegClass(char shorty_type) {
+  RegisterClass res;
+  switch (shorty_type) {
+    case 'L':
+      res = kRefReg;
+      break;
+    case 'F':
+      // Expected fallthrough.
+    case 'D':
+      res = kFPReg;
+      break;
+    default:
+      res = kCoreReg;
+  }
+  return res;
+}
+
+RegisterClass Mir2Lir::LocToRegClass(RegLocation loc) {
+  RegisterClass res;
+  if (loc.fp) {
+    DCHECK(!loc.ref) << "At most, one of ref/fp may be set";
+    res = kFPReg;
+  } else if (loc.ref) {
+    res = kRefReg;
+  } else {
+    res = kCoreReg;
+  }
+  return res;
+}
+
 void Mir2Lir::LockArg(int in_position, bool wide) {
   RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position);
   RegStorage reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) :
@@ -149,15 +179,13 @@
     return false;
   }
 
-  // The inliner doesn't distinguish kDouble or kFloat, use shorty.
-  bool double_or_float = cu_->shorty[0] == 'F' || cu_->shorty[0] == 'D';
-
   // Point of no return - no aborts after this
   GenPrintLabel(mir);
   LockArg(data.object_arg);
-  RegStorage reg_obj = LoadArg(data.object_arg, kCoreReg);
-  RegLocation rl_dest = wide ? GetReturnWide(double_or_float) : GetReturn(double_or_float);
+  RegStorage reg_obj = LoadArg(data.object_arg, kRefReg);
   RegisterClass reg_class = RegClassForFieldLoadStore(size, data.is_volatile);
+  RegisterClass ret_reg_class = ShortyToRegClass(cu_->shorty[0]);
+  RegLocation rl_dest = wide ? GetReturnWide(ret_reg_class) : GetReturn(ret_reg_class);
   RegStorage r_result = rl_dest.reg;
   if (!RegClassMatches(reg_class, r_result)) {
     r_result = wide ? AllocTypedTempWide(rl_dest.fp, reg_class)
@@ -205,7 +233,7 @@
   GenPrintLabel(mir);
   LockArg(data.object_arg);
   LockArg(data.src_arg, wide);
-  RegStorage reg_obj = LoadArg(data.object_arg, kCoreReg);
+  RegStorage reg_obj = LoadArg(data.object_arg, kRefReg);
   RegisterClass reg_class = RegClassForFieldLoadStore(size, data.is_volatile);
   RegStorage reg_src = LoadArg(data.src_arg, reg_class, wide);
   if (data.is_volatile) {
@@ -226,13 +254,12 @@
 bool Mir2Lir::GenSpecialIdentity(MIR* mir, const InlineMethod& special) {
   const InlineReturnArgData& data = special.d.return_data;
   bool wide = (data.is_wide != 0u);
-  // The inliner doesn't distinguish kDouble or kFloat, use shorty.
-  bool double_or_float = cu_->shorty[0] == 'F' || cu_->shorty[0] == 'D';
 
   // Point of no return - no aborts after this
   GenPrintLabel(mir);
   LockArg(data.arg, wide);
-  RegLocation rl_dest = wide ? GetReturnWide(double_or_float) : GetReturn(double_or_float);
+  RegisterClass reg_class = ShortyToRegClass(cu_->shorty[0]);
+  RegLocation rl_dest = wide ? GetReturnWide(reg_class) : GetReturn(reg_class);
   LoadArgDirect(data.arg, rl_dest);
   return true;
 }
@@ -254,7 +281,7 @@
       break;
     case kInlineOpNonWideConst: {
       successful = true;
-      RegLocation rl_dest = GetReturn(cu_->shorty[0] == 'F');
+      RegLocation rl_dest = GetReturn(ShortyToRegClass(cu_->shorty[0]));
       GenPrintLabel(mir);
       LoadConstant(rl_dest.reg, static_cast<int>(special.d.data));
       return_mir = bb->GetNextUnconditionalMir(mir_graph_, mir);
@@ -377,26 +404,30 @@
       }
       break;
 
-    case Instruction::RETURN:
     case Instruction::RETURN_OBJECT:
+      DCHECK(rl_src[0].ref);
+      // Intentional fallthrough.
+    case Instruction::RETURN:
       if (!mir_graph_->MethodIsLeaf()) {
         GenSuspendTest(opt_flags);
       }
-      StoreValue(GetReturn(cu_->shorty[0] == 'F'), rl_src[0]);
+      DCHECK_EQ(LocToRegClass(rl_src[0]), ShortyToRegClass(cu_->shorty[0]));
+      StoreValue(GetReturn(LocToRegClass(rl_src[0])), rl_src[0]);
       break;
 
     case Instruction::RETURN_WIDE:
       if (!mir_graph_->MethodIsLeaf()) {
         GenSuspendTest(opt_flags);
       }
-      StoreValueWide(GetReturnWide(cu_->shorty[0] == 'D'), rl_src[0]);
+      DCHECK_EQ(LocToRegClass(rl_src[0]), ShortyToRegClass(cu_->shorty[0]));
+      StoreValueWide(GetReturnWide(LocToRegClass(rl_src[0])), rl_src[0]);
       break;
 
     case Instruction::MOVE_RESULT_WIDE:
       if ((opt_flags & MIR_INLINED) != 0) {
         break;  // Nop - combined w/ previous invoke.
       }
-      StoreValueWide(rl_dest, GetReturnWide(rl_dest.fp));
+      StoreValueWide(rl_dest, GetReturnWide(LocToRegClass(rl_dest)));
       break;
 
     case Instruction::MOVE_RESULT:
@@ -404,7 +435,7 @@
       if ((opt_flags & MIR_INLINED) != 0) {
         break;  // Nop - combined w/ previous invoke.
       }
-      StoreValue(rl_dest, GetReturn(rl_dest.fp));
+      StoreValue(rl_dest, GetReturn(LocToRegClass(rl_dest)));
       break;
 
     case Instruction::MOVE:
@@ -474,7 +505,7 @@
     case Instruction::ARRAY_LENGTH:
       int len_offset;
       len_offset = mirror::Array::LengthOffset().Int32Value();
-      rl_src[0] = LoadValue(rl_src[0], kCoreReg);
+      rl_src[0] = LoadValue(rl_src[0], kRefReg);
       GenNullCheck(rl_src[0].reg, opt_flags);
       rl_result = EvalLoc(rl_dest, kCoreReg, true);
       Load32Disp(rl_src[0].reg, len_offset, rl_result.reg);
@@ -782,7 +813,7 @@
 
     case Instruction::LONG_TO_INT:
       rl_src[0] = UpdateLocWide(rl_src[0]);
-      rl_src[0] = WideToNarrow(rl_src[0]);
+      rl_src[0] = NarrowRegLoc(rl_src[0]);
       StoreValue(rl_dest, rl_src[0]);
       break;
 
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 8d572ca..1281d45 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -456,6 +456,8 @@
       int next_sp_reg_;
       GrowableArray<RegisterInfo*> dp_regs_;    // Double precision float.
       int next_dp_reg_;
+      GrowableArray<RegisterInfo*>* ref_regs_;  // Points to core_regs_ or core64_regs_
+      int* next_ref_reg_;
 
      private:
       Mir2Lir* const m2l_;
@@ -550,8 +552,12 @@
      * just use our knowledge of type to select the most appropriate register class?
      */
     RegisterClass RegClassBySize(OpSize size) {
-      return (size == kUnsignedHalf || size == kSignedHalf || size == kUnsignedByte ||
-              size == kSignedByte) ? kCoreReg : kAnyReg;
+      if (size == kReference) {
+        return kRefReg;
+      } else {
+        return (size == kUnsignedHalf || size == kSignedHalf || size == kUnsignedByte ||
+                size == kSignedByte) ? kCoreReg : kAnyReg;
+      }
     }
 
     size_t CodeBufferSizeInBytes() {
@@ -612,6 +618,8 @@
       return current_dalvik_offset_;
     }
 
+    RegisterClass ShortyToRegClass(char shorty_type);
+    RegisterClass LocToRegClass(RegLocation loc);
     int ComputeFrameSize();
     virtual void Materialize();
     virtual CompiledMethod* GetCompiledMethod();
@@ -699,7 +707,7 @@
     virtual RegStorage AllocFreeTemp();
     virtual RegStorage AllocTemp();
     virtual RegStorage AllocTempWide();
-    virtual RegStorage AllocTempWord();
+    virtual RegStorage AllocTempRef();
     virtual RegStorage AllocTempSingle();
     virtual RegStorage AllocTempDouble();
     virtual RegStorage AllocTypedTemp(bool fp_hint, int reg_class);
@@ -719,7 +727,6 @@
     void NullifyRange(RegStorage reg, int s_reg);
     void MarkDef(RegLocation rl, LIR *start, LIR *finish);
     void MarkDefWide(RegLocation rl, LIR *start, LIR *finish);
-    virtual RegLocation WideToNarrow(RegLocation rl);
     void ResetDefLoc(RegLocation rl);
     void ResetDefLocWide(RegLocation rl);
     void ResetDefTracking();
@@ -764,8 +771,8 @@
     void DoPromotion();
     int VRegOffset(int v_reg);
     int SRegOffset(int s_reg);
-    RegLocation GetReturnWide(bool is_double);
-    RegLocation GetReturn(bool is_float);
+    RegLocation GetReturnWide(RegisterClass reg_class);
+    RegLocation GetReturn(RegisterClass reg_class);
     RegisterInfo* GetRegInfo(RegStorage reg);
 
     // Shared by all targets - implemented in gen_common.cc.
@@ -973,6 +980,8 @@
     }
     // Load Dalvik value with 32-bit memory storage.  If compressed object reference, decompress.
     virtual RegLocation LoadValue(RegLocation rl_src, RegisterClass op_kind);
+    // Same as above, but derive the target register class from the location record.
+    virtual RegLocation LoadValue(RegLocation rl_src);
     // Load Dalvik value with 64-bit memory storage.
     virtual RegLocation LoadValueWide(RegLocation rl_src, RegisterClass op_kind);
     // Load Dalvik value with 32-bit memory storage.  If compressed object reference, decompress.
@@ -1122,6 +1131,7 @@
     virtual RegLocation GetReturnAlt() = 0;
     virtual RegLocation GetReturnWideAlt() = 0;
     virtual RegLocation LocCReturn() = 0;
+    virtual RegLocation LocCReturnRef() = 0;
     virtual RegLocation LocCReturnDouble() = 0;
     virtual RegLocation LocCReturnFloat() = 0;
     virtual RegLocation LocCReturnWide() = 0;
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index 058b89c..2303c62 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -128,6 +128,15 @@
   // Add an entry for InvalidReg with zero'd mask.
   RegisterInfo* invalid_reg = new (arena) RegisterInfo(RegStorage::InvalidReg(), 0);
   m2l_->reginfo_map_.Put(RegStorage::InvalidReg().GetReg(), invalid_reg);
+
+  // Existence of core64 registers implies wide references.
+  if (core64_regs_.Size() != 0) {
+    ref_regs_ = &core64_regs_;
+    next_ref_reg_ = &next_core64_reg_;
+  } else {
+    ref_regs_ = &core_regs_;
+    next_ref_reg_ = &next_core_reg_;
+  }
 }
 
 void Mir2Lir::DumpRegPool(GrowableArray<RegisterInfo*>* regs) {
@@ -145,6 +154,7 @@
 
 void Mir2Lir::DumpCoreRegPool() {
   DumpRegPool(&reg_pool_->core_regs_);
+  DumpRegPool(&reg_pool_->core64_regs_);
 }
 
 void Mir2Lir::DumpFpRegPool() {
@@ -274,6 +284,7 @@
 
 /* Reserve a callee-save register.  Return InvalidReg if none available */
 RegStorage Mir2Lir::AllocPreservedCoreReg(int s_reg) {
+  // TODO: 64-bit and refreg update
   RegStorage res;
   GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->core_regs_);
   for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
@@ -406,17 +417,10 @@
   return res;
 }
 
-RegStorage Mir2Lir::AllocTempWord() {
-  // FIXME: temporary workaround.  For bring-up purposes, x86_64 needs the ability
-  // to allocate wide values as a pair of core registers.  However, we can't hold
-  // a reference in a register pair.  This workaround will be removed when the
-  // reference handling code is reworked, or x86_64 backend starts using wide core
-  // registers - whichever happens first.
-  if (cu_->instruction_set == kX86_64) {
-    return AllocTemp();
-  } else {
-    return (Is64BitInstructionSet(cu_->instruction_set)) ? AllocTempWide() : AllocTemp();
-  }
+RegStorage Mir2Lir::AllocTempRef() {
+  RegStorage res = AllocTempBody(*reg_pool_->ref_regs_, reg_pool_->next_ref_reg_, true);
+  DCHECK(!res.IsPair());
+  return res;
 }
 
 RegStorage Mir2Lir::AllocTempSingle() {
@@ -432,6 +436,7 @@
 }
 
 RegStorage Mir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class) {
+  DCHECK_NE(reg_class, kRefReg);  // NOTE: the Dalvik width of a reference is always 32 bits.
   if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
     return AllocTempDouble();
   }
@@ -441,6 +446,8 @@
 RegStorage Mir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) {
   if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
     return AllocTempSingle();
+  } else if (reg_class == kRefReg) {
+    return AllocTempRef();
   }
   return AllocTemp();
 }
@@ -459,8 +466,10 @@
 
 RegStorage Mir2Lir::AllocLiveReg(int s_reg, int reg_class, bool wide) {
   RegStorage reg;
-  // TODO: might be worth a sanity check here to verify at most 1 live reg per s_reg.
-  if ((reg_class == kAnyReg) || (reg_class == kFPReg)) {
+  if (reg_class == kRefReg) {
+    reg = FindLiveReg(*reg_pool_->ref_regs_, s_reg);
+  }
+  if (!reg.Valid() && ((reg_class == kAnyReg) || (reg_class == kFPReg))) {
     reg = FindLiveReg(wide ? reg_pool_->dp_regs_ : reg_pool_->sp_regs_, s_reg);
   }
   if (!reg.Valid() && (reg_class != kFPReg)) {
@@ -675,39 +684,6 @@
   p->SetDefEnd(finish);
 }
 
-RegLocation Mir2Lir::WideToNarrow(RegLocation rl) {
-  DCHECK(rl.wide);
-  if (rl.location == kLocPhysReg) {
-    if (rl.reg.IsPair()) {
-      RegisterInfo* info_lo = GetRegInfo(rl.reg.GetLow());
-      RegisterInfo* info_hi = GetRegInfo(rl.reg.GetHigh());
-      if (info_lo->IsTemp()) {
-        info_lo->SetIsWide(false);
-        info_lo->ResetDefBody();
-      }
-      if (info_hi->IsTemp()) {
-        info_hi->SetIsWide(false);
-        info_hi->ResetDefBody();
-      }
-      rl.reg = rl.reg.GetLow();
-    } else {
-      /*
-       * TODO: If not a pair, we can't just drop the high register.  On some targets, we may be
-       * able to re-cast the 64-bit register as 32 bits, so it might be worthwhile to revisit
-       * this code.  Will probably want to make this a virtual function.
-       */
-      // Can't narrow 64-bit register.  Clobber.
-      if (GetRegInfo(rl.reg)->IsTemp()) {
-        Clobber(rl.reg);
-        FreeTemp(rl.reg);
-      }
-      rl.location = kLocDalvikFrame;
-    }
-  }
-  rl.wide = false;
-  return rl;
-}
-
 void Mir2Lir::ResetDefLoc(RegLocation rl) {
   DCHECK(!rl.wide);
   if (IsTemp(rl.reg) && !(cu_->disable_opt & (1 << kSuppressLoads))) {
@@ -727,16 +703,8 @@
 }
 
 void Mir2Lir::ResetDefTracking() {
-  GrowableArray<RegisterInfo*>::Iterator core_it(&reg_pool_->core_regs_);
-  for (RegisterInfo* info = core_it.Next(); info != nullptr; info = core_it.Next()) {
-    info->ResetDefBody();
-  }
-  GrowableArray<RegisterInfo*>::Iterator sp_it(&reg_pool_->core_regs_);
-  for (RegisterInfo* info = sp_it.Next(); info != nullptr; info = sp_it.Next()) {
-    info->ResetDefBody();
-  }
-  GrowableArray<RegisterInfo*>::Iterator dp_it(&reg_pool_->core_regs_);
-  for (RegisterInfo* info = dp_it.Next(); info != nullptr; info = dp_it.Next()) {
+  GrowableArray<RegisterInfo*>::Iterator iter(&tempreg_info_);
+  for (RegisterInfo* info = iter.Next(); info != nullptr; info = iter.Next()) {
     info->ResetDefBody();
   }
 }
@@ -811,7 +779,11 @@
 bool Mir2Lir::RegClassMatches(int reg_class, RegStorage reg) {
   if (reg_class == kAnyReg) {
     return true;
-  } else if (reg_class == kCoreReg) {
+  } else if ((reg_class == kCoreReg) || (reg_class == kRefReg)) {
+    /*
+     * For this purpose, consider Core and Ref to be the same class. We aren't dealing
+     * with width here - that should be checked at a higher level (if needed).
+     */
     return !reg.IsFloat();
   } else {
     return reg.IsFloat();
@@ -1347,20 +1319,26 @@
 }
 
 /* Mark register usage state and return long retloc */
-RegLocation Mir2Lir::GetReturnWide(bool is_double) {
-  RegLocation gpr_res = LocCReturnWide();
-  RegLocation fpr_res = LocCReturnDouble();
-  RegLocation res = is_double ? fpr_res : gpr_res;
+RegLocation Mir2Lir::GetReturnWide(RegisterClass reg_class) {
+  RegLocation res;
+  switch (reg_class) {
+    case kRefReg: LOG(FATAL) << "Unexpected kRefReg: references are never wide"; break;
+    case kFPReg: res = LocCReturnDouble(); break;
+    default: res = LocCReturnWide(); break;
+  }
   Clobber(res.reg);
   LockTemp(res.reg);
   MarkWide(res.reg);
   return res;
 }
 
-RegLocation Mir2Lir::GetReturn(bool is_float) {
-  RegLocation gpr_res = LocCReturn();
-  RegLocation fpr_res = LocCReturnFloat();
-  RegLocation res = is_float ? fpr_res : gpr_res;
+RegLocation Mir2Lir::GetReturn(RegisterClass reg_class) {
+  RegLocation res;
+  switch (reg_class) {
+    case kRefReg: res = LocCReturnRef(); break;
+    case kFPReg: res = LocCReturnFloat(); break;
+    default: res = LocCReturn(); break;
+  }
   Clobber(res.reg);
   if (cu_->instruction_set == kMips) {
     MarkInUse(res.reg);
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 4673cc0..f363eb3 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -169,7 +169,7 @@
   int ex_offset = Is64BitInstructionSet(cu_->instruction_set) ?
       Thread::ExceptionOffset<8>().Int32Value() :
       Thread::ExceptionOffset<4>().Int32Value();
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
   NewLIR2(kX86Mov32RT, rl_result.reg.GetReg(), ex_offset);
   NewLIR2(kX86Mov32TI, ex_offset, 0);
   StoreValue(rl_dest, rl_result);
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index d66790d..648c148 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -59,6 +59,7 @@
     RegLocation GetReturnAlt();
     RegLocation GetReturnWideAlt();
     RegLocation LocCReturn();
+    RegLocation LocCReturnRef();
     RegLocation LocCReturnDouble();
     RegLocation LocCReturnFloat();
     RegLocation LocCReturnWide();
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index aec39ab..0421a59 100644
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -56,7 +56,7 @@
         CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2,
                                                 false);
       }
-      rl_result = GetReturn(true);
+      rl_result = GetReturn(kFPReg);
       StoreValue(rl_dest, rl_result);
       return;
     case Instruction::NEG_FLOAT:
@@ -118,7 +118,7 @@
         CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2,
                                                 false);
       }
-      rl_result = GetReturnWide(true);
+      rl_result = GetReturnWide(kFPReg);
       StoreValueWide(rl_dest, rl_result);
       return;
     case Instruction::NEG_DOUBLE:
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 48bff6e..1cc16b9 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -173,7 +173,10 @@
   RegLocation rl_result;
   RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
   RegLocation rl_dest = mir_graph_->GetDest(mir);
-  rl_src = LoadValue(rl_src, kCoreReg);
+  // Avoid using float regs here.
+  RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
+  RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
+  rl_src = LoadValue(rl_src, src_reg_class);
   ConditionCode ccode = mir->meta.ccode;
 
   // The kMirOpSelect has two variants, one for constants and one for moves.
@@ -182,7 +185,7 @@
   if (is_constant_case) {
     int true_val = mir->dalvikInsn.vB;
     int false_val = mir->dalvikInsn.vC;
-    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    rl_result = EvalLoc(rl_dest, result_reg_class, true);
 
     /*
      * For ccode == kCondEq:
@@ -203,6 +206,8 @@
      *     mov t1, $true_case
      *     cmovz result_reg, t1
      */
+    // FIXME: depending on how you use registers you could get a false != mismatch when dealing
+    // with different views of the same underlying physical resource (i.e. solo32 vs. solo64).
     const bool result_reg_same_as_src =
         (rl_src.location == kLocPhysReg && rl_src.reg.GetReg() == rl_result.reg.GetReg());
     const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src);
@@ -224,7 +229,7 @@
     if (true_zero_case || false_zero_case || catch_all_case) {
       ConditionCode cc = true_zero_case ? NegateComparison(ccode) : ccode;
       int immediateForTemp = true_zero_case ? false_val : true_val;
-      RegStorage temp1_reg = AllocTemp();
+      RegStorage temp1_reg = AllocTypedTemp(false, result_reg_class);
       OpRegImm(kOpMov, temp1_reg, immediateForTemp);
 
       OpCondRegReg(kOpCmov, cc, rl_result.reg, temp1_reg);
@@ -234,9 +239,9 @@
   } else {
     RegLocation rl_true = mir_graph_->GetSrc(mir, 1);
     RegLocation rl_false = mir_graph_->GetSrc(mir, 2);
-    rl_true = LoadValue(rl_true, kCoreReg);
-    rl_false = LoadValue(rl_false, kCoreReg);
-    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    rl_true = LoadValue(rl_true, result_reg_class);
+    rl_false = LoadValue(rl_false, result_reg_class);
+    rl_result = EvalLoc(rl_dest, result_reg_class, true);
 
     /*
      * For ccode == kCondEq:
@@ -792,8 +797,8 @@
     Clobber(rs_r0);
     LockTemp(rs_r0);
 
-    RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
-    RegLocation rl_new_value = LoadValue(rl_src_new_value, kCoreReg);
+    RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
+    RegLocation rl_new_value = LoadValue(rl_src_new_value);
 
     if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
       // Mark card for object assuming new value is stored.
@@ -1441,7 +1446,7 @@
   RegisterClass reg_class = RegClassBySize(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   RegLocation rl_result;
-  rl_array = LoadValue(rl_array, kCoreReg);
+  rl_array = LoadValue(rl_array, kRefReg);
 
   int data_offset;
   if (size == k64 || size == kDouble) {
@@ -1497,7 +1502,7 @@
     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
   }
 
-  rl_array = LoadValue(rl_array, kCoreReg);
+  rl_array = LoadValue(rl_array, kRefReg);
   bool constant_index = rl_index.is_const;
   int32_t constant_index_value = 0;
   if (!constant_index) {
@@ -1880,7 +1885,7 @@
 // question with simple comparisons. Use compares to memory and SETEQ to optimize for x86.
 void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
                                     RegLocation rl_dest, RegLocation rl_src) {
-  RegLocation object = LoadValue(rl_src, kCoreReg);
+  RegLocation object = LoadValue(rl_src, kRefReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   RegStorage result_reg = rl_result.reg;
 
@@ -1894,7 +1899,7 @@
   LoadConstant(result_reg, 0);
   LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, NULL);
 
-  RegStorage check_class = AllocTypedTemp(false, kCoreReg);
+  RegStorage check_class = AllocTypedTemp(false, kRefReg);
 
   // If Method* is already in a register, we can save a copy.
   RegLocation rl_method = mir_graph_->GetMethodLoc();
@@ -1972,8 +1977,8 @@
     LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
                  class_reg);
     int32_t offset_of_type =
-        mirror::Array::DataOffset(sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() + (sizeof(mirror::HeapReference<mirror::Class*>)
-        * type_idx);
+        mirror::Array::DataOffset(sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() +
+        (sizeof(mirror::HeapReference<mirror::Class*>) * type_idx);
     LoadRefDisp(class_reg, offset_of_type, class_reg);
     if (!can_assume_type_is_in_dex_cache) {
       // Need to test presence of type in dex cache at runtime.
@@ -1992,7 +1997,7 @@
     }
   }
   /* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result. */
-  RegLocation rl_result = GetReturn(false);
+  RegLocation rl_result = GetReturn(kRefReg);
 
   // SETcc only works with EAX..EDX.
   DCHECK_LT(rl_result.reg.GetRegNum(), 4);
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 8b34168..79081c8 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -177,6 +177,11 @@
   return x86_loc_c_return;
 }
 
+RegLocation X86Mir2Lir::LocCReturnRef() {
+  // FIXME: return x86_loc_c_return_wide for x86_64 when wide refs supported.
+  return x86_loc_c_return;
+}
+
 RegLocation X86Mir2Lir::LocCReturnWide() {
   return x86_loc_c_return_wide;
 }
@@ -981,7 +986,7 @@
   }
 
   // Okay, we are commited to inlining this.
-  RegLocation rl_return = GetReturn(false);
+  RegLocation rl_return = GetReturn(kCoreReg);
   RegLocation rl_dest = InlineTarget(info);
 
   // Is the string non-NULL?