Update load/store utilities for 64-bit backends

This CL replaces the typical use of the LoadWord/StoreWord
utilities (which, in practice, were 32-bit loads/stores) with a
new set that makes the size explicit.  We now have the helpers
below (a rough sketch of their use follows the list):

   LoadWordDisp/StoreWordDisp:
     32 or 64 bits, depending on the target.  Load or store the
     natural word size.  Expect this to be used infrequently -
     generally when we know we're dealing with a native pointer or
     a flushed register not holding a Dalvik value (Dalvik values
     flush to home locations sized by their Dalvik type, not by the
     target word size).

   Load32Disp/Store32Disp:
     Load or store 32 bits, regardless of target.

   Load64Disp/Store64Disp:
     Load or store 64 bits, regardless of target.

   LoadRefDisp:
     Load a 32-bit compressed reference, and expand it to the
     natural word size in the target register.

   StoreRefDisp:
     Compress a reference held in a register of the natural word
     size and store it as a 32-bit compressed reference.
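
As a rough sketch (illustrative only; the exact declarations in the
Mir2Lir class may differ), the new helpers can be thought of as thin
wrappers that pin down the OpSize passed to the existing
LoadBaseDisp/StoreBaseDisp routines:

  // Always a 32-bit access, regardless of the target word size.
  LIR* Load32Disp(RegStorage r_base, int displacement, RegStorage r_dest) {
    return LoadBaseDisp(r_base, displacement, r_dest, k32, INVALID_SREG);
  }

  // Natural word size: 32 bits on 32-bit targets, 64 bits on 64-bit targets.
  LIR* LoadWordDisp(RegStorage r_base, int displacement, RegStorage r_dest) {
    return LoadBaseDisp(r_base, displacement, r_dest, kWord, INVALID_SREG);
  }

For example, a load of a 32-bit Thread field changes from

  LoadWordDisp(rs_rARM_SELF, ex_offset, rl_result.reg);

to

  Load32Disp(rs_rARM_SELF, ex_offset, rl_result.reg);

while loads of native pointers (e.g. the entrypoint load in the
stack-overflow slow path) keep using LoadWordDisp.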

Change-Id: I50fcbc8684476abd9527777ee7c152c61ba41c6f
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index d0d0e6b..b374ed8 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -128,7 +128,7 @@
 
   // Load the displacement from the switch table
   RegStorage disp_reg = AllocTemp();
-  LoadBaseIndexed(table_base, keyReg, disp_reg, 2, kWord);
+  LoadBaseIndexed(table_base, keyReg, disp_reg, 2, k32);
 
   // ..and go! NOTE: No instruction set switch here - must stay Thumb2
   LIR* switch_branch = NewLIR1(kThumb2AddPCR, disp_reg.GetReg());
@@ -180,6 +180,7 @@
  */
 void ArmMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
   FlushAllRegs();
+  // FIXME: need separate LoadValues for object references.
   LoadValueDirectFixed(rl_src, rs_r0);  // Get obj
   LockCallTemps();  // Prepare for explicit register usage
   constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
@@ -193,7 +194,7 @@
         null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
       }
     }
-    LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
+    Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
     NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
     MarkPossibleNullPointerException(opt_flags);
     LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_r1, 0, NULL);
@@ -219,7 +220,7 @@
   } else {
     // Explicit null-check as slow-path is entered using an IT.
     GenNullCheck(rs_r0, opt_flags);
-    LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
+    Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
     NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
     MarkPossibleNullPointerException(opt_flags);
     OpRegImm(kOpCmp, rs_r1, 0);
@@ -248,7 +249,7 @@
   LoadValueDirectFixed(rl_src, rs_r0);  // Get obj
   LockCallTemps();  // Prepare for explicit register usage
   LIR* null_check_branch = nullptr;
-  LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
+  Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
   constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
   if (kArchVariantHasGoodBranchPredictor) {
     if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
@@ -259,11 +260,11 @@
         null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
       }
     }
-    LoadWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);
+    Load32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);
     MarkPossibleNullPointerException(opt_flags);
     LoadConstantNoClobber(rs_r3, 0);
     LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_r1, rs_r2, NULL);
-    StoreWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
+    Store32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
     LIR* unlock_success_branch = OpUnconditionalBranch(NULL);
 
     LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
@@ -284,14 +285,14 @@
   } else {
     // Explicit null-check as slow-path is entered using an IT.
     GenNullCheck(rs_r0, opt_flags);
-    LoadWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);  // Get lock
+    Load32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);  // Get lock
     MarkPossibleNullPointerException(opt_flags);
-    LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
+    Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
     LoadConstantNoClobber(rs_r3, 0);
     // Is lock unheld on lock or held by us (==thread_id) on unlock?
     OpRegReg(kOpCmp, rs_r1, rs_r2);
     LIR* it = OpIT(kCondEq, "EE");
-    StoreWordDisp/*eq*/(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
+    Store32Disp/*eq*/(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
     // Go expensive route - UnlockObjectFromCode(obj);
     LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(),
                        rs_rARM_LR);
@@ -307,9 +308,9 @@
   int ex_offset = Thread::ExceptionOffset<4>().Int32Value();
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   RegStorage reset_reg = AllocTemp();
-  LoadWordDisp(rs_rARM_SELF, ex_offset, rl_result.reg);
+  Load32Disp(rs_rARM_SELF, ex_offset, rl_result.reg);
   LoadConstant(reset_reg, 0);
-  StoreWordDisp(rs_rARM_SELF, ex_offset, reset_reg);
+  Store32Disp(rs_rARM_SELF, ex_offset, reset_reg);
   FreeTemp(reset_reg);
   StoreValue(rl_dest, rl_result);
 }
@@ -354,7 +355,7 @@
   if (!skip_overflow_check) {
     if (Runtime::Current()->ExplicitStackOverflowChecks()) {
       /* Load stack limit */
-      LoadWordDisp(rs_rARM_SELF, Thread::StackEndOffset<4>().Int32Value(), rs_r12);
+      Load32Disp(rs_rARM_SELF, Thread::StackEndOffset<4>().Int32Value(), rs_r12);
     }
   }
   /* Spill core callee saves */
@@ -391,6 +392,7 @@
           ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pThrowStackOverflow);
           // Load the entrypoint directly into the pc instead of doing a load + branch. Assumes
           // codegen and target are in thumb2 mode.
+          // NOTE: native pointer.
           m2l_->LoadWordDisp(rs_rARM_SELF, func_offset.Int32Value(), rs_rARM_PC);
         }
 
@@ -421,7 +423,7 @@
       // a sub instruction.  Otherwise we will get a temp allocation and the
       // code size will increase.
       OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes);
-      LoadWordDisp(rs_r12, 0, rs_r12);
+      Load32Disp(rs_r12, 0, rs_r12);
       MarkPossibleStackOverflowException();
       OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
     }
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index c876b3a..eae5c69 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -684,18 +684,18 @@
   RegLocation rl_dest = InlineTarget(info);
   RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  if (size == kLong) {
+  if (size == k64) {
     // Fake unaligned LDRD by two unaligned LDR instructions on ARMv7 with SCTLR.A set to 0.
     if (rl_address.reg.GetReg() != rl_result.reg.GetLowReg()) {
-      LoadWordDisp(rl_address.reg, 0, rl_result.reg.GetLow());
-      LoadWordDisp(rl_address.reg, 4, rl_result.reg.GetHigh());
+      Load32Disp(rl_address.reg, 0, rl_result.reg.GetLow());
+      Load32Disp(rl_address.reg, 4, rl_result.reg.GetHigh());
     } else {
-      LoadWordDisp(rl_address.reg, 4, rl_result.reg.GetHigh());
-      LoadWordDisp(rl_address.reg, 0, rl_result.reg.GetLow());
+      Load32Disp(rl_address.reg, 4, rl_result.reg.GetHigh());
+      Load32Disp(rl_address.reg, 0, rl_result.reg.GetLow());
     }
     StoreValueWide(rl_dest, rl_result);
   } else {
-    DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord);
+    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
     // Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0.
     LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, INVALID_SREG);
     StoreValue(rl_dest, rl_result);
@@ -708,13 +708,13 @@
   rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
   RegLocation rl_src_value = info->args[2];  // [size] value
   RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
-  if (size == kLong) {
+  if (size == k64) {
     // Fake unaligned STRD by two unaligned STR instructions on ARMv7 with SCTLR.A set to 0.
     RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg);
-    StoreBaseDisp(rl_address.reg, 0, rl_value.reg.GetLow(), kWord);
-    StoreBaseDisp(rl_address.reg, 4, rl_value.reg.GetHigh(), kWord);
+    StoreBaseDisp(rl_address.reg, 0, rl_value.reg.GetLow(), k32);
+    StoreBaseDisp(rl_address.reg, 4, rl_value.reg.GetHigh(), k32);
   } else {
-    DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord);
+    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
     // Unaligned store with STR and STRSH is allowed on ARMv7 with SCTLR.A set to 0.
     RegLocation rl_value = LoadValue(rl_src_value, kCoreReg);
     StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size);
@@ -1148,7 +1148,7 @@
   if (needs_range_check) {
     reg_len = AllocTemp();
     /* Get len */
-    LoadWordDisp(rl_array.reg, len_offset, reg_len);
+    Load32Disp(rl_array.reg, len_offset, reg_len);
     MarkPossibleNullPointerException(opt_flags);
   } else {
     ForceImplicitNullCheck(rl_array.reg, opt_flags);
@@ -1217,7 +1217,7 @@
   bool constant_index = rl_index.is_const;
 
   int data_offset;
-  if (size == kLong || size == kDouble) {
+  if (size == k64 || size == kDouble) {
     data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
   } else {
     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
@@ -1254,7 +1254,7 @@
     reg_len = AllocTemp();
     // NOTE: max live temps(4) here.
     /* Get len */
-    LoadWordDisp(rl_array.reg, len_offset, reg_len);
+    Load32Disp(rl_array.reg, len_offset, reg_len);
     MarkPossibleNullPointerException(opt_flags);
   } else {
     ForceImplicitNullCheck(rl_array.reg, opt_flags);
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 1053a8f..305e89b 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -656,7 +656,7 @@
   if (info->live && info->dirty) {
     info->dirty = false;
     int v_reg = mir_graph_->SRegToVReg(info->s_reg);
-    StoreBaseDisp(rs_rARM_SP, VRegOffset(v_reg), reg, kWord);
+    StoreBaseDisp(rs_rARM_SP, VRegOffset(v_reg), reg, k32);
   }
 }
 
@@ -738,8 +738,8 @@
 
 LIR* ArmMir2Lir::CheckSuspendUsingLoad() {
   RegStorage tmp = rs_r0;
-  LoadWordDisp(rs_rARM_SELF, Thread::ThreadSuspendTriggerOffset<4>().Int32Value(), tmp);
-  LIR* load2 = LoadWordDisp(tmp, 0, tmp);
+  Load32Disp(rs_rARM_SELF, Thread::ThreadSuspendTriggerOffset<4>().Int32Value(), tmp);
+  LIR* load2 = Load32Disp(tmp, 0, tmp);
   return load2;
 }
 
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 70cbdd2..6879ffc 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -699,23 +699,24 @@
 
   if (ARM_FPREG(r_dest.GetReg())) {
     if (ARM_SINGLEREG(r_dest.GetReg())) {
-      DCHECK((size == kWord) || (size == kSingle));
+      DCHECK((size == k32) || (size == kSingle));
       opcode = kThumb2Vldrs;
       size = kSingle;
     } else {
       DCHECK(ARM_DOUBLEREG(r_dest.GetReg()));
-      DCHECK((size == kLong) || (size == kDouble));
+      DCHECK((size == k64) || (size == kDouble));
       DCHECK_EQ((r_dest.GetReg() & 0x1), 0);
       opcode = kThumb2Vldrd;
       size = kDouble;
     }
   } else {
     if (size == kSingle)
-      size = kWord;
+      size = k32;
   }
 
   switch (size) {
     case kDouble:  // fall-through
+    // Intentional fall-through.
     case kSingle:
       reg_ptr = AllocTemp();
       if (scale) {
@@ -727,7 +728,9 @@
       load = NewLIR3(opcode, r_dest.GetReg(), reg_ptr.GetReg(), 0);
       FreeTemp(reg_ptr);
       return load;
-    case kWord:
+    case k32:
+    // Intentional fall-through.
+    case kReference:
       opcode = (thumb_form) ? kThumbLdrRRR : kThumb2LdrRRR;
       break;
     case kUnsignedHalf:
@@ -764,23 +767,24 @@
 
   if (ARM_FPREG(r_src.GetReg())) {
     if (ARM_SINGLEREG(r_src.GetReg())) {
-      DCHECK((size == kWord) || (size == kSingle));
+      DCHECK((size == k32) || (size == kSingle));
       opcode = kThumb2Vstrs;
       size = kSingle;
     } else {
       DCHECK(ARM_DOUBLEREG(r_src.GetReg()));
-      DCHECK((size == kLong) || (size == kDouble));
+      DCHECK((size == k64) || (size == kDouble));
       DCHECK_EQ((r_src.GetReg() & 0x1), 0);
       opcode = kThumb2Vstrd;
       size = kDouble;
     }
   } else {
     if (size == kSingle)
-      size = kWord;
+      size = k32;
   }
 
   switch (size) {
     case kDouble:  // fall-through
+    // Intentional fall-through.
     case kSingle:
       reg_ptr = AllocTemp();
       if (scale) {
@@ -792,14 +796,18 @@
       store = NewLIR3(opcode, r_src.GetReg(), reg_ptr.GetReg(), 0);
       FreeTemp(reg_ptr);
       return store;
-    case kWord:
+    case k32:
+    // Intentional fall-through.
+    case kReference:
       opcode = (thumb_form) ? kThumbStrRRR : kThumb2StrRRR;
       break;
     case kUnsignedHalf:
+    // Intentional fall-through.
     case kSignedHalf:
       opcode = (thumb_form) ? kThumbStrhRRR : kThumb2StrhRRR;
       break;
     case kUnsignedByte:
+    // Intentional fall-through.
     case kSignedByte:
       opcode = (thumb_form) ? kThumbStrbRRR : kThumb2StrbRRR;
       break;
@@ -832,7 +840,8 @@
   bool null_pointer_safepoint = false;
   switch (size) {
     case kDouble:
-    case kLong:
+    // Intentional fall-through.
+    case k64:
       if (ARM_FPREG(dest_low_reg)) {
         // Note: following change to avoid using pairs for doubles, replace conversion w/ DCHECK.
         if (r_dest.IsPair()) {
@@ -849,15 +858,18 @@
           load = NewLIR4(kThumb2LdrdI8, r_dest.GetLowReg(), r_dest.GetHighReg(), r_base.GetReg(),
                          displacement >> 2);
         } else {
-          load = LoadBaseDispBody(r_base, displacement, r_dest.GetLow(), kWord, s_reg);
+          load = LoadBaseDispBody(r_base, displacement, r_dest.GetLow(), k32, s_reg);
           null_pointer_safepoint = true;
-          LoadBaseDispBody(r_base, displacement + 4, r_dest.GetHigh(), kWord, INVALID_SREG);
+          LoadBaseDispBody(r_base, displacement + 4, r_dest.GetHigh(), k32, INVALID_SREG);
         }
         already_generated = true;
       }
       break;
     case kSingle:
-    case kWord:
+    // Intentional fall-through.
+    case k32:
+    // Intentional fall-through.
+    case kReference:
       if (ARM_FPREG(r_dest.GetReg())) {
         opcode = kThumb2Vldrs;
         if (displacement <= 1020) {
@@ -953,13 +965,17 @@
 
 LIR* ArmMir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
                               int s_reg) {
-  DCHECK(!((size == kLong) || (size == kDouble)));
+  DCHECK(!((size == k64) || (size == kDouble)));
+  // TODO: base this on target.
+  if (size == kWord) {
+    size = k32;
+  }
   return LoadBaseDispBody(r_base, displacement, r_dest, size, s_reg);
 }
 
 LIR* ArmMir2Lir::LoadBaseDispWide(RegStorage r_base, int displacement, RegStorage r_dest,
                                   int s_reg) {
-  return LoadBaseDispBody(r_base, displacement, r_dest, kLong, s_reg);
+  return LoadBaseDispBody(r_base, displacement, r_dest, k64, s_reg);
 }
 
 
@@ -975,16 +991,16 @@
   int src_low_reg = r_src.IsPair() ? r_src.GetLowReg() : r_src.GetReg();
   bool null_pointer_safepoint = false;
   switch (size) {
-    case kLong:
+    case k64:
     case kDouble:
       if (!ARM_FPREG(src_low_reg)) {
         if (displacement <= 1020) {
           store = NewLIR4(kThumb2StrdI8, r_src.GetLowReg(), r_src.GetHighReg(), r_base.GetReg(),
                           displacement >> 2);
         } else {
-          store = StoreBaseDispBody(r_base, displacement, r_src.GetLow(), kWord);
+          store = StoreBaseDispBody(r_base, displacement, r_src.GetLow(), k32);
           null_pointer_safepoint = true;
-          StoreBaseDispBody(r_base, displacement + 4, r_src.GetHigh(), kWord);
+          StoreBaseDispBody(r_base, displacement + 4, r_src.GetHigh(), k32);
         }
         already_generated = true;
       } else {
@@ -1001,7 +1017,8 @@
       }
       break;
     case kSingle:
-    case kWord:
+    case k32:
+    case kReference:
       if (ARM_FPREG(r_src.GetReg())) {
         DCHECK(ARM_SINGLEREG(r_src.GetReg()));
         opcode = kThumb2Vstrs;
@@ -1082,12 +1099,16 @@
 
 LIR* ArmMir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
                                OpSize size) {
-  DCHECK(!((size == kLong) || (size == kDouble)));
+  // TODO: base this on target.
+  if (size == kWord) {
+    size = k32;
+  }
+  DCHECK(!((size == k64) || (size == kDouble)));
   return StoreBaseDispBody(r_base, displacement, r_src, size);
 }
 
 LIR* ArmMir2Lir::StoreBaseDispWide(RegStorage r_base, int displacement, RegStorage r_src) {
-  return StoreBaseDispBody(r_base, displacement, r_src, kLong);
+  return StoreBaseDispBody(r_base, displacement, r_src, k64);
 }
 
 LIR* ArmMir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {