Update load/store utilities for 64-bit backends
This CL replaces the typical uses of the LoadWord/StoreWord
utilities (which, in practice, were 32-bit loads and stores)
with a new set that makes each access size explicit. We now have
the following (a brief usage sketch follows the list):
LoadWordDisp/StoreWordDisp:
Load or store the natural word size: 32 or 64 bits, depending
on the target. Expect these to be used infrequently - generally
only when we know we're dealing with a native pointer or a
flushed register that does not hold a Dalvik value (Dalvik
values flush to home locations sized by the Dalvik type, not
by the target word size).
Load32Disp/Store32Disp:
Load or store 32 bits, regardless of target.
Load64Disp/Store64Disp:
Load or store 64 bits, regardless of target.
LoadRefDisp:
Load a 32-bit compressed reference and expand it to the
natural word size in the target register.
StoreRefDisp:
Compress a reference held in a register of the natural word
size and store it as a 32-bit compressed reference.
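Conceptually, each helper is a thin wrapper that passes an explicit
size to a single displacement-based load/store primitive. The sketch
below is illustrative only: the stand-in types, the kTarget64Bit flag,
the LoadBaseDisp body, and the offsets in main() are assumptions for
the example, not the actual Mir2Lir code. Store variants mirror the
loads.

  #include <cstdio>

  // Stand-ins for the compiler's RegStorage and OpSize types (assumed here).
  enum OpSize { k32, k64, kWord, kReference };
  struct RegStorage { int reg; };

  // Hypothetical target flag; real backends key this off the instruction set.
  constexpr bool kTarget64Bit = true;

  // Stand-in for the underlying displacement-based load primitive.
  void LoadBaseDisp(RegStorage base, int disp, RegStorage dest, OpSize size) {
    int bits = (size == k64 || (size == kWord && kTarget64Bit)) ? 64 : 32;
    std::printf("load%d [r%d + %d] -> r%d\n", bits, base.reg, disp, dest.reg);
  }

  // Size-explicit helpers: always 32 or always 64 bits, regardless of target.
  void Load32Disp(RegStorage base, int disp, RegStorage dest) {
    LoadBaseDisp(base, disp, dest, k32);
  }
  void Load64Disp(RegStorage base, int disp, RegStorage dest) {
    LoadBaseDisp(base, disp, dest, k64);
  }

  // Natural word size: 32 or 64 bits depending on target. For native
  // pointers and other non-Dalvik values only.
  void LoadWordDisp(RegStorage base, int disp, RegStorage dest) {
    LoadBaseDisp(base, disp, dest, kWord);
  }

  // Compressed reference: load 32 bits, widen to the natural word size.
  void LoadRefDisp(RegStorage base, int disp, RegStorage dest) {
    LoadBaseDisp(base, disp, dest, kReference);
  }

  int main() {
    RegStorage self{9}, tmp{2};
    Load32Disp(self, 0x84, tmp);     // e.g. a 32-bit thread field
    LoadWordDisp(self, 0x100, tmp);  // e.g. a native entrypoint pointer
    return 0;
  }

The diff below switches call sites accordingly: 32-bit fields such as
the thin lock id, the lock word and the exception slot move to
Load32Disp/Store32Disp, while loads of native entrypoints (e.g. into
rs_rARM_PC or rs_rARM_LR) keep LoadWordDisp.
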
Change-Id: I50fcbc8684476abd9527777ee7c152c61ba41c6f
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index d0d0e6b..b374ed8 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -128,7 +128,7 @@
// Load the displacement from the switch table
RegStorage disp_reg = AllocTemp();
- LoadBaseIndexed(table_base, keyReg, disp_reg, 2, kWord);
+ LoadBaseIndexed(table_base, keyReg, disp_reg, 2, k32);
// ..and go! NOTE: No instruction set switch here - must stay Thumb2
LIR* switch_branch = NewLIR1(kThumb2AddPCR, disp_reg.GetReg());
@@ -180,6 +180,7 @@
*/
void ArmMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
FlushAllRegs();
+ // FIXME: need separate LoadValues for object references.
LoadValueDirectFixed(rl_src, rs_r0); // Get obj
LockCallTemps(); // Prepare for explicit register usage
constexpr bool kArchVariantHasGoodBranchPredictor = false; // TODO: true if cortex-A15.
@@ -193,7 +194,7 @@
null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
}
}
- LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
+ Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
MarkPossibleNullPointerException(opt_flags);
LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_r1, 0, NULL);
@@ -219,7 +220,7 @@
} else {
// Explicit null-check as slow-path is entered using an IT.
GenNullCheck(rs_r0, opt_flags);
- LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
+ Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
MarkPossibleNullPointerException(opt_flags);
OpRegImm(kOpCmp, rs_r1, 0);
@@ -248,7 +249,7 @@
LoadValueDirectFixed(rl_src, rs_r0); // Get obj
LockCallTemps(); // Prepare for explicit register usage
LIR* null_check_branch = nullptr;
- LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
+ Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
constexpr bool kArchVariantHasGoodBranchPredictor = false; // TODO: true if cortex-A15.
if (kArchVariantHasGoodBranchPredictor) {
if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
@@ -259,11 +260,11 @@
null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
}
}
- LoadWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);
+ Load32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);
MarkPossibleNullPointerException(opt_flags);
LoadConstantNoClobber(rs_r3, 0);
LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_r1, rs_r2, NULL);
- StoreWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
+ Store32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
LIR* unlock_success_branch = OpUnconditionalBranch(NULL);
LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
@@ -284,14 +285,14 @@
} else {
// Explicit null-check as slow-path is entered using an IT.
GenNullCheck(rs_r0, opt_flags);
- LoadWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1); // Get lock
+ Load32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1); // Get lock
MarkPossibleNullPointerException(opt_flags);
- LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
+ Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
LoadConstantNoClobber(rs_r3, 0);
// Is lock unheld on lock or held by us (==thread_id) on unlock?
OpRegReg(kOpCmp, rs_r1, rs_r2);
LIR* it = OpIT(kCondEq, "EE");
- StoreWordDisp/*eq*/(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
+ Store32Disp/*eq*/(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
// Go expensive route - UnlockObjectFromCode(obj);
LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(),
rs_rARM_LR);
@@ -307,9 +308,9 @@
int ex_offset = Thread::ExceptionOffset<4>().Int32Value();
RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
RegStorage reset_reg = AllocTemp();
- LoadWordDisp(rs_rARM_SELF, ex_offset, rl_result.reg);
+ Load32Disp(rs_rARM_SELF, ex_offset, rl_result.reg);
LoadConstant(reset_reg, 0);
- StoreWordDisp(rs_rARM_SELF, ex_offset, reset_reg);
+ Store32Disp(rs_rARM_SELF, ex_offset, reset_reg);
FreeTemp(reset_reg);
StoreValue(rl_dest, rl_result);
}
@@ -354,7 +355,7 @@
if (!skip_overflow_check) {
if (Runtime::Current()->ExplicitStackOverflowChecks()) {
/* Load stack limit */
- LoadWordDisp(rs_rARM_SELF, Thread::StackEndOffset<4>().Int32Value(), rs_r12);
+ Load32Disp(rs_rARM_SELF, Thread::StackEndOffset<4>().Int32Value(), rs_r12);
}
}
/* Spill core callee saves */
@@ -391,6 +392,7 @@
ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pThrowStackOverflow);
// Load the entrypoint directly into the pc instead of doing a load + branch. Assumes
// codegen and target are in thumb2 mode.
+ // NOTE: native pointer.
m2l_->LoadWordDisp(rs_rARM_SELF, func_offset.Int32Value(), rs_rARM_PC);
}
@@ -421,7 +423,7 @@
// a sub instruction. Otherwise we will get a temp allocation and the
// code size will increase.
OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes);
- LoadWordDisp(rs_r12, 0, rs_r12);
+ Load32Disp(rs_r12, 0, rs_r12);
MarkPossibleStackOverflowException();
OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
}