AArch64: Enable Inlining.
This patch fixes the remaining issues with inlining for ARM64: it clears
the kSuppressMethodInlining and kNullCheckElimination bits from the ARM64
disable mask, restores the intrinsics path in GenInvoke, adds an ARM64
GenInlinedAbsDouble, and corrects the handling of wide and reference
operands in the ARM64 intrinsic expanders.
Change-Id: I2d85b7c4f3fb2b667bf6029fbc271ab954378889
Signed-off-by: Serban Constantinescu <serban.constantinescu@arm.com>
Signed-off-by: Matteo Franchin <matteo.franchin@arm.com>
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 07bbf15..8021fa4 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -868,7 +868,8 @@
cu.disable_opt |= (1 << kLoadStoreElimination);
} else if (cu.instruction_set == kArm64) {
// TODO(Arm64): enable optimizations once backend is mature enough.
- cu.disable_opt = ~(uint32_t)0;
+ cu.disable_opt = ~((1 << kSuppressMethodInlining) |
+ (1 << kNullCheckElimination));
}
cu.StartTimingSplit("BuildMIRGraph");
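
The new mask inverts only the kSuppressMethodInlining and
kNullCheckElimination bits, so method inlining and null check elimination
now run on ARM64 while every other optimization stays disabled until the
backend matures. A minimal C++ sketch of how such a disable mask gates a
pass; the bit positions and the PassDisabled helper are illustrative, not
ART's actual definitions:

    #include <cstdint>

    // Illustrative bit positions only; ART defines these in its own enum.
    enum OptControl : uint32_t {
      kNullCheckElimination = 3,
      kSuppressMethodInlining = 9,
    };

    inline bool PassDisabled(uint32_t disable_opt, OptControl bit) {
      return (disable_opt & (1u << bit)) != 0;
    }

    // With the mask above, only these two bits are clear:
    //   PassDisabled(mask, kSuppressMethodInlining) == false  -> inlining allowed
    //   PassDisabled(mask, kNullCheckElimination)   == false  -> NCE runs
    const uint32_t kArm64DisableMask = ~((1u << kSuppressMethodInlining) |
                                         (1u << kNullCheckElimination));
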
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index b070c8a..294b9ea 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -177,6 +177,7 @@
RegLocation rl_src2);
void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
bool GenInlinedReverseBits(CallInfo* info, OpSize size);
+ bool GenInlinedAbsDouble(CallInfo* info) OVERRIDE;
bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long);
bool GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double);
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
index 0f9de5b..6594c4b 100644
--- a/compiler/dex/quick/arm64/fp_arm64.cc
+++ b/compiler/dex/quick/arm64/fp_arm64.cc
@@ -323,6 +323,16 @@
StoreValueWide(rl_dest, rl_result);
}
+bool Arm64Mir2Lir::GenInlinedAbsDouble(CallInfo* info) {
+ RegLocation rl_src = info->args[0];
+ rl_src = LoadValueWide(rl_src, kCoreReg);
+ RegLocation rl_dest = InlineTargetWide(info);
+ RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+ NewLIR4(WIDE(kA64Ubfm4rrdd), rl_result.reg.GetReg(), rl_src.reg.GetReg(), 0, 62);
+ StoreValueWide(rl_dest, rl_result);
+ return true;
+}
+
bool Arm64Mir2Lir::GenInlinedSqrt(CallInfo* info) {
RegLocation rl_src = info->args[0];
RegLocation rl_dest = InlineTargetWide(info); // double place for result
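
The UBFM above (immr = 0, imms = 62, 64-bit form) is the alias
ubfx xd, xn, #0, #63: it copies bits [62:0] of the source and zeroes
bit 63, which is the sign bit of an IEEE-754 double, so abs(double) is
computed with a single integer instruction on the value's bit pattern,
keeping the operand in core registers as requested by kCoreReg. A
minimal C++ sketch of the same bit manipulation; the function name is
illustrative only:

    #include <cstdint>
    #include <cstring>

    // Same effect as `ubfx xd, xn, #0, #63`: keep bits [62:0], clear bit 63.
    double AbsDoubleViaSignBitClear(double x) {
      uint64_t bits;
      std::memcpy(&bits, &x, sizeof bits);
      bits &= ~(UINT64_C(1) << 63);  // clear the IEEE-754 sign bit
      std::memcpy(&x, &bits, sizeof bits);
      return x;
    }
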
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index bab5499..86dddae 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -451,9 +451,8 @@
bool Arm64Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
RegLocation rl_src_address = info->args[0]; // long address
- rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[1] ?
- RegLocation rl_dest = InlineTarget(info);
- RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); // kRefReg
+ RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info);
+ RegLocation rl_address = LoadValueWide(rl_src_address, kCoreReg);
RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
@@ -468,9 +467,8 @@
bool Arm64Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
RegLocation rl_src_address = info->args[0]; // long address
- rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[1]
RegLocation rl_src_value = info->args[2]; // [size] value
- RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); // kRefReg
+ RegLocation rl_address = LoadValueWide(rl_src_address, kCoreReg);
RegLocation rl_value;
if (size == k64) {
@@ -497,11 +495,9 @@
bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
DCHECK_EQ(cu_->instruction_set, kArm64);
- ArmOpcode wide = is_long ? WIDE(0) : UNWIDE(0);
// Unused - RegLocation rl_src_unsafe = info->args[0];
RegLocation rl_src_obj = info->args[1]; // Object - known non-null
RegLocation rl_src_offset = info->args[2]; // long low
- rl_src_offset = NarrowRegLoc(rl_src_offset); // ignore high half in info->args[3] //TODO: do we really need this
RegLocation rl_src_expected = info->args[4]; // int, long or Object
// If is_long, high half is in info->args[5]
RegLocation rl_src_new_value = info->args[is_long ? 6 : 5]; // int, long or Object
@@ -510,7 +506,7 @@
// Load Object and offset
RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
- RegLocation rl_offset = LoadValue(rl_src_offset, kRefReg);
+ RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
RegLocation rl_new_value;
RegLocation rl_expected;
@@ -542,28 +538,38 @@
// result = tmp != 0;
RegStorage r_tmp;
+ RegStorage r_tmp_stored;
+ RegStorage rl_new_value_stored = rl_new_value.reg;
+ ArmOpcode wide = UNWIDE(0);
if (is_long) {
- r_tmp = AllocTempWide();
+ r_tmp_stored = r_tmp = AllocTempWide();
+ wide = WIDE(0);
} else if (is_object) {
+ // References use 64-bit registers, but are stored as compressed 32-bit values.
+ // This means r_tmp_stored != r_tmp.
r_tmp = AllocTempRef();
+ r_tmp_stored = As32BitReg(r_tmp);
+ rl_new_value_stored = As32BitReg(rl_new_value_stored);
} else {
- r_tmp = AllocTemp();
+ r_tmp_stored = r_tmp = AllocTemp();
}
+ RegStorage r_tmp32 = (r_tmp.Is32Bit()) ? r_tmp : As32BitReg(r_tmp);
LIR* loop = NewLIR0(kPseudoTargetLabel);
- NewLIR2(kA64Ldaxr2rX | wide, r_tmp.GetReg(), r_ptr.GetReg());
+ NewLIR2(kA64Ldaxr2rX | wide, r_tmp_stored.GetReg(), r_ptr.GetReg());
OpRegReg(kOpCmp, r_tmp, rl_expected.reg);
DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
LIR* early_exit = OpCondBranch(kCondNe, NULL);
-
- NewLIR3(kA64Stlxr3wrX | wide, As32BitReg(r_tmp).GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg());
- NewLIR3(kA64Cmp3RdT, As32BitReg(r_tmp).GetReg(), 0, ENCODE_NO_SHIFT);
+ NewLIR3(kA64Stlxr3wrX | wide, r_tmp32.GetReg(), rl_new_value_stored.GetReg(), r_ptr.GetReg());
+ NewLIR3(kA64Cmp3RdT, r_tmp32.GetReg(), 0, ENCODE_NO_SHIFT);
DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
OpCondBranch(kCondNe, loop);
+ LIR* exit_loop = NewLIR0(kPseudoTargetLabel);
+ early_exit->target = exit_loop;
+
RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
- LIR* exit = NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondNe);
- early_exit->target = exit;
+ NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondNe);
FreeTemp(r_tmp); // Now unneeded.
FreeTemp(r_ptr); // Now unneeded.
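
The rewritten compare-and-swap loop keeps the LDAXR/STLXR structure but
fixes operand widths: wide registers for long, and, for object
references, a 64-bit temporary whose stored form is narrowed to 32 bits
because the heap holds compressed references. The early-exit branch now
targets a label placed before the CSINC, so both paths reach it with
meaningful condition flags: NE from the value comparison on a mismatch,
EQ from the store-status comparison on success. `csinc wd, wzr, wzr, ne`
then materializes 0 on failure and 1 on success. A minimal C++ sketch of
the semantics this loop implements, using a compiler builtin rather than
the emitted instructions; the sequentially consistent ordering here is an
assumption, while the emitted ldaxr/stlxr pair provides acquire/release:

    #include <cstdint>

    // Semantics of the inlined 64-bit CAS: returns true iff *addr held
    // `expected` and has been replaced by `desired`.
    bool CompareAndSwap64(volatile int64_t* addr, int64_t expected,
                          int64_t desired) {
      return __atomic_compare_exchange_n(addr, &expected, desired,
                                         /*weak=*/false,
                                         __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    }
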
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index aaee91b..eb08404 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -893,7 +893,14 @@
int expected_scale = 0;
ArmOpcode opcode = kA64Brk1d;
r_base = Check64BitReg(r_base);
- r_index = Check64BitReg(r_index);
+
+ // TODO(Arm64): The sign extension of r_index should be carried out by using an extended
+ // register offset load (rather than doing the sign extension in a separate instruction).
+ if (r_index.Is32Bit()) {
+ // Assemble: ``sxtw xN, wN''.
+ r_index = As64BitReg(r_index);
+ NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31);
+ }
if (r_dest.IsFloat()) {
if (r_dest.IsDouble()) {
@@ -920,9 +927,11 @@
opcode = WIDE(kA64Ldr4rXxG);
expected_scale = 3;
break;
+ case kReference:
+ // TODO(Arm64): r_dest must be 64-bit below. Remove the hack below.
+ r_dest = (r_dest.Is64Bit()) ? As32BitReg(r_dest) : r_dest;
case kSingle:
case k32:
- case kReference:
r_dest = Check32BitReg(r_dest);
opcode = kA64Ldr4rXxG;
expected_scale = 2;
@@ -973,7 +982,14 @@
int expected_scale = 0;
ArmOpcode opcode = kA64Brk1d;
r_base = Check64BitReg(r_base);
- r_index = Check64BitReg(r_index);
+
+ // TODO(Arm64): The sign extension of r_index should be carried out by using an extended
+ // register offset store (rather than doing the sign extension in a separate instruction).
+ if (r_index.Is32Bit()) {
+ // Assemble: ``sxtw xN, wN''.
+ r_index = As64BitReg(r_index);
+ NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31);
+ }
if (r_src.IsFloat()) {
if (r_src.IsDouble()) {
@@ -1000,9 +1016,11 @@
opcode = WIDE(kA64Str4rXxG);
expected_scale = 3;
break;
+ case kReference:
+ // TODO(Arm64): r_src must be 64-bit below. Remove the hack below.
+ r_src = (r_src.Is64Bit()) ? As32BitReg(r_src) : r_src;
case kSingle: // Intentional fall-trough.
case k32: // Intentional fall-trough.
- case kReference:
r_src = Check32BitReg(r_src);
opcode = kA64Str4rXxG;
expected_scale = 2;
@@ -1066,9 +1084,11 @@
alt_opcode = WIDE(kA64Ldur3rXd);
}
break;
+ case kReference:
+ // TODO(Arm64): r_dest must be 64-bit below. Remove the hack below.
+ r_dest = (r_dest.Is64Bit()) ? As32BitReg(r_dest) : r_dest;
case kSingle: // Intentional fall-through.
case k32: // Intentional fall-trough.
- case kReference:
r_dest = Check32BitReg(r_dest);
scale = 2;
if (r_dest.IsFloat()) {
@@ -1165,9 +1185,11 @@
alt_opcode = FWIDE(kA64Stur3rXd);
}
break;
+ case kReference:
+ // TODO(Arm64): r_src must be 64-bit below. Remove the hack below.
+ r_src = (r_src.Is64Bit()) ? As32BitReg(r_src) : r_src;
case kSingle: // Intentional fall-through.
case k32: // Intentional fall-trough.
- case kReference:
r_src = Check32BitReg(r_src);
scale = 2;
if (r_src.IsFloat()) {
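
Both the indexed-load and indexed-store paths now widen a 32-bit index
explicitly: SBFM with immr = 0 and imms = 31 on the 64-bit form is the
alias sxtw xN, wN, sign-extending the index into the full 64-bit
register before it participates in base + (index << scale) addressing.
The kReference cases fall through into the 32-bit handling because
ARM64 keeps references in 64-bit registers while the heap stores them
as compressed 32-bit values; the As32BitReg narrowing is marked as a
hack to be removed. A small C++ sketch of the addressing arithmetic the
sign extension protects; the function name is illustrative only:

    #include <cstdint>

    // base + (sign_extend_64(index) << scale): the address a scaled
    // register-offset load/store forms once the index has been sxtw'd.
    uint64_t ScaledIndexAddress(uint64_t base, int32_t index, unsigned scale) {
      int64_t widened = static_cast<int64_t>(index);  // what sxtw does
      return base + (static_cast<uint64_t>(widened) << scale);
    }
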
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 6c670cd..e262703 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -1280,7 +1280,7 @@
}
Load32Disp(rl_obj.reg, offset_offset, reg_off);
MarkPossibleNullPointerException(info->opt_flags);
- Load32Disp(rl_obj.reg, value_offset, reg_ptr);
+ LoadRefDisp(rl_obj.reg, value_offset, reg_ptr, kNotVolatile);
if (range_check) {
// Set up a slow path to allow retry in case of bounds violation */
OpRegReg(kOpCmp, rl_idx.reg, reg_max);
@@ -1367,8 +1367,8 @@
}
bool Mir2Lir::GenInlinedReverseBytes(CallInfo* info, OpSize size) {
- if (cu_->instruction_set == kMips) {
- // TODO - add Mips implementation
+ if (cu_->instruction_set == kMips || cu_->instruction_set == kArm64) {
+ // TODO - add Mips implementation; enable for Arm64 once supported.
return false;
}
RegLocation rl_src_i = info->args[0];
@@ -1499,14 +1499,8 @@
RegLocation rl_dest = InlineTargetWide(info);
RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
- if (cu_->instruction_set == kArm64) {
- // TODO - Can ecode ? UBXF otherwise
- // OpRegRegImm(kOpAnd, rl_result.reg, 0x7fffffffffffffff);
- return false;
- } else {
- OpRegCopyWide(rl_result.reg, rl_src.reg);
- OpRegImm(kOpAnd, rl_result.reg.GetHigh(), 0x7fffffff);
- }
+ OpRegCopyWide(rl_result.reg, rl_src.reg);
+ OpRegImm(kOpAnd, rl_result.reg.GetHigh(), 0x7fffffff);
StoreValueWide(rl_dest, rl_result);
return true;
}
@@ -1651,7 +1645,8 @@
break;
case kArm64:
- Load32Disp(TargetPtrReg(kSelf), Thread::PeerOffset<8>().Int32Value(), rl_result.reg);
+ LoadRefDisp(TargetPtrReg(kSelf), Thread::PeerOffset<8>().Int32Value(), rl_result.reg,
+ kNotVolatile);
break;
case kX86:
@@ -1685,10 +1680,11 @@
RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
- RegLocation rl_result = EvalLoc(rl_dest, rl_dest.ref ? kRefReg : kCoreReg, true);
+ RegLocation rl_result = EvalLoc(rl_dest, LocToRegClass(rl_dest), true);
if (is_long) {
- if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
- LoadBaseIndexedDisp(rl_object.reg, rl_offset.reg, 0, 0, rl_result.reg, k64);
+ if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64
+ || cu_->instruction_set == kArm64) {
+ LoadBaseIndexed(rl_object.reg, rl_offset.reg, rl_result.reg, 0, k64);
} else {
RegStorage rl_temp_offset = AllocTemp();
OpRegRegReg(kOpAdd, rl_temp_offset, rl_object.reg, rl_offset.reg);
@@ -1696,7 +1692,8 @@
FreeTemp(rl_temp_offset);
}
} else {
- LoadBaseIndexed(rl_object.reg, rl_offset.reg, rl_result.reg, 0, k32);
+ LoadBaseIndexed(rl_object.reg, rl_offset.reg, rl_result.reg, 0,
+ (rl_result.ref) ? kReference : k32);
}
if (is_volatile) {
@@ -1734,8 +1731,9 @@
RegLocation rl_value;
if (is_long) {
rl_value = LoadValueWide(rl_src_value, kCoreReg);
- if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
- StoreBaseIndexedDisp(rl_object.reg, rl_offset.reg, 0, 0, rl_value.reg, k64);
+ if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64
+ || cu_->instruction_set == kArm64) {
+ StoreBaseIndexed(rl_object.reg, rl_offset.reg, rl_value.reg, 0, k64);
} else {
RegStorage rl_temp_offset = AllocTemp();
OpRegRegReg(kOpAdd, rl_temp_offset, rl_object.reg, rl_offset.reg);
@@ -1744,7 +1742,8 @@
}
} else {
rl_value = LoadValue(rl_src_value);
- StoreBaseIndexed(rl_object.reg, rl_offset.reg, rl_value.reg, 0, k32);
+ StoreBaseIndexed(rl_object.reg, rl_offset.reg, rl_value.reg, 0,
+ (rl_value.ref) ? kReference : k32);
}
// Free up the temp early, to ensure x86 doesn't run out of temporaries in MarkGCCard.
@@ -1772,12 +1771,9 @@
return;
}
DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr);
- // Temporary disable intrinsics for Arm64. We will enable them later step by step.
- if (cu_->instruction_set != kArm64) {
- if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file)
- ->GenIntrinsic(this, info)) {
- return;
- }
+ if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file)
+ ->GenIntrinsic(this, info)) {
+ return;
}
GenInvokeNoInline(info);
}
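
The gen_invoke.cc changes make the shared intrinsic expanders
reference-clean for a 64-bit backend: reference fields (the String value
array, the Thread peer) are loaded with LoadRefDisp rather than
Load32Disp, Unsafe get/put select kReference instead of k32 when the
operand is a reference, the 64-bit Unsafe paths use plain register-offset
addressing on ARM64, and the blanket "no intrinsics on Arm64" guard is
dropped so GenIntrinsic now runs for every backend. A minimal sketch of
the size-selection idiom the Unsafe changes follow; the enum and struct
below are simplified stand-ins for ART's OpSize and RegLocation, not the
real declarations:

    // Simplified stand-ins for ART's OpSize and RegLocation.
    enum class AccessSize { k32, k64, kReference };

    struct Loc {
      bool wide;  // 64-bit value
      bool ref;   // compressed heap reference
    };

    // A reference must not be treated as a plain 32-bit scalar on a
    // 64-bit target: the backend has to know it is moving a compressed
    // reference so it can widen it correctly.
    AccessSize SizeFor(const Loc& loc) {
      if (loc.wide) return AccessSize::k64;
      return loc.ref ? AccessSize::kReference : AccessSize::k32;
    }
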