AArch64: implement easy division and remainder.

This implements easy division and remainder for 32-bit integers only.
The optimisation applies to div/rem by powers of two and to div by small
literals (3 to 15).
Change-Id: I71be7c4de5d2e2e738b88984f13efb08f4388a19
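
For reference, a minimal C++ model of the power-of-two lowering that the
new Arm64Mir2Lir::HandleEasyDivRem (below) emits. The helper names are
illustrative only, not part of the patch, and the sketch assumes
arithmetic right shift on signed int32_t:

    #include <cassert>
    #include <cstdint>

    // Bias is (1 << k) - 1 for negative x and 0 otherwise -- the value the
    // emitted "asr t, x, #31; add t2, x, t, lsr #(32 - k)" sequence produces.
    static int32_t BiasPow2(int32_t x, int k) {
      return static_cast<int32_t>(static_cast<uint32_t>(x >> 31) >> (32 - k));
    }

    int32_t DivPow2(int32_t x, int k) {   // x / (1 << k), rounded toward zero
      return (x + BiasPow2(x, k)) >> k;
    }

    int32_t RemPow2(int32_t x, int k) {   // x % (1 << k), sign follows x
      int32_t bias = BiasPow2(x, k);
      return ((x + bias) & ((1 << k) - 1)) - bias;
    }

    int main() {
      for (int32_t x : {-2147483647 - 1, -7, -1, 0, 1, 7, 2147483647}) {
        assert(DivPow2(x, 2) == x / 4);
        assert(RemPow2(x, 2) == x % 4);
      }
      return 0;
    }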
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index 2a8da24..e017e5f 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -484,7 +484,7 @@
"sdiv", "!0r, !1r, !2r", kFixupNone),
ENCODING_MAP(WIDE(kA64Smaddl4xwwx), NO_VARIANTS(0x9b200000),
kFmtRegX, 4, 0, kFmtRegW, 9, 5, kFmtRegW, 20, 16,
- kFmtRegX, -1, -1, IS_QUAD_OP | REG_DEF0_USE123,
+ kFmtRegX, 14, 10, IS_QUAD_OP | REG_DEF0_USE123,
"smaddl", "!0x, !1w, !2w, !3x", kFixupNone),
ENCODING_MAP(WIDE(kA64Stp4ffXD), CUSTOM_VARIANTS(0x2d000000, 0x6d000000),
kFmtRegF, 4, 0, kFmtRegF, 14, 10, kFmtRegXOrSp, 9, 5,
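
The table fix above encodes smaddl's fourth (accumulator) operand into its
Ra field, bits 14..10 of the A64 encoding, where it was previously left
unencoded (-1, -1). Semantically, "smaddl xd, wn, wm, xa" computes
xd = xa + sxtw(wn) * sxtw(wm); the patch pairs it with xzr to obtain the
full 64-bit product of two signed 32-bit values in one instruction. An
illustrative C++ model (names are stand-ins, not ART code):

    #include <cstdint>

    // Model of A64 SMADDL: Xd = Xa + sign_extend(Wn) * sign_extend(Wm).
    int64_t Smaddl(int32_t wn, int32_t wm, int64_t xa) {
      return xa + static_cast<int64_t>(wn) * static_cast<int64_t>(wm);
    }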
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index f1270ec..4985d57 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -64,7 +64,9 @@
// Required for target - codegen helpers.
bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src,
- RegLocation rl_dest, int lit);
+ RegLocation rl_dest, int lit) OVERRIDE;
+ bool HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
+ RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
LIR* CheckSuspendUsingLoad() OVERRIDE;
RegStorage LoadHelper(ThreadOffset<4> offset) OVERRIDE;
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index 2ac4adb..cb50674 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -271,14 +271,8 @@
// Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4)
bool Arm64Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
- RegLocation rl_src, RegLocation rl_dest, int lit) {
- // TODO(Arm64): fix this for Arm64. Note: may be worth revisiting the magic table.
- // It should be possible subtracting one from all its entries, and using smaddl
- // to counteract this. The advantage is that integers should then be easier to
- // encode as logical immediates (0x55555555 rather than 0x55555556).
- UNIMPLEMENTED(FATAL);
-
- if ((lit < 0) || (lit >= static_cast<int>(sizeof(magic_table)/sizeof(magic_table[0])))) {
+ RegLocation rl_src, RegLocation rl_dest, int lit) {
+ if ((lit < 0) || (lit >= static_cast<int>(arraysize(magic_table)))) {
return false;
}
DividePattern pattern = magic_table[lit].pattern;
@@ -294,21 +288,24 @@
LoadConstant(r_magic, magic_table[lit].magic);
rl_src = LoadValue(rl_src, kCoreReg);
RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
- RegStorage r_hi = AllocTemp();
- RegStorage r_lo = AllocTemp();
- NewLIR4(kA64Smaddl4xwwx, r_lo.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg(), rxzr);
+ RegStorage r_long_mul = AllocTemp();
+ NewLIR4(kA64Smaddl4xwwx, As64BitReg(r_long_mul).GetReg(),
+ r_magic.GetReg(), rl_src.reg.GetReg(), rxzr);
switch (pattern) {
case Divide3:
- OpRegRegRegShift(kOpSub, rl_result.reg, r_hi, rl_src.reg, EncodeShift(kA64Asr, 31));
+ OpRegRegImm(kOpLsr, As64BitReg(r_long_mul), As64BitReg(r_long_mul), 32);
+ OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 31));
break;
case Divide5:
- OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
- OpRegRegRegShift(kOpRsub, rl_result.reg, r_lo, r_hi, EncodeShift(kA64Asr, magic_table[lit].shift));
+ OpRegRegImm(kOpAsr, As64BitReg(r_long_mul), As64BitReg(r_long_mul),
+ 32 + magic_table[lit].shift);
+ OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 31));
break;
case Divide7:
- OpRegReg(kOpAdd, r_hi, rl_src.reg);
- OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
- OpRegRegRegShift(kOpRsub, rl_result.reg, r_lo, r_hi, EncodeShift(kA64Asr, magic_table[lit].shift));
+ OpRegRegRegShift(kOpAdd, As64BitReg(r_long_mul), As64BitReg(rl_src.reg),
+ As64BitReg(r_long_mul), EncodeShift(kA64Lsr, 32));
+ OpRegRegImm(kOpAsr, r_long_mul, r_long_mul, magic_table[lit].shift);
+ OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 31));
break;
default:
LOG(FATAL) << "Unexpected pattern: " << pattern;
@@ -317,13 +314,59 @@
return true;
}
+// Returns true if it added instructions to 'cu' to divide 'rl_src' by 'lit'
+// and store the result in 'rl_dest'.
+bool Arm64Mir2Lir::HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
+ RegLocation rl_src, RegLocation rl_dest, int lit) {
+ if (lit < 2) {
+ return false;
+ }
+ if (!IsPowerOfTwo(lit)) {
+ return SmallLiteralDivRem(dalvik_opcode, is_div, rl_src, rl_dest, lit);
+ }
+ int k = LowestSetBit(lit);
+ if (k >= 30) {
+ // Avoid special cases.
+ return false;
+ }
+ rl_src = LoadValue(rl_src, kCoreReg);
+ RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+ if (is_div) {
+ RegStorage t_reg = AllocTemp();
+ if (lit == 2) {
+ // Division by 2 is by far the most common division by constant.
+ OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, rl_src.reg, EncodeShift(kA64Lsr, 32 - k));
+ OpRegRegImm(kOpAsr, rl_result.reg, t_reg, k);
+ } else {
+ OpRegRegImm(kOpAsr, t_reg, rl_src.reg, 31);
+ OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, t_reg, EncodeShift(kA64Lsr, 32 - k));
+ OpRegRegImm(kOpAsr, rl_result.reg, t_reg, k);
+ }
+ } else {
+ RegStorage t_reg = AllocTemp();
+ if (lit == 2) {
+ OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, rl_src.reg, EncodeShift(kA64Lsr, 32 - k));
+ OpRegRegImm(kOpAnd, t_reg, t_reg, lit - 1);
+ OpRegRegRegShift(kOpSub, rl_result.reg, t_reg, rl_src.reg, EncodeShift(kA64Lsr, 32 - k));
+ } else {
+ RegStorage t_reg2 = AllocTemp();
+ OpRegRegImm(kOpAsr, t_reg, rl_src.reg, 31);
+ OpRegRegRegShift(kOpAdd, t_reg2, rl_src.reg, t_reg, EncodeShift(kA64Lsr, 32 - k));
+ OpRegRegImm(kOpAnd, t_reg2, t_reg2, lit - 1);
+ OpRegRegRegShift(kOpSub, rl_result.reg, t_reg2, t_reg, EncodeShift(kA64Lsr, 32 - k));
+ }
+ }
+ StoreValue(rl_dest, rl_result);
+ return true;
+}
+
bool Arm64Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
LOG(FATAL) << "Unexpected use of EasyMultiply for Arm64";
return false;
}
RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
- RegLocation rl_src2, bool is_div, bool check_zero) {
+ RegLocation rl_src2, bool is_div, bool check_zero) {
LOG(FATAL) << "Unexpected use of GenDivRem for Arm64";
return rl_dest;
}
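
For the reciprocal-multiply path, a minimal C++ sketch of the Divide3
pattern that SmallLiteralDivRem now emits. The magic constant 0x55555556
is the standard Hacker's Delight value for division by 3 and is an
assumption here, not read out of ART's magic_table:

    #include <cassert>
    #include <cstdint>

    int32_t Div3(int32_t x) {
      // smaddl with xzr: full 64-bit product of two signed 32-bit values.
      int64_t product = static_cast<int64_t>(0x55555556) * x;
      int32_t hi = static_cast<int32_t>(product >> 32);  // "lsr #32" above
      return hi - (x >> 31);                             // "sub ..., asr #31"
    }

    int main() {
      for (int32_t x = -1000; x <= 1000; ++x) {
        assert(Div3(x) == x / 3);
      }
      return 0;
    }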
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index 672aa88..37edbaf 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -875,7 +875,7 @@
}
int Arm64Mir2Lir::EncodeShift(int shift_type, int amount) {
- return ((shift_type & 0x3) << 7) | (amount & 0x1f);
+ return ((shift_type & 0x3) << 7) | (amount & 0x3f);
}
int Arm64Mir2Lir::EncodeExtend(int extend_type, int amount) {
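
The wider mask matters because the new code shifts 64-bit registers by
amounts up to 63 -- e.g. EncodeShift(kA64Lsr, 32) when extracting the high
word of the smaddl result. With the old 5-bit mask that amount would have
silently wrapped to zero:

    // 5-bit mask drops 64-bit shift amounts; 6 bits are needed.
    static_assert((32 & 0x1f) == 0, "lsr #32 would encode as lsr #0");
    static_assert((32 & 0x3f) == 32, "6-bit mask keeps the amount");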
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index f70087d..faed6ae 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -816,8 +816,8 @@
// Shared by all targets - implemented in gen_common.cc.
void AddIntrinsicSlowPath(CallInfo* info, LIR* branch, LIR* resume = nullptr);
- bool HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
- RegLocation rl_src, RegLocation rl_dest, int lit);
+ virtual bool HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
+ RegLocation rl_src, RegLocation rl_dest, int lit);
bool HandleEasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit);
virtual void HandleSlowPaths();
void GenBarrier();
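
Making HandleEasyDivRem virtual lets the shared Mir2Lir code keep calling
it generically while Arm64Mir2Lir supplies the lowering above. A minimal
sketch of the dispatch pattern this enables (stand-in classes, not ART
code):

    struct CodeGen {                       // stands in for Mir2Lir
      virtual ~CodeGen() {}
      virtual bool HandleEasyDivRem(int lit) { return false; }  // generic path
    };

    struct Arm64CodeGen : CodeGen {        // stands in for Arm64Mir2Lir
      bool HandleEasyDivRem(int lit) override {
        return lit >= 2;                   // arm64-specific lowering here
      }
    };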