Support hardware divide instruction
Bug: 11299025
Uses sdiv for division and a combo of sdiv, mul and sub for modulus.
Only does this on processors that are capable of the sdiv instruction, as determined
by the build system.
Also provides a command line arg --instruction-set-features= to allow cross compilation.
Makefile adds the --instruction-set-features= arg to build-time dex2oat runs and defaults
it to something obtained from the target architecture.
Provides a GetInstructionSetFeatures() function on CompilerDriver that can be
queried for various features. The only feature supported right now is hasDivideInstruction().
Also adds a few more instructions to the ARM disassembler
b/11535253 is an addition to this CL to be done later.
Change-Id: Ia8aaf801fd94bc71e476902749cf20f74eba9f68
diff --git a/compiler/dex/compiler_ir.h b/compiler/dex/compiler_ir.h
index 0d7209e..fd46975 100644
--- a/compiler/dex/compiler_ir.h
+++ b/compiler/dex/compiler_ir.h
@@ -97,6 +97,9 @@
CompilerBackend compiler_backend;
InstructionSet instruction_set;
+ const InstructionSetFeatures& GetInstructionSetFeatures() {
+ return compiler_driver->GetInstructionSetFeatures();
+ }
// TODO: much of this info available elsewhere. Go to the original source?
uint16_t num_dalvik_registers; // method->registers_size.
const uint16_t* insns;
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index 2ff7f1c..ffaaf84 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -380,6 +380,8 @@
kThumb2CmnRR, // cmn [111010110001] rn[19..16] [0000] [1111] [0000] rm[3..0].
kThumb2EorRRR, // eor [111010101000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
kThumb2MulRRR, // mul [111110110000] rn[19..16] [1111] rd[11..8] [0000] rm[3..0].
+ kThumb2SdivRRR, // sdiv [111110111001] rn[19..16] [1111] rd[11..8] [1111] rm[3..0].
+ kThumb2UdivRRR, // udiv [111110111011] rn[19..16] [1111] rd[11..8] [1111] rm[3..0].
kThumb2MnvRR, // mvn [11101010011011110] rd[11-8] [0000] rm[3..0].
kThumb2RsubRRI8, // rsub [111100011100] rn[19..16] [0000] rd[11..8] imm8[7..0].
kThumb2NegRR, // actually rsub rd, rn, #0.
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index e8c188c..3d0f263 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -687,6 +687,14 @@
kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
"mul", "!0C, !1C, !2C", 4, kFixupNone),
+ ENCODING_MAP(kThumb2SdivRRR, 0xfb90f0f0,
+ kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+ "sdiv", "!0C, !1C, !2C", 4, kFixupNone),
+ ENCODING_MAP(kThumb2UdivRRR, 0xfbb0f0f0,
+ kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+ "udiv", "!0C, !1C, !2C", 4, kFixupNone),
ENCODING_MAP(kThumb2MnvRR, 0xea6f0000,
kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 0a8cbf9..42bf3d4 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -466,14 +466,39 @@
RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, int reg1, int lit,
bool is_div) {
- LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm";
- return rl_dest;
+ RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+
+ // Put the literal in a temp.
+ int lit_temp = AllocTemp();
+ LoadConstant(lit_temp, lit);
+ // Use the generic case for div/rem with arg2 in a register.
+ // TODO: The literal temp can be freed earlier during a modulus to reduce reg pressure.
+ rl_result = GenDivRem(rl_result, reg1, lit_temp, is_div);
+ FreeTemp(lit_temp);
+
+ return rl_result;
}
RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, int reg1, int reg2,
bool is_div) {
- LOG(FATAL) << "Unexpected use of GenDivRem for Arm";
- return rl_dest;
+ RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+ if (is_div) {
+ // Simple case, use sdiv instruction.
+ OpRegRegReg(kOpDiv, rl_result.low_reg, reg1, reg2);
+ } else {
+ // Remainder case, use the following code:
+ // temp = reg1 / reg2 - integer division
+ // temp = temp * reg2
+ // dest = reg1 - temp
+
+ int temp = AllocTemp();
+ OpRegRegReg(kOpDiv, temp, reg1, reg2);
+ OpRegReg(kOpMul, temp, reg2);
+ OpRegRegReg(kOpSub, rl_result.low_reg, reg1, temp);
+ FreeTemp(temp);
+ }
+
+ return rl_result;
}
bool ArmMir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) {
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 3ceeacf..d631cf7 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -395,6 +395,10 @@
DCHECK_EQ(shift, 0);
opcode = kThumb2MulRRR;
break;
+ case kOpDiv:
+ DCHECK_EQ(shift, 0);
+ opcode = kThumb2SdivRRR;
+ break;
case kOpOr:
opcode = kThumb2OrrRRR;
break;
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 2b3404a..df6493d 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -1307,6 +1307,7 @@
}
StoreValue(rl_dest, rl_result);
} else {
+ bool done = false; // Set to true if we happen to find a way to use a real instruction.
if (cu_->instruction_set == kMips) {
rl_src1 = LoadValue(rl_src1, kCoreReg);
rl_src2 = LoadValue(rl_src2, kCoreReg);
@@ -1314,7 +1315,23 @@
GenImmedCheck(kCondEq, rl_src2.low_reg, 0, kThrowDivZero);
}
rl_result = GenDivRem(rl_dest, rl_src1.low_reg, rl_src2.low_reg, op == kOpDiv);
- } else {
+ done = true;
+ } else if (cu_->instruction_set == kThumb2) {
+ if (cu_->GetInstructionSetFeatures().HasDivideInstruction()) {
+ // Use ARM SDIV instruction for division. For remainder we also need to
+ // calculate using a MUL and subtract.
+ rl_src1 = LoadValue(rl_src1, kCoreReg);
+ rl_src2 = LoadValue(rl_src2, kCoreReg);
+ if (check_zero) {
+ GenImmedCheck(kCondEq, rl_src2.low_reg, 0, kThrowDivZero);
+ }
+ rl_result = GenDivRem(rl_dest, rl_src1.low_reg, rl_src2.low_reg, op == kOpDiv);
+ done = true;
+ }
+ }
+
+ // If we haven't already generated the code use the callout function.
+ if (!done) {
ThreadOffset func_offset = QUICK_ENTRYPOINT_OFFSET(pIdivmod);
FlushAllRegs(); /* Send everything to home location */
LoadValueDirectFixed(rl_src2, TargetReg(kArg1));
@@ -1323,7 +1340,7 @@
if (check_zero) {
GenImmedCheck(kCondEq, TargetReg(kArg1), 0, kThrowDivZero);
}
- // NOTE: callout here is not a safepoint
+ // NOTE: callout here is not a safepoint.
CallHelper(r_tgt, func_offset, false /* not a safepoint */);
if (op == kOpDiv)
rl_result = GetReturn(false);
@@ -1561,11 +1578,24 @@
if (HandleEasyDivRem(opcode, is_div, rl_src, rl_dest, lit)) {
return;
}
+
+ bool done = false;
if (cu_->instruction_set == kMips) {
rl_src = LoadValue(rl_src, kCoreReg);
rl_result = GenDivRemLit(rl_dest, rl_src.low_reg, lit, is_div);
- } else {
- FlushAllRegs(); /* Everything to home location */
+ done = true;
+ } else if (cu_->instruction_set == kThumb2) {
+ if (cu_->GetInstructionSetFeatures().HasDivideInstruction()) {
+ // Use ARM SDIV instruction for division. For remainder we also need to
+ // calculate using a MUL and subtract.
+ rl_src = LoadValue(rl_src, kCoreReg);
+ rl_result = GenDivRemLit(rl_dest, rl_src.low_reg, lit, is_div);
+ done = true;
+ }
+ }
+
+ if (!done) {
+ FlushAllRegs(); /* Everything to home location. */
LoadValueDirectFixed(rl_src, TargetReg(kArg0));
Clobber(TargetReg(kArg0));
ThreadOffset func_offset = QUICK_ENTRYPOINT_OFFSET(pIdivmod);
@@ -1583,7 +1613,7 @@
}
rl_src = LoadValue(rl_src, kCoreReg);
rl_result = EvalLoc(rl_dest, kCoreReg, true);
- // Avoid shifts by literal 0 - no support in Thumb. Change to copy
+ // Avoid shifts by literal 0 - no support in Thumb. Change to copy.
if (shift_op && (lit == 0)) {
OpRegCopy(rl_result.low_reg, rl_src.low_reg);
} else {