Special case division by small constants
Do the standard reciprocal multiply trick for small division
by small constants.
Change-Id: Iad1060ccdc6ffeb7b47d45c29ba741683ad01ab9
diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc
index 066c37c..71fd33c 100644
--- a/src/compiler/codegen/GenCommon.cc
+++ b/src/compiler/codegen/GenCommon.cc
@@ -27,6 +27,8 @@
InvokeType type, bool isRange);
#if defined(TARGET_ARM)
LIR* opIT(CompilationUnit* cUnit, ArmConditionCode cond, const char* guide);
+bool smallLiteralDivide(CompilationUnit* cUnit, Instruction::Code dalvikOpcode,
+ RegLocation rlSrc, RegLocation rlDest, int lit);
#endif
void callRuntimeHelperImm(CompilationUnit* cUnit, int helperOffset, int arg0) {
@@ -1915,9 +1917,19 @@
bool handleEasyDivide(CompilationUnit* cUnit, Instruction::Code dalvikOpcode,
RegLocation rlSrc, RegLocation rlDest, int lit)
{
+#if defined(TARGET_ARM)
+ // No divide instruction for Arm, so check for more special cases
+ if (lit < 2) {
+ return false;
+ }
+ if (!isPowerOfTwo(lit)) {
+ return smallLiteralDivide(cUnit, dalvikOpcode, rlSrc, rlDest, lit);
+ }
+#else
if (lit < 2 || !isPowerOfTwo(lit)) {
return false;
}
+#endif
int k = lowestSetBit(lit);
if (k >= 30) {
// Avoid special cases.
diff --git a/src/compiler/codegen/arm/ArmLIR.h b/src/compiler/codegen/arm/ArmLIR.h
index da39713..484892a 100644
--- a/src/compiler/codegen/arm/ArmLIR.h
+++ b/src/compiler/codegen/arm/ArmLIR.h
@@ -617,6 +617,10 @@
[0000] rm[3..0] */
kThumb2Push1, /* t3 encoding of push */
kThumb2Pop1, /* t3 encoding of pop */
+ kThumb2RsubRRR, /* rsb [111010111101] rn[19..16] [0000] rd[11..8]
+ [0000] rm[3..0] */
+ kThumb2Smull, /* smull [111110111000] rn[19-16], rdlo[15-12]
+ rdhi[11-8] [0000] rm[3-0] */
kArmLast,
};
diff --git a/src/compiler/codegen/arm/Assemble.cc b/src/compiler/codegen/arm/Assemble.cc
index 1b7dc29..1d1442a 100644
--- a/src/compiler/codegen/arm/Assemble.cc
+++ b/src/compiler/codegen/arm/Assemble.cc
@@ -964,6 +964,16 @@
kFmtUnused, -1, -1,
IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF0
| IS_LOAD, "pop1", "!0C", 4),
+ ENCODING_MAP(kThumb2RsubRRR, 0xebd00000, /* setflags encoding */
+ kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+ kFmtShift, -1, -1,
+ IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
+ "rsbs", "!0C, !1C, !2C!3H", 4),
+ ENCODING_MAP(kThumb2Smull, 0xfb800000,
+ kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
+ kFmtBitBlt, 3, 0,
+ IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3,
+ "smull", "!0C, !1C, !2C, !3C", 4),
};
diff --git a/src/compiler/codegen/arm/Thumb2/Factory.cc b/src/compiler/codegen/arm/Thumb2/Factory.cc
index e065a2f..0fe937a 100644
--- a/src/compiler/codegen/arm/Thumb2/Factory.cc
+++ b/src/compiler/codegen/arm/Thumb2/Factory.cc
@@ -349,6 +349,9 @@
case kOpSub:
opcode = (thumbForm) ? kThumbSubRRR : kThumb2SubRRR;
break;
+ case kOpRsub:
+ opcode = kThumb2RsubRRR;
+ break;
case kOpAdc:
opcode = kThumb2AdcRRR;
break;
diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc
index 44cae0f..fa51266 100644
--- a/src/compiler/codegen/arm/Thumb2/Gen.cc
+++ b/src/compiler/codegen/arm/Thumb2/Gen.cc
@@ -788,5 +788,83 @@
}
}
+// Table of magic divisors
+enum DividePattern {
+ DivideNone,
+ Divide3,
+ Divide5,
+ Divide7,
+};
+
+struct MagicTable {
+ uint32_t magic;
+ uint32_t shift;
+ DividePattern pattern;
+};
+
+static const MagicTable magicTable[] = {
+ {0, 0, DivideNone}, // 0
+ {0, 0, DivideNone}, // 1
+ {0, 0, DivideNone}, // 2
+ {0x55555556, 0, Divide3}, // 3
+ {0, 0, DivideNone}, // 4
+ {0x66666667, 1, Divide5}, // 5
+ {0x2AAAAAAB, 0, Divide3}, // 6
+ {0x92492493, 2, Divide7}, // 7
+ {0, 0, DivideNone}, // 8
+ {0x38E38E39, 1, Divide5}, // 9
+ {0x66666667, 2, Divide5}, // 10
+ {0x2E8BA2E9, 1, Divide5}, // 11
+ {0x2AAAAAAB, 1, Divide5}, // 12
+ {0x4EC4EC4F, 2, Divide5}, // 13
+ {0x92492493, 3, Divide7}, // 14
+ {0x88888889, 3, Divide7}, // 15
+};
+
+// Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4)
+bool smallLiteralDivide(CompilationUnit* cUnit, Instruction::Code dalvikOpcode,
+ RegLocation rlSrc, RegLocation rlDest, int lit)
+{
+ if ((lit < 0) || (lit >= (int)(sizeof(magicTable)/sizeof(magicTable[0])))) {
+ return false;
+ }
+ DividePattern pattern = magicTable[lit].pattern;
+ if (pattern == DivideNone) {
+ return false;
+ }
+ // Tuning: add rem patterns
+ if (dalvikOpcode != Instruction::DIV_INT_LIT8) {
+ return false;
+ }
+
+ int rMagic = oatAllocTemp(cUnit);
+ loadConstant(cUnit, rMagic, magicTable[lit].magic);
+ rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
+ RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+ int rHi = oatAllocTemp(cUnit);
+ int rLo = oatAllocTemp(cUnit);
+ newLIR4(cUnit, kThumb2Smull, rLo, rHi, rMagic, rlSrc.lowReg);
+ switch(pattern) {
+ case Divide3:
+ opRegRegRegShift(cUnit, kOpSub, rlResult.lowReg, rHi,
+ rlSrc.lowReg, encodeShift(kArmAsr, 31));
+ break;
+ case Divide5:
+ opRegRegImm(cUnit, kOpAsr, rLo, rlSrc.lowReg, 31);
+ opRegRegRegShift(cUnit, kOpRsub, rlResult.lowReg, rLo, rHi,
+ encodeShift(kArmAsr, magicTable[lit].shift));
+ break;
+ case Divide7:
+ opRegReg(cUnit, kOpAdd, rHi, rlSrc.lowReg);
+ opRegRegImm(cUnit, kOpAsr, rLo, rlSrc.lowReg, 31);
+ opRegRegRegShift(cUnit, kOpRsub, rlResult.lowReg, rLo, rHi,
+ encodeShift(kArmAsr, magicTable[lit].shift));
+ break;
+ default:
+ LOG(FATAL) << "Unexpected pattern: " << (int)pattern;
+ }
+ storeValue(cUnit, rlDest, rlResult);
+ return true;
+}
} // namespace art