Special case division by small constants

Do the standard reciprocal multiply trick for small division
by small constants.

Change-Id: Iad1060ccdc6ffeb7b47d45c29ba741683ad01ab9
diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc
index 066c37c..71fd33c 100644
--- a/src/compiler/codegen/GenCommon.cc
+++ b/src/compiler/codegen/GenCommon.cc
@@ -27,6 +27,8 @@
                InvokeType type, bool isRange);
 #if defined(TARGET_ARM)
 LIR* opIT(CompilationUnit* cUnit, ArmConditionCode cond, const char* guide);
+bool smallLiteralDivide(CompilationUnit* cUnit, Instruction::Code dalvikOpcode,
+                        RegLocation rlSrc, RegLocation rlDest, int lit);
 #endif
 
 void callRuntimeHelperImm(CompilationUnit* cUnit, int helperOffset, int arg0) {
@@ -1915,9 +1917,19 @@
 bool handleEasyDivide(CompilationUnit* cUnit, Instruction::Code dalvikOpcode,
                       RegLocation rlSrc, RegLocation rlDest, int lit)
 {
+#if defined(TARGET_ARM)
+    // No divide instruction for Arm, so check for more special cases
+    if (lit < 2) {
+        return false;
+    }
+    if (!isPowerOfTwo(lit)) {
+        return smallLiteralDivide(cUnit, dalvikOpcode, rlSrc, rlDest, lit);
+    }
+#else
     if (lit < 2 || !isPowerOfTwo(lit)) {
         return false;
     }
+#endif
     int k = lowestSetBit(lit);
     if (k >= 30) {
         // Avoid special cases.
diff --git a/src/compiler/codegen/arm/ArmLIR.h b/src/compiler/codegen/arm/ArmLIR.h
index da39713..484892a 100644
--- a/src/compiler/codegen/arm/ArmLIR.h
+++ b/src/compiler/codegen/arm/ArmLIR.h
@@ -617,6 +617,10 @@
                                    [0000] rm[3..0] */
     kThumb2Push1,        /* t3 encoding of push */
     kThumb2Pop1,         /* t3 encoding of pop */
+    kThumb2RsubRRR,      /* rsb [111010111101] rn[19..16] [0000] rd[11..8]
+                                [0000] rm[3..0] */
+    kThumb2Smull,        /* smull [111110111000] rn[19-16], rdlo[15-12]
+                                  rdhi[11-8] [0000] rm[3-0] */
     kArmLast,
 };
 
diff --git a/src/compiler/codegen/arm/Assemble.cc b/src/compiler/codegen/arm/Assemble.cc
index 1b7dc29..1d1442a 100644
--- a/src/compiler/codegen/arm/Assemble.cc
+++ b/src/compiler/codegen/arm/Assemble.cc
@@ -964,6 +964,16 @@
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF0
                  | IS_LOAD, "pop1", "!0C", 4),
+    ENCODING_MAP(kThumb2RsubRRR,  0xebd00000, /* setflags encoding */
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtShift, -1, -1,
+                 IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
+                 "rsbs", "!0C, !1C, !2C!3H", 4),
+    ENCODING_MAP(kThumb2Smull,  0xfb800000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
+                 kFmtBitBlt, 3, 0,
+                 IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3,
+                 "smull", "!0C, !1C, !2C, !3C", 4),
 
 };
 
diff --git a/src/compiler/codegen/arm/Thumb2/Factory.cc b/src/compiler/codegen/arm/Thumb2/Factory.cc
index e065a2f..0fe937a 100644
--- a/src/compiler/codegen/arm/Thumb2/Factory.cc
+++ b/src/compiler/codegen/arm/Thumb2/Factory.cc
@@ -349,6 +349,9 @@
         case kOpSub:
             opcode = (thumbForm) ? kThumbSubRRR : kThumb2SubRRR;
             break;
+        case kOpRsub:
+            opcode = kThumb2RsubRRR;
+            break;
         case kOpAdc:
             opcode = kThumb2AdcRRR;
             break;
diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc
index 44cae0f..fa51266 100644
--- a/src/compiler/codegen/arm/Thumb2/Gen.cc
+++ b/src/compiler/codegen/arm/Thumb2/Gen.cc
@@ -788,5 +788,83 @@
     }
 }
 
+// Table of magic divisors
+enum DividePattern {
+    DivideNone,
+    Divide3,
+    Divide5,
+    Divide7,
+};
+
+struct MagicTable {
+    uint32_t magic;
+    uint32_t shift;
+    DividePattern pattern;
+};
+
+static const MagicTable magicTable[] = {
+    {0, 0, DivideNone},        // 0
+    {0, 0, DivideNone},        // 1
+    {0, 0, DivideNone},        // 2
+    {0x55555556, 0, Divide3},  // 3
+    {0, 0, DivideNone},        // 4
+    {0x66666667, 1, Divide5},  // 5
+    {0x2AAAAAAB, 0, Divide3},  // 6
+    {0x92492493, 2, Divide7},  // 7
+    {0, 0, DivideNone},        // 8
+    {0x38E38E39, 1, Divide5},  // 9
+    {0x66666667, 2, Divide5},  // 10
+    {0x2E8BA2E9, 1, Divide5},  // 11
+    {0x2AAAAAAB, 1, Divide5},  // 12
+    {0x4EC4EC4F, 2, Divide5},  // 13
+    {0x92492493, 3, Divide7},  // 14
+    {0x88888889, 3, Divide7},  // 15
+};
+
+// Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4)
+bool smallLiteralDivide(CompilationUnit* cUnit, Instruction::Code dalvikOpcode,
+                        RegLocation rlSrc, RegLocation rlDest, int lit)
+{
+    if ((lit < 0) || (lit >= (int)(sizeof(magicTable)/sizeof(magicTable[0])))) {
+        return false;
+    }
+    DividePattern pattern = magicTable[lit].pattern;
+    if (pattern == DivideNone) {
+        return false;
+    }
+    // Tuning: add rem patterns
+    if (dalvikOpcode != Instruction::DIV_INT_LIT8) {
+        return false;
+    }
+
+    int rMagic = oatAllocTemp(cUnit);
+    loadConstant(cUnit, rMagic, magicTable[lit].magic);
+    rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
+    RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+    int rHi = oatAllocTemp(cUnit);
+    int rLo = oatAllocTemp(cUnit);
+    newLIR4(cUnit, kThumb2Smull, rLo, rHi, rMagic, rlSrc.lowReg);
+    switch(pattern) {
+        case Divide3:
+            opRegRegRegShift(cUnit, kOpSub, rlResult.lowReg, rHi,
+                             rlSrc.lowReg, encodeShift(kArmAsr, 31));
+            break;
+        case Divide5:
+            opRegRegImm(cUnit, kOpAsr, rLo, rlSrc.lowReg, 31);
+            opRegRegRegShift(cUnit, kOpRsub, rlResult.lowReg, rLo, rHi,
+                             encodeShift(kArmAsr, magicTable[lit].shift));
+            break;
+        case Divide7:
+            opRegReg(cUnit, kOpAdd, rHi, rlSrc.lowReg);
+            opRegRegImm(cUnit, kOpAsr, rLo, rlSrc.lowReg, 31);
+            opRegRegRegShift(cUnit, kOpRsub, rlResult.lowReg, rLo, rHi,
+                             encodeShift(kArmAsr, magicTable[lit].shift));
+            break;
+        default:
+            LOG(FATAL) << "Unexpected pattern: " << (int)pattern;
+    }
+    storeValue(cUnit, rlDest, rlResult);
+    return true;
+}
 
 }  // namespace art
diff --git a/test/003-omnibus-opcodes/expected.txt b/test/003-omnibus-opcodes/expected.txt
index 25ab188..c5e67e5 100644
--- a/test/003-omnibus-opcodes/expected.txt
+++ b/test/003-omnibus-opcodes/expected.txt
@@ -15,6 +15,7 @@
 IntMath.longOperCheck
 IntMath.lit16Test
 IntMath.lit8Test
+IntMath.divLiteralTest
 IntMath.intShiftTest
 IntMath.intShiftCheck
 IntMath.longShiftTest
diff --git a/test/003-omnibus-opcodes/src/IntMath.java b/test/003-omnibus-opcodes/src/IntMath.java
index 292fdd3..2e2962a 100644
--- a/test/003-omnibus-opcodes/src/IntMath.java
+++ b/test/003-omnibus-opcodes/src/IntMath.java
@@ -272,6 +272,80 @@
         Main.assertTrue(results[7] == 55563);
     }
 
+    /*
+     * Make sure special-cased literal division matches
+     * normal division.
+     */
+    static void divLiteralTestBody(int start, int count) {
+       int normal = 0;
+       int special = 0;
+       for (int i = 0; i < count; i++) {
+           for (int j = 3; j < 16; j++) {
+               switch(j) {
+                   case 3:
+                       normal = (start+i) / j;
+                       special = (start+i) / 3;
+                       break;
+                   case 4:
+                       normal = (start+i) / j;
+                       special = (start+i) / 4;
+                       break;
+                   case 5:
+                       normal = (start+i) / j;
+                       special = (start+i) / 5;
+                       break;
+                   case 6:
+                       normal = (start+i) / j;
+                       special = (start+i) / 6;
+                       break;
+                   case 7:
+                       normal = (start+i) / j;
+                       special = (start+i) / 7;
+                       break;
+                   case 8:
+                       normal = (start+i) / j;
+                       special = (start+i) / 8;
+                       break;
+                   case 9:
+                       normal = (start+i) / j;
+                       special = (start+i) / 9;
+                       break;
+                   case 10:
+                       normal = (start+i) / j;
+                       special = (start+i) / 10;
+                       break;
+                   case 11:
+                       normal = (start+i) / j;
+                       special = (start+i) / 11;
+                       break;
+                   case 12:
+                       normal = (start+i) / j;
+                       special = (start+i) / 12;
+                       break;
+                   case 13:
+                       normal = (start+i) / j;
+                       special = (start+i) / 13;
+                       break;
+                   case 14:
+                       normal = (start+i) / j;
+                       special = (start+i) / 14;
+                       break;
+                   case 15:
+                       normal = (start+i) / j;
+                       special = (start+i) / 15;
+                       break;
+               }
+           }
+           Main.assertTrue(normal == special);
+       }
+    }
+
+    static void divLiteralTest() {
+       System.out.println("IntMath.divLiteralTest");
+       divLiteralTestBody(-1000, 2000);
+       divLiteralTestBody(0x7fffffff-2000, 2000);
+       divLiteralTestBody(0xfff0ffff, 2000);
+    }
 
     /*
      * Shift some data.  (value=0xff00aa01, dist=8)
@@ -518,6 +592,7 @@
         lit16Check(intResults);
         intResults = lit8Test(-55555);
         lit8Check(intResults);
+        divLiteralTest();
 
         intResults = intShiftTest(0xff00aa01, 8);
         intShiftCheck(intResults);