Suspend check reworking (ready for review)

I hate burning a register, but the cost of suspend checks was just too high
in our current environment.  There are things that can be done in future
releases to avoid the register burn, but for now it's worthwhile.

The general strategy is to reserve r4 as a suspend check counter.
Rather than poll the thread suspendPending counter, we instead simply
decrement the counter register.  When it rolls to zero, we check.  For
now I'm just using the counter scheme on backwards branches - we always
poll on returns (which is already heavyweight enough that the extra cost
isn't especially noticeable).

I've also added an optimization hint to the MIR in case we have enough
time to test and enable the existing loop analysis code that omits the
suspend check on smallish counted loops.

Change-Id: I82d8bad5882a4cf2ccff590942e2d1520d58969d
diff --git a/src/compiler/codegen/arm/MethodCodegenDriver.cc b/src/compiler/codegen/arm/MethodCodegenDriver.cc
index ce65803..41053a2 100644
--- a/src/compiler/codegen/arm/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/arm/MethodCodegenDriver.cc
@@ -1113,10 +1113,12 @@
 
         case OP_RETURN:
         case OP_RETURN_OBJECT:
+            genSuspendPoll(cUnit, mir);
             storeValue(cUnit, retLoc, rlSrc[0]);
             break;
 
         case OP_RETURN_WIDE:
+            genSuspendPoll(cUnit, mir);
             rlDest = retLocWide;
             rlDest.fp = rlSrc[0].fp;
             storeValueWide(cUnit, rlDest, rlSrc[0]);
@@ -1277,11 +1279,8 @@
         case OP_GOTO:
         case OP_GOTO_16:
         case OP_GOTO_32:
-            // TUNING: add MIR flag to disable when unnecessary
-            bool backwardBranch;
-            backwardBranch = (bb->taken->startOffset <= mir->offset);
-            if (backwardBranch) {
-                genSuspendPoll(cUnit, mir);
+            if (bb->taken->startOffset <= mir->offset) {
+                genSuspendTest(cUnit, mir);
             }
             genUnconditionalBranch(cUnit, &labelList[bb->taken->id]);
             break;
@@ -1315,7 +1314,7 @@
             ArmConditionCode cond;
             backwardBranch = (bb->taken->startOffset <= mir->offset);
             if (backwardBranch) {
-                genSuspendPoll(cUnit, mir);
+                genSuspendTest(cUnit, mir);
             }
             rlSrc[0] = loadValue(cUnit, rlSrc[0], kCoreReg);
             rlSrc[1] = loadValue(cUnit, rlSrc[1], kCoreReg);
@@ -1358,7 +1357,7 @@
             ArmConditionCode cond;
             backwardBranch = (bb->taken->startOffset <= mir->offset);
             if (backwardBranch) {
-                genSuspendPoll(cUnit, mir);
+                genSuspendTest(cUnit, mir);
             }
             rlSrc[0] = loadValue(cUnit, rlSrc[0], kCoreReg);
             opRegImm(cUnit, kOpCmp, rlSrc[0].lowReg, 0);
@@ -1999,6 +1998,27 @@
     }
 }
 
+static void handleSuspendLaunchpads(CompilationUnit *cUnit)
+{
+    ArmLIR** suspendLabel =
+        (ArmLIR **) cUnit->suspendLaunchpads.elemList;
+    int numElems = cUnit->suspendLaunchpads.numUsed;
+    /* Emit one stub per recorded suspend launchpad, in list order. */
+    for (int i = 0; i < numElems; i++) {
+        /* TUNING: move suspend count load into helper */
+        ArmLIR* lab = suspendLabel[i];
+        ArmLIR* resumeLab = (ArmLIR*)lab->operands[0];  // branch-back target
+        cUnit->currentDalvikOffset = lab->operands[1];
+        oatAppendLIR(cUnit, (LIR *)lab);  // stub begins at its label
+        loadWordDisp(cUnit, rSELF,  // helper entry -> rLR (presumed)
+                     OFFSETOF_MEMBER(Thread, pTestSuspendFromCode), rLR);
+        loadWordDisp(cUnit, rSELF,  // suspend count -> rSUSPEND (presumed)
+            art::Thread::SuspendCountOffset().Int32Value(), rSUSPEND);
+        opReg(cUnit, kOpBlx, rLR);  // call through rLR
+        genUnconditionalBranch(cUnit, resumeLab);  // then jump to resumeLab
+    }
+}
+
 static void handleThrowLaunchpads(CompilationUnit *cUnit)
 {
     ArmLIR** throwLabel =
@@ -2084,9 +2104,11 @@
 
     oatDataFlowAnalysisDispatcher(cUnit, methodBlockCodeGen,
                                   kPreOrderDFSTraversal, false /* Iterative */);
-    removeRedundantBranches(cUnit);
+    handleSuspendLaunchpads(cUnit);
 
     handleThrowLaunchpads(cUnit);
+
+    removeRedundantBranches(cUnit);
 }
 
 /* Common initialization routine for an architecture family */