Basic block combine pass

Combine basic blocks terminated by instruction that we have since
proven not to throw.  This change is intended to relieve some of the
computational load for llvm by reducing the number of basic blocks
it has to contend with.

Also:
  Add stats to show how successful check elimination is.
  Restore mechanism to disable some expensive optimization passes when
  compiling large methods.

Change-Id: I7fae22160988cbefb90ea9fb1cc26d7364e8d229
diff --git a/src/compiler/Compiler.h b/src/compiler/Compiler.h
index 9ac6016..4a7683b 100644
--- a/src/compiler/Compiler.h
+++ b/src/compiler/Compiler.h
@@ -140,6 +140,7 @@
   kDebugShowMemoryUsage,
   kDebugShowNops,
   kDebugCountOpcodes,
+  kDebugDumpCheckStats,
 #if defined(ART_USE_QUICK_COMPILER)
   kDebugDumpBitcodeFile,
   kDebugVerifyBitcode,
diff --git a/src/compiler/CompilerIR.h b/src/compiler/CompilerIR.h
index 02c7621..1cbf2b5 100644
--- a/src/compiler/CompilerIR.h
+++ b/src/compiler/CompilerIR.h
@@ -160,6 +160,7 @@
   kExitBlock,
   kExceptionHandling,
   kCatchEntry,
+  kDead,
 };
 
 /* Utility macros to traverse the LIR list */
@@ -239,6 +240,13 @@
   LIR* misPredBranchOver;
 };
 
+struct Checkstats {
+  int nullChecks;
+  int nullChecksEliminated;
+  int rangeChecks;
+  int rangeChecksEliminated;
+};
+
 struct MIR {
   DecodedInstruction dalvikInsn;
   unsigned int width;
@@ -272,7 +280,6 @@
   bool visited;
   bool hidden;
   bool catchEntry;
-  bool fallThroughTarget;             // Reached via fallthrough
 #if defined(ART_USE_QUICK_COMPILER)
   bool hasReturn;
 #endif
@@ -412,6 +419,7 @@
       currentArena(NULL),
       numArenaBlocks(0),
       mstats(NULL),
+      checkstats(NULL),
 #if defined(ART_USE_QUICK_COMPILER)
       genBitcode(false),
       context(NULL),
@@ -571,6 +579,7 @@
   u4 insnsSize;
   bool disableDataflow; // Skip dataflow analysis if possible
   SafeMap<unsigned int, BasicBlock*> blockMap; // findBlock lookup cache
+  SafeMap<unsigned int, unsigned int> blockIdMap; // Block collapse lookup cache
   SafeMap<unsigned int, LIR*> boundaryMap; // boundary lookup cache
   int defCount;         // Used to estimate number of SSA names
 
@@ -582,6 +591,7 @@
   ArenaMemBlock* currentArena;
   int numArenaBlocks;
   Memstats* mstats;
+  Checkstats* checkstats;
 #if defined(ART_USE_QUICK_COMPILER)
   bool genBitcode;
   llvm::LLVMContext* context;
diff --git a/src/compiler/Dataflow.cc b/src/compiler/Dataflow.cc
index 8967649..a5cdbde 100644
--- a/src/compiler/Dataflow.cc
+++ b/src/compiler/Dataflow.cc
@@ -1945,6 +1945,90 @@
   return true;
 }
 
+/* Collect stats on number of checks removed */
+bool countChecks( struct CompilationUnit* cUnit, struct BasicBlock* bb)
+{
+  if (bb->dataFlowInfo == NULL) return false;
+  for (MIR* mir = bb->firstMIRInsn; mir; mir = mir->next) {
+    if (mir->ssaRep == NULL) {
+      continue;
+    }
+    int dfAttributes = oatDataFlowAttributes[mir->dalvikInsn.opcode];
+    if (dfAttributes & DF_HAS_NULL_CHKS) {
+      cUnit->checkstats->nullChecks++;
+      if (mir->optimizationFlags & MIR_IGNORE_NULL_CHECK) {
+        cUnit->checkstats->nullChecksEliminated++;
+      }
+    }
+    if (dfAttributes & DF_HAS_RANGE_CHKS) {
+      cUnit->checkstats->rangeChecks++;
+      if (mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK) {
+        cUnit->checkstats->rangeChecksEliminated++;
+      }
+    }
+  }
+  return false;
+}
+
+/* Combine any basic blocks terminated by instructions that we now know can't throw */
+bool combineBlocks(struct CompilationUnit* cUnit, struct BasicBlock* bb)
+{
+  // Loop here to allow combining a sequence of blocks
+  while (true) {
+    // Check termination conditions
+    if ((bb->firstMIRInsn == NULL)
+        || (bb->dataFlowInfo == NULL)
+        || (bb->blockType == kExceptionHandling)
+        || (bb->blockType == kExitBlock)
+        || (bb->blockType == kDead)
+        || ((bb->taken == NULL) || (bb->taken->blockType != kExceptionHandling))
+        || (bb->successorBlockList.blockListType != kNotUsed)
+        || ((int)bb->lastMIRInsn->dalvikInsn.opcode != kMirOpCheck)) {
+      break;
+    }
+
+    // Test the kMirOpCheck instruction
+    MIR* mir = bb->lastMIRInsn;
+    // Grab the attributes from the paired opcode
+    MIR* throwInsn = mir->meta.throwInsn;
+    int dfAttributes = oatDataFlowAttributes[throwInsn->dalvikInsn.opcode];
+    // Only null checks, and null checks eliminated?
+    if (((dfAttributes & DF_HAS_NULL_CHKS) == 0) || ((dfAttributes & DF_HAS_RANGE_CHKS) != 0)
+        || !(throwInsn->optimizationFlags & MIR_IGNORE_NULL_CHECK)) {
+      break;
+    }
+    // OK - got one.  Combine
+    BasicBlock* bbNext = bb->fallThrough;
+    DCHECK(!bbNext->catchEntry);
+    DCHECK_EQ(bbNext->predecessors->numUsed, 1U);
+    MIR* tMir = bb->lastMIRInsn->prev;
+    // Overwrite the kOpCheck insn with the paired opcode
+    DCHECK_EQ(bbNext->firstMIRInsn, throwInsn);
+    *bb->lastMIRInsn = *throwInsn;
+    bb->lastMIRInsn->prev = tMir;
+    // Use the successor info from the next block
+    bb->successorBlockList = bbNext->successorBlockList;
+    // Use the ending block linkage from the next block
+    bb->fallThrough = bbNext->fallThrough;
+    bb->taken->blockType = kDead;  // Kill the unused exception block
+    bb->taken = bbNext->taken;
+    // Include the rest of the instructions
+    bb->lastMIRInsn = bbNext->lastMIRInsn;
+
+    /*
+     * NOTE: we aren't updating all dataflow info here.  Should either make sure this pass
+     * happens after uses of iDominated, domFrontier or update the dataflow info here.
+     */
+
+    // Kill bbNext and remap now-dead id to parent
+    bbNext->blockType = kDead;
+    cUnit->blockIdMap.Overwrite(bbNext->id, bb->id);
+
+    // Now, loop back and see if we can keep going
+  }
+  return false;
+}
+
 /* Eliminate unnecessary null checks for a basic block. */
 bool eliminateNullChecks( struct CompilationUnit* cUnit, struct BasicBlock* bb)
 {
@@ -2049,7 +2133,7 @@
     }
 
     // Already nullchecked?
-    if (dfAttributes & DF_HAS_NULL_CHKS) {
+    if ((dfAttributes & DF_HAS_NULL_CHKS) && !(mir->optimizationFlags & MIR_IGNORE_NULL_CHECK)) {
       int srcIdx;
       if (dfAttributes & DF_NULL_CHK_1) {
         srcIdx = 1;
@@ -2091,6 +2175,28 @@
   }
 }
 
+void oatMethodBasicBlockCombine(CompilationUnit* cUnit)
+{
+  oatDataFlowAnalysisDispatcher(cUnit, combineBlocks, kPreOrderDFSTraversal, false);
+}
+
+void oatDumpCheckStats(CompilationUnit *cUnit)
+{
+  Checkstats* stats = (Checkstats*)oatNew(cUnit, sizeof(Checkstats), true, kAllocDFInfo);
+  cUnit->checkstats = stats;
+  oatDataFlowAnalysisDispatcher(cUnit, countChecks, kAllNodes, false /* isIterative */);
+  if (stats->nullChecks > 0) {
+    LOG(INFO) << "Null Checks: " << PrettyMethod(cUnit->method_idx, *cUnit->dex_file) << " "
+              << stats->nullChecksEliminated << " of " << stats->nullChecks << " -> "
+              << ((float)stats->nullChecksEliminated/(float)stats->nullChecks) * 100.0 << "%";
+    }
+  if (stats->rangeChecks > 0) {
+    LOG(INFO) << "Range Checks: " << PrettyMethod(cUnit->method_idx, *cUnit->dex_file) << " "
+              << stats->rangeChecksEliminated << " of " << stats->rangeChecks << " -> "
+              << ((float)stats->rangeChecksEliminated/(float)stats->rangeChecks) * 100.0 << "%";
+  }
+}
+
 void oatMethodBasicBlockOptimization(CompilationUnit *cUnit)
 {
   if (!(cUnit->disableOpt & (1 << kBBOpt))) {
diff --git a/src/compiler/Dataflow.h b/src/compiler/Dataflow.h
index 9ac54d5..8263f33 100644
--- a/src/compiler/Dataflow.h
+++ b/src/compiler/Dataflow.h
@@ -100,10 +100,12 @@
                                  DF_NULL_CHK_2 | \
                                  DF_NULL_CHK_OUT0)
 
-#define DF_HAS_NR_CHKS          (DF_HAS_NULL_CHKS | \
-                                 DF_RANGE_CHK_1 | \
+#define DF_HAS_RANGE_CHKS       (DF_RANGE_CHK_1 | \
                                  DF_RANGE_CHK_2)
 
+#define DF_HAS_NR_CHKS          (DF_HAS_NULL_CHKS | \
+                                 DF_HAS_RANGE_CHKS)
+
 #define DF_A_IS_REG             (DF_UA | DF_DA)
 #define DF_B_IS_REG             (DF_UB)
 #define DF_C_IS_REG             (DF_UC)
@@ -161,6 +163,10 @@
 
 void oatMethodNullCheckElimination(CompilationUnit*);
 
+void oatDumpCheckStats(CompilationUnit*);
+
+void oatMethodBasicBlockCombine(CompilationUnit*);
+
 void oatMethodBasicBlockOptimization(CompilationUnit*);
 
 }  // namespace art
diff --git a/src/compiler/Frontend.cc b/src/compiler/Frontend.cc
index 8c403c5..4aa0ac8 100644
--- a/src/compiler/Frontend.cc
+++ b/src/compiler/Frontend.cc
@@ -80,6 +80,7 @@
   //(1 << kDebugShowMemoryUsage) |
   //(1 << kDebugShowNops) |
   //(1 << kDebugCountOpcodes) |
+  //(1 << kDebugDumpCheckStats) |
 #if defined(ART_USE_QUICK_COMPILER)
   //(1 << kDebugDumpBitcodeFile) |
   //(1 << kDebugVerifyBitcode) |
@@ -315,6 +316,7 @@
     BasicBlock *bb = (BasicBlock *) oatGrowableListGetElement(blockList,
                                                               blockIdx);
     if (bb == NULL) break;
+    if (bb->blockType == kDead) continue;
     if (bb->blockType == kEntryBlock) {
       fprintf(file, "  entry_%d [shape=Mdiamond];\n", bb->id);
     } else if (bb->blockType == kExitBlock) {
@@ -1049,18 +1051,16 @@
     }
   }
 
-#if defined(ART_USE_QUICK_COMPILER)
-  if (cUnit->genBitcode) {
-    // Bitcode generation requires full dataflow analysis, no qdMode
-    cUnit->qdMode = false;
-  }
-#endif
-
   if (cUnit->qdMode) {
+#if !defined(ART_USE_QUICK_COMPILER)
+    // Bitcode generation requires full dataflow analysis
     cUnit->disableDataflow = true;
+#endif
     // Disable optimization which require dataflow/ssa
     cUnit->disableOpt |=
+#if !defined(ART_USE_QUICK_COMPILER)
         (1 << kNullCheckElimination) |
+#endif
         (1 << kBBOpt) |
         (1 << kPromoteRegs);
     if (cUnit->printMe) {
@@ -1104,9 +1104,16 @@
   /* Perform null check elimination */
   oatMethodNullCheckElimination(cUnit.get());
 
+  /* Combine basic blocks where possible */
+  oatMethodBasicBlockCombine(cUnit.get());
+
   /* Do some basic block optimizations */
   oatMethodBasicBlockOptimization(cUnit.get());
 
+  if (cUnit->enableDebug & (1 << kDebugDumpCheckStats)) {
+    oatDumpCheckStats(cUnit.get());
+  }
+
   oatInitializeRegAlloc(cUnit.get());  // Needs to happen after SSA naming
 
   /* Allocate Registers using simple local allocation scheme */
diff --git a/src/compiler/IntermediateRep.cc b/src/compiler/IntermediateRep.cc
index 237f5e0..031d3d3 100644
--- a/src/compiler/IntermediateRep.cc
+++ b/src/compiler/IntermediateRep.cc
@@ -77,6 +77,7 @@
   oatInitGrowableList(cUnit, bb->predecessors,
                       (blockType == kExitBlock) ? 2048 : 2,
                       kListPredecessors);
+  cUnit->blockIdMap.Put(blockId, blockId);
   return bb;
 }
 
diff --git a/src/compiler/SSATransformation.cc b/src/compiler/SSATransformation.cc
index 10957b2..f625261 100644
--- a/src/compiler/SSATransformation.cc
+++ b/src/compiler/SSATransformation.cc
@@ -57,18 +57,6 @@
 void markPreOrder(CompilationUnit* cUnit, BasicBlock* block)
 {
   block->visited = true;
-  // Can this block be reached only via previous block fallthrough?
-  if ((block->blockType == kDalvikByteCode) &&
-      (block->predecessors->numUsed == 1)) {
-    DCHECK_GE(cUnit->dfsOrder.numUsed, 1U);
-    int prevIdx = cUnit->dfsOrder.numUsed - 1;
-    int prevId = cUnit->dfsOrder.elemList[prevIdx];
-    BasicBlock* predBB = (BasicBlock*)block->predecessors->elemList[0];
-    if (predBB->id == prevId) {
-      block->fallThroughTarget = true;
-    }
-  }
-
   /* Enqueue the preOrder block id */
   oatInsertGrowableList(cUnit, &cUnit->dfsOrder, block->id);
 }
@@ -107,9 +95,6 @@
     int prevIdx = cUnit->dfsOrder.numUsed - 1;
     int prevId = cUnit->dfsOrder.elemList[prevIdx];
     BasicBlock* predBB = (BasicBlock*)block->predecessors->elemList[0];
-    if (predBB->id == prevId) {
-      block->fallThroughTarget = true;
-    }
   }
 
   /* Enqueue the preOrder block id */
diff --git a/src/compiler/codegen/CodegenUtil.cc b/src/compiler/codegen/CodegenUtil.cc
index 3aea76a..bdc2c8b 100644
--- a/src/compiler/codegen/CodegenUtil.cc
+++ b/src/compiler/codegen/CodegenUtil.cc
@@ -998,5 +998,20 @@
   }
 }
 
+/*
+ * Set up special LIR to mark a Dalvik byte-code instruction start and
+ * record it in the boundaryMap.  NOTE: in cases such as kMirOpCheck in
+ * which we split a single Dalvik instruction, only the first MIR op
+ * associated with a Dalvik PC should be entered into the map.
+ */
+LIR* markBoundary(CompilationUnit* cUnit, int offset, const char* instStr)
+{
+  LIR* res = newLIR1(cUnit, kPseudoDalvikByteCodeBoundary, (intptr_t) instStr);
+  if (cUnit->boundaryMap.find(offset) == cUnit->boundaryMap.end()) {
+    cUnit->boundaryMap.Put(offset, res);
+  }
+  return res;
+}
 
-}  // namespace art
+}
+ // namespace art
diff --git a/src/compiler/codegen/MethodBitcode.cc b/src/compiler/codegen/MethodBitcode.cc
index cfe6e6a..8d62750 100644
--- a/src/compiler/codegen/MethodBitcode.cc
+++ b/src/compiler/codegen/MethodBitcode.cc
@@ -1668,8 +1668,11 @@
         DCHECK_EQ(rlDest.fp, loc.fp);
         DCHECK_EQ(rlDest.core, loc.core);
         DCHECK_EQ(rlDest.ref, loc.ref);
+        SafeMap<unsigned int, unsigned int>::iterator it;
+        it = cUnit->blockIdMap.find(incoming[i]);
+        DCHECK(it != cUnit->blockIdMap.end());
         phi->addIncoming(getLLVMValue(cUnit, loc.origSReg),
-                         getLLVMBlock(cUnit, incoming[i]));
+                         getLLVMBlock(cUnit, it->second));
       }
       defineValue(cUnit, phi, rlDest.origSReg);
       break;
@@ -1752,6 +1755,7 @@
 /* Handle the content in each basic block */
 bool methodBlockBitcodeConversion(CompilationUnit* cUnit, BasicBlock* bb)
 {
+  if (bb->blockType == kDead) return false;
   llvm::BasicBlock* llvmBB = getLLVMBlock(cUnit, bb->id);
   cUnit->irb->SetInsertPoint(llvmBB);
   setDexOffset(cUnit, bb->startOffset);
@@ -1964,7 +1968,7 @@
 bool createLLVMBasicBlock(CompilationUnit* cUnit, BasicBlock* bb)
 {
   // Skip the exit block
-  if (bb->blockType == kExitBlock) {
+  if ((bb->blockType == kDead) ||(bb->blockType == kExitBlock)) {
     cUnit->idToBlockMap.Put(bb->id, NULL);
   } else {
     int offset = bb->startOffset;
@@ -2924,11 +2928,8 @@
     cUnit->liveSReg = INVALID_SREG;
 #endif
 
-    LIR* boundaryLIR;
-    const char* instStr = "boundary";
-    boundaryLIR = newLIR1(cUnit, kPseudoDalvikByteCodeBoundary,
-                          (intptr_t) instStr);
-    cUnit->boundaryMap.Overwrite(cUnit->currentDalvikOffset, boundaryLIR);
+    // TODO: use llvm opcode name here instead of "boundary" if verbose
+    LIR* boundaryLIR = markBoundary(cUnit, cUnit->currentDalvikOffset, "boundary");
 
     /* Remember the first LIR for thisl block*/
     if (headLIR == NULL) {
diff --git a/src/compiler/codegen/MethodCodegenDriver.cc b/src/compiler/codegen/MethodCodegenDriver.cc
index b4e14ce..e76834d 100644
--- a/src/compiler/codegen/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/MethodCodegenDriver.cc
@@ -881,14 +881,7 @@
   oatResetRegPool(cUnit);
   oatResetDefTracking(cUnit);
 
-  /*
-   * If control reached us from our immediate predecessor via
-   * fallthrough and we have no other incoming arcs we can
-   * reuse existing liveness.  Otherwise, reset.
-   */
-  if (!bb->fallThroughTarget || bb->predecessors->numUsed != 1) {
-    oatClobberAllRegs(cUnit);
-  }
+  oatClobberAllRegs(cUnit);
 
   LIR* headLIR = NULL;
 
@@ -922,9 +915,7 @@
     /* Mark the beginning of a Dalvik instruction for line tracking */
     char* instStr = cUnit->printMe ?
        oatGetDalvikDisassembly(cUnit, mir->dalvikInsn, "") : NULL;
-    boundaryLIR = newLIR1(cUnit, kPseudoDalvikByteCodeBoundary,
-                          (intptr_t) instStr);
-    cUnit->boundaryMap.Overwrite(mir->offset, boundaryLIR);
+    boundaryLIR = markBoundary(cUnit, mir->offset, instStr);
     /* Remember the first LIR for this block */
     if (headLIR == NULL) {
       headLIR = boundaryLIR;
diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc
index d2532b7..925bbf2 100644
--- a/src/compiler/codegen/arm/Thumb2/Gen.cc
+++ b/src/compiler/codegen/arm/Thumb2/Gen.cc
@@ -127,13 +127,10 @@
 /* Used for the "printMe" listing */
 void genPrintLabel(CompilationUnit *cUnit, MIR* mir)
 {
-  LIR* boundaryLIR;
   /* Mark the beginning of a Dalvik instruction for line tracking */
   char* instStr = cUnit->printMe ?
      oatGetDalvikDisassembly(cUnit, mir->dalvikInsn, "") : NULL;
-  boundaryLIR = newLIR1(cUnit, kPseudoDalvikByteCodeBoundary,
-     (intptr_t) instStr);
-  cUnit->boundaryMap.Put(mir->offset, boundaryLIR);
+  markBoundary(cUnit, mir->offset, instStr);
   /* Don't generate the SSA annotation unless verbose mode is on */
   if (cUnit->printMe && mir->ssaRep) {
     char* ssaString = oatGetSSAString(cUnit, mir->ssaRep);
diff --git a/src/safe_map.h b/src/safe_map.h
index 62a0415..4001aaf 100644
--- a/src/safe_map.h
+++ b/src/safe_map.h
@@ -75,7 +75,11 @@
   // of this container is a pointer, any overwritten pointer will be lost and if this container
   // was the owner, you have a leak.
   void Overwrite(const K& k, const V& v) {
-    map_.insert(std::make_pair(k, v));
+    std::pair<iterator, bool> result = map_.insert(std::make_pair(k, v));
+    if (!result.second) {
+      // Already there - update the value for the existing key
+      result.first->second = v;
+    }
   }
 
   bool Equals(const Self& rhs) const {