More compile-time tuning

Another round of compile-time tuning, this time yeilding in the
vicinity of 3% total reduction in compile time (which means about
double that for the Quick Compile portion).

Primary improvements are skipping the basic block combine optimization
pass when using Quick (because we already have big blocks), combining
the null check elimination and type inference passes, and limiting
expensive local value number analysis to only those blocks which
might benefit from it.

Following this CL, the actual compile phase consumes roughly 60%
of the total dex2oat time on the host, and 55% on the target (Note,
I'm subtracting out the Deduping time here, which the timing logger
normally counts against the compiler).

A sample breakdown of the compilation time follows (this taken on
PlusOne.apk w/ a Nexus 4):

39.00% -> MIR2LIR: 1374.90 (Note: includes local optimization & scheduling)
10.25% -> MIROpt:SSATransform: 361.31
 8.45% -> BuildMIRGraph: 297.80
 7.55% -> Assemble: 266.16
 6.87% -> MIROpt:NCE_TypeInference: 242.22
 5.56% -> Dedupe: 196.15
 3.45% -> MIROpt:BBOpt: 121.53
 3.20% -> RegisterAllocation: 112.69
 3.00% -> PcMappingTable: 105.65
 2.90% -> GcMap: 102.22
 2.68% -> Launchpads: 94.50
 1.16% -> MIROpt:InitRegLoc: 40.94
 1.16% -> Cleanup: 40.93
 1.10% -> MIROpt:CodeLayout: 38.80
 0.97% -> MIROpt:ConstantProp: 34.35
 0.96% -> MIROpt:UseCount: 33.75
 0.86% -> MIROpt:CheckFilters: 30.28
 0.44% -> SpecialMIR2LIR: 15.53
 0.44% -> MIROpt:BBCombine: 15.41

(cherry pick of 9e8e234af4430abe8d144414e272cd72d215b5f3)

Change-Id: I86c665fa7e88b75eb75629a99fd292ff8c449969
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index f5913a5..6353937 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -40,7 +40,7 @@
   MIR* mir;
 
   for (mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
-    int df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
+    uint64_t df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
 
     DecodedInstruction *d_insn = &mir->dalvikInsn;
 
@@ -155,7 +155,7 @@
       || (bb->block_type == kExitBlock));
   BasicBlock* bb_taken = GetBasicBlock(bb->taken);
   BasicBlock* bb_fall_through = GetBasicBlock(bb->fall_through);
-  if (((bb_taken != NULL) && (bb_fall_through == NULL)) &&
+  if (((bb_fall_through == NULL) && (bb_taken != NULL)) &&
       ((bb_taken->block_type == kDalvikByteCode) || (bb_taken->block_type == kExitBlock))) {
     // Follow simple unconditional branches.
     bb = bb_taken;
@@ -216,11 +216,17 @@
     return true;
   }
   int num_temps = 0;
-  LocalValueNumbering local_valnum(cu_);
+  bool use_lvn = bb->use_lvn;
+  UniquePtr<LocalValueNumbering> local_valnum;
+  if (use_lvn) {
+    local_valnum.reset(new LocalValueNumbering(cu_));
+  }
   while (bb != NULL) {
     for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
       // TUNING: use the returned value number for CSE.
-      local_valnum.GetValueNumber(mir);
+      if (use_lvn) {
+        local_valnum->GetValueNumber(mir);
+      }
       // Look for interesting opcodes, skip otherwise
       Instruction::Code opcode = mir->dalvikInsn.opcode;
       switch (opcode) {
@@ -463,7 +469,7 @@
         }
       }
     }
-    bb = NextDominatedBlock(bb);
+    bb = ((cu_->disable_opt & (1 << kSuppressExceptionEdges)) != 0) ? NextDominatedBlock(bb) : NULL;
   }
 
   if (num_temps > cu_->num_compiler_temps) {
@@ -486,7 +492,7 @@
       if (mir->ssa_rep == NULL) {
         continue;
       }
-      int df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
+      uint64_t df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
       if (df_attributes & DF_HAS_NULL_CHKS) {
         checkstats_->null_checks++;
         if (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) {
@@ -571,7 +577,7 @@
     MIR* mir = bb->last_mir_insn;
     // Grab the attributes from the paired opcode
     MIR* throw_insn = mir->meta.throw_insn;
-    int df_attributes = oat_data_flow_attributes_[throw_insn->dalvikInsn.opcode];
+    uint64_t df_attributes = oat_data_flow_attributes_[throw_insn->dalvikInsn.opcode];
     bool can_combine = true;
     if (df_attributes & DF_HAS_NULL_CHKS) {
       can_combine &= ((throw_insn->optimization_flags & MIR_IGNORE_NULL_CHECK) != 0);
@@ -618,74 +624,87 @@
   return false;
 }
 
-/* Eliminate unnecessary null checks for a basic block. */
-bool MIRGraph::EliminateNullChecks(struct BasicBlock* bb) {
+/*
+ * Eliminate unnecessary null checks for a basic block.   Also, while we're doing
+ * an iterative walk go ahead and perform type and size inference.
+ */
+bool MIRGraph::EliminateNullChecksAndInferTypes(struct BasicBlock* bb) {
   if (bb->data_flow_info == NULL) return false;
+  bool infer_changed = false;
+  bool do_nce = ((cu_->disable_opt & (1 << kNullCheckElimination)) == 0);
 
-  /*
-   * Set initial state.  Be conservative with catch
-   * blocks and start with no assumptions about null check
-   * status (except for "this").
-   */
-  if ((bb->block_type == kEntryBlock) | bb->catch_entry) {
-    temp_ssa_register_v_->ClearAllBits();
-    // Assume all ins are objects.
-    for (uint16_t in_reg = cu_->num_dalvik_registers - cu_->num_ins;
-         in_reg < cu_->num_dalvik_registers; in_reg++) {
-      temp_ssa_register_v_->SetBit(in_reg);
-    }
-    if ((cu_->access_flags & kAccStatic) == 0) {
-      // If non-static method, mark "this" as non-null
-      int this_reg = cu_->num_dalvik_registers - cu_->num_ins;
-      temp_ssa_register_v_->ClearBit(this_reg);
-    }
-  } else if (bb->predecessors->Size() == 1) {
-    BasicBlock* pred_bb = GetBasicBlock(bb->predecessors->Get(0));
-    temp_ssa_register_v_->Copy(pred_bb->data_flow_info->ending_null_check_v);
-    if (pred_bb->block_type == kDalvikByteCode) {
-      // Check to see if predecessor had an explicit null-check.
-      MIR* last_insn = pred_bb->last_mir_insn;
-      Instruction::Code last_opcode = last_insn->dalvikInsn.opcode;
-      if (last_opcode == Instruction::IF_EQZ) {
-        if (pred_bb->fall_through == bb->id) {
-          // The fall-through of a block following a IF_EQZ, set the vA of the IF_EQZ to show that
-          // it can't be null.
-          temp_ssa_register_v_->ClearBit(last_insn->ssa_rep->uses[0]);
-        }
-      } else if (last_opcode == Instruction::IF_NEZ) {
-        if (pred_bb->taken == bb->id) {
-          // The taken block following a IF_NEZ, set the vA of the IF_NEZ to show that it can't be
-          // null.
-          temp_ssa_register_v_->ClearBit(last_insn->ssa_rep->uses[0]);
+  if (do_nce) {
+    /*
+     * Set initial state.  Be conservative with catch
+     * blocks and start with no assumptions about null check
+     * status (except for "this").
+     */
+    if ((bb->block_type == kEntryBlock) | bb->catch_entry) {
+      temp_ssa_register_v_->ClearAllBits();
+      // Assume all ins are objects.
+      for (uint16_t in_reg = cu_->num_dalvik_registers - cu_->num_ins;
+           in_reg < cu_->num_dalvik_registers; in_reg++) {
+        temp_ssa_register_v_->SetBit(in_reg);
+      }
+      if ((cu_->access_flags & kAccStatic) == 0) {
+        // If non-static method, mark "this" as non-null
+        int this_reg = cu_->num_dalvik_registers - cu_->num_ins;
+        temp_ssa_register_v_->ClearBit(this_reg);
+      }
+    } else if (bb->predecessors->Size() == 1) {
+      BasicBlock* pred_bb = GetBasicBlock(bb->predecessors->Get(0));
+      temp_ssa_register_v_->Copy(pred_bb->data_flow_info->ending_null_check_v);
+      if (pred_bb->block_type == kDalvikByteCode) {
+        // Check to see if predecessor had an explicit null-check.
+        MIR* last_insn = pred_bb->last_mir_insn;
+        Instruction::Code last_opcode = last_insn->dalvikInsn.opcode;
+        if (last_opcode == Instruction::IF_EQZ) {
+          if (pred_bb->fall_through == bb->id) {
+            // The fall-through of a block following a IF_EQZ, set the vA of the IF_EQZ to show that
+            // it can't be null.
+            temp_ssa_register_v_->ClearBit(last_insn->ssa_rep->uses[0]);
+          }
+        } else if (last_opcode == Instruction::IF_NEZ) {
+          if (pred_bb->taken == bb->id) {
+            // The taken block following a IF_NEZ, set the vA of the IF_NEZ to show that it can't be
+            // null.
+            temp_ssa_register_v_->ClearBit(last_insn->ssa_rep->uses[0]);
+          }
         }
       }
-    }
-  } else {
-    // Starting state is union of all incoming arcs
-    GrowableArray<BasicBlockId>::Iterator iter(bb->predecessors);
-    BasicBlock* pred_bb = GetBasicBlock(iter.Next());
-    DCHECK(pred_bb != NULL);
-    temp_ssa_register_v_->Copy(pred_bb->data_flow_info->ending_null_check_v);
-    while (true) {
-      pred_bb = GetBasicBlock(iter.Next());
-      if (!pred_bb) break;
-      if ((pred_bb->data_flow_info == NULL) ||
-          (pred_bb->data_flow_info->ending_null_check_v == NULL)) {
-        continue;
+    } else {
+      // Starting state is union of all incoming arcs
+      GrowableArray<BasicBlockId>::Iterator iter(bb->predecessors);
+      BasicBlock* pred_bb = GetBasicBlock(iter.Next());
+      DCHECK(pred_bb != NULL);
+      temp_ssa_register_v_->Copy(pred_bb->data_flow_info->ending_null_check_v);
+      while (true) {
+        pred_bb = GetBasicBlock(iter.Next());
+        if (!pred_bb) break;
+        if ((pred_bb->data_flow_info == NULL) ||
+            (pred_bb->data_flow_info->ending_null_check_v == NULL)) {
+          continue;
+        }
+        temp_ssa_register_v_->Union(pred_bb->data_flow_info->ending_null_check_v);
       }
-      temp_ssa_register_v_->Union(pred_bb->data_flow_info->ending_null_check_v);
     }
+    // At this point, temp_ssa_register_v_ shows which sregs have an object definition with
+    // no intervening uses.
   }
 
-  // At this point, temp_ssa_register_v_ shows which sregs have an object definition with
-  // no intervening uses.
-
   // Walk through the instruction in the block, updating as necessary
   for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
     if (mir->ssa_rep == NULL) {
         continue;
     }
-    int df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
+
+    // Propagate type info.
+    infer_changed = InferTypeAndSize(bb, mir, infer_changed);
+    if (!do_nce) {
+      continue;
+    }
+
+    uint64_t df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
 
     // Might need a null check?
     if (df_attributes & DF_HAS_NULL_CHKS) {
@@ -784,25 +803,25 @@
   }
 
   // Did anything change?
-  bool changed = !temp_ssa_register_v_->Equal(bb->data_flow_info->ending_null_check_v);
-  if (changed) {
+  bool nce_changed = do_nce && !temp_ssa_register_v_->Equal(bb->data_flow_info->ending_null_check_v);
+  if (nce_changed) {
     bb->data_flow_info->ending_null_check_v->Copy(temp_ssa_register_v_);
   }
-  return changed;
+  return infer_changed | nce_changed;
 }
 
-void MIRGraph::NullCheckElimination() {
-  if (!(cu_->disable_opt & (1 << kNullCheckElimination))) {
-    DCHECK(temp_ssa_register_v_ != NULL);
+void MIRGraph::NullCheckEliminationAndTypeInference() {
+  DCHECK(temp_ssa_register_v_ != NULL);
+  if ((cu_->disable_opt & (1 << kNullCheckElimination)) == 0) {
     AllNodesIterator iter(this);
     for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
       NullCheckEliminationInit(bb);
     }
-    RepeatingPreOrderDfsIterator iter2(this);
-    bool change = false;
-    for (BasicBlock* bb = iter2.Next(change); bb != NULL; bb = iter2.Next(change)) {
-      change = EliminateNullChecks(bb);
-    }
+  }
+  RepeatingPreOrderDfsIterator iter2(this);
+  bool change = false;
+  for (BasicBlock* bb = iter2.Next(change); bb != NULL; bb = iter2.Next(change)) {
+    change = EliminateNullChecksAndInferTypes(bb);
   }
   if (cu_->enable_debug & (1 << kDebugDumpCFG)) {
     DumpCFG("/sdcard/4_post_nce_cfg/", false);
@@ -810,12 +829,14 @@
 }
 
 void MIRGraph::BasicBlockCombine() {
-  PreOrderDfsIterator iter(this);
-  for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
-    CombineBlocks(bb);
-  }
-  if (cu_->enable_debug & (1 << kDebugDumpCFG)) {
-    DumpCFG("/sdcard/5_post_bbcombine_cfg/", false);
+  if ((cu_->disable_opt & (1 << kSuppressExceptionEdges)) != 0) {
+    PreOrderDfsIterator iter(this);
+    for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
+      CombineBlocks(bb);
+    }
+    if (cu_->enable_debug & (1 << kDebugDumpCFG)) {
+      DumpCFG("/sdcard/5_post_bbcombine_cfg/", false);
+    }
   }
 }
 
@@ -868,17 +889,20 @@
   BasicBlock* start_bb = bb;
   extended_basic_blocks_.push_back(bb->id);
   bool terminated_by_return = false;
+  bool do_local_value_numbering = false;
   // Visit blocks strictly dominated by this head.
   while (bb != NULL) {
     bb->visited = true;
     terminated_by_return |= bb->terminated_by_return;
+    do_local_value_numbering |= bb->use_lvn;
     bb = NextDominatedBlock(bb);
   }
-  if (terminated_by_return) {
-    // This extended basic block contains a return, so mark all members.
+  if (terminated_by_return || do_local_value_numbering) {
+    // Do lvn for all blocks in this extended set.
     bb = start_bb;
     while (bb != NULL) {
-      bb->dominates_return = true;
+      bb->use_lvn = do_local_value_numbering;
+      bb->dominates_return = terminated_by_return;
       bb = NextDominatedBlock(bb);
     }
   }
@@ -889,14 +913,21 @@
 void MIRGraph::BasicBlockOptimization() {
   if (!(cu_->disable_opt & (1 << kBBOpt))) {
     DCHECK_EQ(cu_->num_compiler_temps, 0);
-    ClearAllVisitedFlags();
-    PreOrderDfsIterator iter2(this);
-    for (BasicBlock* bb = iter2.Next(); bb != NULL; bb = iter2.Next()) {
-      BuildExtendedBBList(bb);
+    if ((cu_->disable_opt & (1 << kSuppressExceptionEdges)) != 0) {
+      ClearAllVisitedFlags();
+      PreOrderDfsIterator iter2(this);
+      for (BasicBlock* bb = iter2.Next(); bb != NULL; bb = iter2.Next()) {
+        BuildExtendedBBList(bb);
+      }
+      // Perform extended basic block optimizations.
+      for (unsigned int i = 0; i < extended_basic_blocks_.size(); i++) {
+        BasicBlockOpt(GetBasicBlock(extended_basic_blocks_[i]));
+      }
     }
-    // Perform extended basic block optimizations.
-    for (unsigned int i = 0; i < extended_basic_blocks_.size(); i++) {
-      BasicBlockOpt(GetBasicBlock(extended_basic_blocks_[i]));
+  } else {
+    PreOrderDfsIterator iter(this);
+    for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
+      BasicBlockOpt(bb);
     }
   }
   if (cu_->enable_debug & (1 << kDebugDumpCFG)) {