Compile-time tuning

Specialized the dataflow iterators (e.g., PreOrderDfsIterator no longer
takes an "iterative" constructor flag) and made a few other minor
tweaks. Shows a ~5% compile-time improvement in a single-threaded
environment; less in multi-threaded runs, presumably because
compilation is bottlenecked on something else there.
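
For context, a minimal sketch of the kind of iterator specialization
involved. The pre-change iterator internals are assumed here;
everything beyond the names PreOrderDfsIterator and BasicBlock (which
appear in the diff), including the member names order_/idx_/changed_,
is hypothetical:

  // Sketch only: moving a runtime "iterative" flag out of the iterator
  // and into the type system. Not the actual ART implementation.
  #include <cstddef>
  #include <vector>

  struct BasicBlock { /* ... */ };

  // Before: one iterator served both modes via a constructor flag,
  // paid for with extra state and a test on every traversal step.
  class FlaggedDfsIterator {
   public:
    FlaggedDfsIterator(std::vector<BasicBlock*>* order, bool is_iterative)
        : order_(order), is_iterative_(is_iterative), idx_(0) {}

    BasicBlock* Next(bool had_change = false) {
      changed_ |= had_change;
      // Repeating mode restarts the walk until a pass changes nothing.
      if (is_iterative_ && idx_ == order_->size() && changed_) {
        idx_ = 0;
        changed_ = false;
      }
      return (idx_ < order_->size()) ? (*order_)[idx_++] : nullptr;
    }

   private:
    std::vector<BasicBlock*>* order_;
    const bool is_iterative_;
    size_t idx_;
    bool changed_ = false;
  };

  // After: the common single-pass walk carries no flag and no per-step
  // mode test; a repeating fixed-point variant can be its own class.
  class PreOrderDfsIterator {
   public:
    explicit PreOrderDfsIterator(std::vector<BasicBlock*>* order)
        : order_(order), idx_(0) {}

    BasicBlock* Next() {
      return (idx_ < order_->size()) ? (*order_)[idx_++] : nullptr;
    }

   private:
    std::vector<BasicBlock*>* order_;
    size_t idx_;
  };

With the split, call sites like the codegen loop in the second hunk
construct the cheap single-pass iterator directly, which is what lets
the `false /* not iterative */` argument disappear.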

Change-Id: I2e2ed58d881414b9fc97e04cd0623e188259afd2
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index c41feb1..7c79f59 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -706,16 +706,15 @@
   }
 
   // Free temp registers and reset redundant store tracking.
-  ResetRegPool();
-  ResetDefTracking();
-
   ClobberAllRegs();
 
   if (bb->block_type == kEntryBlock) {
+    ResetRegPool();
     int start_vreg = cu_->num_dalvik_registers - cu_->num_ins;
     GenEntrySequence(&mir_graph_->reg_location_[start_vreg],
                          mir_graph_->reg_location_[mir_graph_->GetMethodSReg()]);
   } else if (bb->block_type == kExitBlock) {
+    ResetRegPool();
     GenExitSequence();
   }
 
@@ -815,7 +814,7 @@
       static_cast<LIR*>(arena_->Alloc(sizeof(LIR) * mir_graph_->GetNumBlocks(),
                                       ArenaAllocator::kAllocLIR));
 
-  PreOrderDfsIterator iter(mir_graph_, false /* not iterative */);
+  PreOrderDfsIterator iter(mir_graph_);
   for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
     MethodBlockCodeGen(bb);
   }