Fix topological ordering and use it for optimizations.

Use the topological sort order for ClassInitCheckElimination
and NullCheckEliminationAndTypeInference.

Change-Id: I315ca7f300dd11390f48aefebfe988baf91bdcf1
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h
index 2b097b5..6eccb0e 100644
--- a/compiler/dex/bb_optimizations.h
+++ b/compiler/dex/bb_optimizations.h
@@ -139,7 +139,7 @@
 class NullCheckEliminationAndTypeInference : public PassME {
  public:
   NullCheckEliminationAndTypeInference()
-    : PassME("NCE_TypeInference", kRepeatingPreOrderDFSTraversal, "4_post_nce_cfg") {
+    : PassME("NCE_TypeInference", kRepeatingTopologicalSortTraversal, "4_post_nce_cfg") {
   }
 
   void Start(PassDataHolder* data) const {
@@ -169,7 +169,8 @@
 
 class ClassInitCheckElimination : public PassME {
  public:
-  ClassInitCheckElimination() : PassME("ClInitCheckElimination", kRepeatingPreOrderDFSTraversal) {
+  ClassInitCheckElimination()
+    : PassME("ClInitCheckElimination", kRepeatingTopologicalSortTraversal) {
   }
 
   bool Gate(const PassDataHolder* data) const {
diff --git a/compiler/dex/dataflow_iterator.h b/compiler/dex/dataflow_iterator.h
index 62973af..66c524f 100644
--- a/compiler/dex/dataflow_iterator.h
+++ b/compiler/dex/dataflow_iterator.h
@@ -337,16 +337,10 @@
        * @param mir_graph The MIRGraph considered.
        */
       explicit TopologicalSortIterator(MIRGraph* mir_graph)
-          : DataflowIterator(mir_graph, 0, mir_graph->GetTopologicalSortOrder() != nullptr ?
-            mir_graph->GetTopologicalSortOrder()->Size() : 0) {
+          : DataflowIterator(mir_graph, 0, mir_graph->GetTopologicalSortOrder()->Size()) {
         // Extra setup for TopologicalSortIterator.
         idx_ = start_idx_;
         block_id_list_ = mir_graph->GetTopologicalSortOrder();
-
-        if (mir_graph->GetTopologicalSortOrder() == nullptr) {
-          /* Compute the topological order */
-          mir_graph->ComputeTopologicalSortOrder();
-        }
       }
 
       /**
@@ -375,16 +369,10 @@
       * @param mir_graph The MIRGraph considered.
       */
      explicit RepeatingTopologicalSortIterator(MIRGraph* mir_graph)
-         : DataflowIterator(mir_graph, 0, mir_graph->GetTopologicalSortOrder() != nullptr ?
-           mir_graph->GetTopologicalSortOrder()->Size() : 0) {
+         : DataflowIterator(mir_graph, 0, mir_graph->GetTopologicalSortOrder()->Size()) {
        // Extra setup for RepeatingTopologicalSortIterator.
        idx_ = start_idx_;
        block_id_list_ = mir_graph->GetTopologicalSortOrder();
-
-       if (mir_graph->GetTopologicalSortOrder() == nullptr) {
-         /* Compute the topological order */
-         mir_graph->ComputeTopologicalSortOrder();
-       }
      }
 
      /**
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 63a5570..baa46d6 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -27,6 +27,7 @@
 #include "dex/quick/dex_file_method_inliner.h"
 #include "leb128.h"
 #include "pass_driver_me_post_opt.h"
+#include "utils/scoped_arena_containers.h"
 
 namespace art {
 
@@ -1437,22 +1438,24 @@
 }
 
 void MIRGraph::ComputeTopologicalSortOrder() {
-  std::queue<BasicBlock*> q;
-  std::map<int, int> visited_cnt_values;
-
   // Clear the nodes.
   ClearAllVisitedFlags();
 
   // Create the topological order if need be.
-  if (topological_order_ != nullptr) {
-    topological_order_ = new (arena_) GrowableArray<BasicBlockId>(arena_, 0);
+  if (topological_order_ == nullptr) {
+    topological_order_ = new (arena_) GrowableArray<BasicBlockId>(arena_, GetNumBlocks());
   }
   topological_order_->Reset();
 
+  ScopedArenaAllocator allocator(&cu_->arena_stack);
+  ScopedArenaQueue<BasicBlock*> q(allocator.Adapter());
+  ScopedArenaVector<size_t> visited_cnt_values(GetNumBlocks(), 0u, allocator.Adapter());
+
   // Set up visitedCntValues map for all BB. The default value for this counters in the map is zero.
   // also fill initial queue.
   GrowableArray<BasicBlock*>::Iterator iterator(&block_list_);
 
+  size_t num_blocks = 0u;
   while (true) {
     BasicBlock* bb = iterator.Next();
 
@@ -1464,7 +1467,8 @@
       continue;
     }
 
-    visited_cnt_values[bb->id] = bb->predecessors->Size();
+    num_blocks += 1u;
+    size_t unvisited_predecessor_count = bb->predecessors->Size();
 
     GrowableArray<BasicBlockId>::Iterator pred_iterator(bb->predecessors);
     // To process loops we should not wait for dominators.
@@ -1475,53 +1479,75 @@
         break;
       }
 
-      if (pred_bb->dominators == nullptr || pred_bb->hidden == true) {
-        continue;
-      }
-
-      // Skip the backward branch.
-      if (pred_bb->dominators->IsBitSet(bb->id) != 0) {
-        visited_cnt_values[bb->id]--;
+      // Skip the backward branch or hidden predecessor.
+      if (pred_bb->hidden ||
+          (pred_bb->dominators != nullptr && pred_bb->dominators->IsBitSet(bb->id))) {
+        unvisited_predecessor_count -= 1u;
       }
     }
 
+    visited_cnt_values[bb->id] = unvisited_predecessor_count;
+
     // Add entry block to queue.
-    if (visited_cnt_values[bb->id] == 0) {
+    if (unvisited_predecessor_count == 0) {
       q.push(bb);
     }
   }
 
-  while (q.size() > 0) {
-    // Get top.
-    BasicBlock* bb = q.front();
-    q.pop();
-
-    DCHECK_EQ(bb->hidden, false);
-
-    if (bb->IsExceptionBlock() == true) {
-      continue;
+  // We can theoretically get a cycle where none of the blocks dominates the other. Therefore
+  // don't stop when the queue is empty, continue until we've processed all the blocks.
+  // (In practice, we've seen this for a monitor-exit catch handler that erroneously tries to
+  // handle its own exceptions being broken into two blocks by a jump to to the monitor-exit
+  // from another catch hanler. http://b/15745363 .)
+  AllNodesIterator candidate_iter(this);  // For the empty queue case.
+  while (num_blocks != 0u) {
+    num_blocks -= 1u;
+    BasicBlock* bb = nullptr;
+    if (!q.empty()) {
+      // Get top.
+      bb = q.front();
+      q.pop();
+    } else {
+      // Find some block we didn't visit yet that has at least one visited predecessor.
+      while (bb == nullptr) {
+        BasicBlock* candidate = candidate_iter.Next();
+        DCHECK(candidate != nullptr);
+        if (candidate->visited || candidate->hidden) {
+          continue;
+        }
+        GrowableArray<BasicBlockId>::Iterator iter(candidate->predecessors);
+        for (BasicBlock* pred_bb = GetBasicBlock(iter.Next()); pred_bb != nullptr;
+            pred_bb = GetBasicBlock(iter.Next())) {
+          if (!pred_bb->hidden && pred_bb->visited) {
+            bb = candidate;
+            break;
+          }
+        }
+      }
     }
 
+    DCHECK_EQ(bb->hidden, false);
+    DCHECK_EQ(bb->visited, false);
+
     // We've visited all the predecessors. So, we can visit bb.
-    if (bb->visited == false) {
-      bb->visited = true;
+    bb->visited = true;
 
-      // Now add the basic block.
-      topological_order_->Insert(bb->id);
+    // Now add the basic block.
+    topological_order_->Insert(bb->id);
 
-      // Reduce visitedCnt for all the successors and add into the queue ones with visitedCnt equals to zero.
-      ChildBlockIterator succIter(bb, this);
-      BasicBlock* successor = succIter.Next();
-      while (successor != nullptr) {
-        // one more predecessor was visited.
-        visited_cnt_values[successor->id]--;
+    // Reduce visitedCnt for all the successors and add into the queue ones with visitedCnt equals to zero.
+    ChildBlockIterator succIter(bb, this);
+    BasicBlock* successor = succIter.Next();
+    for ( ; successor != nullptr; successor = succIter.Next()) {
+      if (successor->visited || successor->hidden) {
+        continue;
+      }
 
-        if (visited_cnt_values[successor->id] <= 0 && successor->visited == false && successor->hidden == false) {
-          q.push(successor);
-        }
-
-        // Take next successor.
-        successor = succIter.Next();
+      // one more predecessor was visited.
+      DCHECK_NE(visited_cnt_values[successor->id], 0u);
+      visited_cnt_values[successor->id] -= 1u;
+      if (visited_cnt_values[successor->id] == 0u) {
+        q.push(successor);
       }
     }
   }
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 15c0aa4..6c4d8e3 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -701,6 +701,7 @@
   void BasicBlockOptimization();
 
   GrowableArray<BasicBlockId>* GetTopologicalSortOrder() {
+    DCHECK(topological_order_ != nullptr);
     return topological_order_;
   }
 
diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc
index 29c353a..9b2e798 100644
--- a/compiler/dex/mir_optimization_test.cc
+++ b/compiler/dex/mir_optimization_test.cc
@@ -190,10 +190,12 @@
   void PerformClassInitCheckElimination() {
     cu_.mir_graph->SSATransformationStart();
     cu_.mir_graph->ComputeDFSOrders();
+    cu_.mir_graph->ComputeDominators();
+    cu_.mir_graph->ComputeTopologicalSortOrder();
     cu_.mir_graph->SSATransformationEnd();
     bool gate_result = cu_.mir_graph->EliminateClassInitChecksGate();
     ASSERT_TRUE(gate_result);
-    RepeatingPreOrderDfsIterator iterator(cu_.mir_graph.get());
+    RepeatingTopologicalSortIterator iterator(cu_.mir_graph.get());
     bool change = false;
     for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) {
       change = cu_.mir_graph->EliminateClassInitChecks(bb);
diff --git a/compiler/dex/pass_driver_me.h b/compiler/dex/pass_driver_me.h
index 7d76fb8..031c5cf 100644
--- a/compiler/dex/pass_driver_me.h
+++ b/compiler/dex/pass_driver_me.h
@@ -62,6 +62,12 @@
       case kPostOrderDOMTraversal:
         DoWalkBasicBlocks<PostOrderDOMIterator>(&pass_me_data_holder_, me_pass);
         break;
+      case kTopologicalSortTraversal:
+        DoWalkBasicBlocks<TopologicalSortIterator>(&pass_me_data_holder_, me_pass);
+        break;
+      case kRepeatingTopologicalSortTraversal:
+        DoWalkBasicBlocks<RepeatingTopologicalSortIterator>(&pass_me_data_holder_, me_pass);
+        break;
       case kAllNodes:
         DoWalkBasicBlocks<AllNodesIterator>(&pass_me_data_holder_, me_pass);
         break;
diff --git a/compiler/dex/pass_driver_me_post_opt.cc b/compiler/dex/pass_driver_me_post_opt.cc
index cb63f41..14108af 100644
--- a/compiler/dex/pass_driver_me_post_opt.cc
+++ b/compiler/dex/pass_driver_me_post_opt.cc
@@ -36,6 +36,7 @@
   GetPassInstance<CalculatePredecessors>(),
   GetPassInstance<DFSOrders>(),
   GetPassInstance<BuildDomination>(),
+  GetPassInstance<TopologicalSortOrders>(),
   GetPassInstance<DefBlockMatrix>(),
   GetPassInstance<CreatePhiNodes>(),
   GetPassInstance<ClearVisitedFlag>(),
diff --git a/compiler/dex/post_opt_passes.h b/compiler/dex/post_opt_passes.h
index 445c46d..a1b0df4 100644
--- a/compiler/dex/post_opt_passes.h
+++ b/compiler/dex/post_opt_passes.h
@@ -127,6 +127,23 @@
 };
 
 /**
+ * @class TopologicalSortOrders
+ * @brief Compute the topological sort order of the MIR graph
+ */
+class TopologicalSortOrders : public PassME {
+ public:
+  TopologicalSortOrders() : PassME("TopologicalSortOrders") {
+  }
+
+  void Start(PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
+    DCHECK(c_unit != nullptr);
+    c_unit->mir_graph.get()->ComputeTopologicalSortOrder();
+  }
+};
+
+/**
  * @class DefBlockMatrix
  * @brief Calculate the matrix of definition per basic block
  */
diff --git a/compiler/utils/scoped_arena_containers.h b/compiler/utils/scoped_arena_containers.h
index 5deb661..6728565 100644
--- a/compiler/utils/scoped_arena_containers.h
+++ b/compiler/utils/scoped_arena_containers.h
@@ -17,8 +17,10 @@
 #ifndef ART_COMPILER_UTILS_SCOPED_ARENA_CONTAINERS_H_
 #define ART_COMPILER_UTILS_SCOPED_ARENA_CONTAINERS_H_
 
-#include <vector>
+#include <deque>
+#include <queue>
 #include <set>
+#include <vector>
 
 #include "utils/scoped_arena_allocator.h"
 #include "safe_map.h"
@@ -26,6 +28,12 @@
 namespace art {
 
 template <typename T>
+using ScopedArenaDeque = std::deque<T, ScopedArenaAllocatorAdapter<T>>;
+
+template <typename T>
+using ScopedArenaQueue = std::queue<T, ScopedArenaDeque<T>>;
+
+template <typename T>
 using ScopedArenaVector = std::vector<T, ScopedArenaAllocatorAdapter<T>>;
 
 template <typename T, typename Comparator = std::less<T>>