Revert^4 "Partial LSE analysis & store removal"

We incorrectly handled merging unknowns in some situations.
Specifically, in cases where we were unable to materialize loop-phis,
we could end up with PureUnknowns that hid stores which needed to be
kept.
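
A minimal, hypothetical sketch (not taken from the actual failing code)
of the kind of pattern involved: the value stored inside the loop is
loop-carried, so in the IR it is a loop phi. If LSE cannot materialize
that phi, the store has to stay; merging its value into a PureUnknown
must not hide it.

  #include <cstdint>

  struct Node {
    int32_t field;
  };

  int32_t Compute(int32_t i) { return i * 2; }

  void Example(Node* obj, int32_t n) {
    int32_t value = 0;
    for (int32_t i = 0; i < n; ++i) {
      // Store of a loop-carried value (a loop phi in the IR). If the phi
      // cannot be materialized, this store must be kept.
      obj->field = value;
      value = Compute(i);
    }
  }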

In an unrelated issue, we were incorrectly considering some values as
escapes when they were merely live at the point of an invoke. Since
SearchPhiPlaceholdersForKeptStores used a more precise notion of
escape, we could end up removing stores without being able to replace
the values.
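
A hypothetical sketch, only to illustrate 'live at the point of an
invoke' (names made up, not from the actual failing code): 'b' is live
across the call but is not passed to it. If the broader analysis treats
that liveness as an escape while the store-removal logic uses a
stricter notion, the two can disagree, and a store can be removed
without the replacement value being available at the later read.

  #include <cstdint>

  struct Box {
    int32_t value;
  };

  void SomeCall() {}  // An invoke 'b' is live across but not passed to.

  int32_t Example() {
    Box b{0};
    b.value = 42;   // Store both passes must agree about.
    SomeCall();     // 'b' is live here but does not escape through the call.
    return b.value; // Later read that needs the stored value.
  }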

This reverts commit 2316b3a0779f3721a78681f5c70ed6624ecaebef.
This unreverts commit b6837f0350ff66c13582b0e94178dd5ca283ff0a.
This reverts commit fe270426c8a2a69a8f669339e83b86fbf40e25a1.
This unreverts commit bb6cda60e4418c0ab557ea4090e046bed8206763.

Bug: 67037140
Bug: 173120044

Reason for revert: Fixed issue causing incorrect store elimination
Test: ./test.py --host
Test: Boot cuttlefish
      atest FrameworksServicesTests:com.android.server.job.BackgroundRestrictionsTest#testPowerWhiteList

Change-Id: I2ebae9ccfaf5169d551c5019b547589d0fce1dc9
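
For context on the code added below, here is a minimal standalone
sketch of the memoized repeated-DFS reachability idea, using std
containers instead of ART's arena bit vectors (an illustration, not
the patch itself):

  #include <cstdint>
  #include <vector>

  // For every block, compute the set of blocks reachable from it by running
  // one DFS per block and unioning in the rows of blocks whose reachability
  // is already known. Roughly O(N*(E + N)) instead of Floyd-Warshall's O(N^3).
  std::vector<std::vector<bool>> ComputeReachability(
      const std::vector<std::vector<uint32_t>>& successors) {
    const uint32_t n = static_cast<uint32_t>(successors.size());
    std::vector<std::vector<bool>> reachable(n, std::vector<bool>(n, false));
    std::vector<bool> finished(n, false);
    // Visiting sources in post order makes memoization hits more likely; for
    // brevity this sketch just goes in index order.
    for (uint32_t source = 0; source < n; ++source) {
      std::vector<bool> discovered(n, false);
      std::vector<uint32_t> work_stack;
      work_stack.push_back(source);
      discovered[source] = true;
      while (!work_stack.empty()) {
        uint32_t cur = work_stack.back();
        work_stack.pop_back();
        if (finished[cur]) {
          // Memoized: union the already-computed row instead of re-walking.
          for (uint32_t i = 0; i < n; ++i) {
            if (reachable[cur][i]) reachable[source][i] = true;
          }
          continue;
        }
        for (uint32_t succ : successors[cur]) {
          reachable[source][succ] = true;
          if (!discovered[succ]) {
            discovered[succ] = true;
            work_stack.push_back(succ);
          }
        }
      }
      finished[source] = true;
    }
    return reachable;
  }
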
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index e2d164e..1773c07 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -15,12 +15,19 @@
  */
 #include "nodes.h"
 
+#include <algorithm>
 #include <cfloat>
 
 #include "art_method-inl.h"
+#include "base/arena_allocator.h"
+#include "base/arena_bit_vector.h"
 #include "base/bit_utils.h"
 #include "base/bit_vector-inl.h"
+#include "base/bit_vector.h"
+#include "base/iteration_range.h"
 #include "base/logging.h"
+#include "base/scoped_arena_allocator.h"
+#include "base/scoped_arena_containers.h"
 #include "base/stl_util.h"
 #include "class_linker-inl.h"
 #include "class_root-inl.h"
@@ -263,6 +270,171 @@
   }
 }
 
+// TODO Consider moving this entirely into LoadStoreAnalysis/Elimination.
+bool HGraph::PathBetween(uint32_t source_idx, uint32_t dest_idx) const {
+  DCHECK_LT(source_idx, blocks_.size()) << "source not present in graph!";
+  DCHECK_LT(dest_idx, blocks_.size()) << "dest not present in graph!";
+  DCHECK(blocks_[source_idx] != nullptr);
+  DCHECK(blocks_[dest_idx] != nullptr);
+  return reachability_graph_.IsBitSet(source_idx, dest_idx);
+}
+
+bool HGraph::PathBetween(const HBasicBlock* source, const HBasicBlock* dest) const {
+  if (source == nullptr || dest == nullptr) {
+    return false;
+  }
+  size_t source_idx = source->GetBlockId();
+  size_t dest_idx = dest->GetBlockId();
+  return PathBetween(source_idx, dest_idx);
+}
+
+// This helper class calculates the reachability of every node from every
+// other node by iteratively using DFS to find the reachability of each
+// individual block.
+//
+// This is in practice faster than the simpler Floyd-Warshall since, while that
+// is O(N^3), this is O(N*(E + N)) where N is the number of blocks and E is the
+// number of edges. Since in practice each block only has a few outgoing edges
+// we can confidently say that E ~ B*N where B is a small number (~3). We also
+// memoize the results as we go allowing us to (potentially) avoid walking the
+// entire graph for every node. To make best use of this memoization we
+// calculate the reachability of blocks in PostOrder. This means that
+// (generally) blocks that are dominated by many other blocks and dominate few
+// blocks themselves will be examined first. This makes it more likely we can
+// use our memoized results.
+class ReachabilityAnalysisHelper {
+ public:
+  ReachabilityAnalysisHelper(const HGraph* graph,
+                             ArenaBitVectorArray* reachability_graph,
+                             ArenaStack* arena_stack)
+      : graph_(graph),
+        reachability_graph_(reachability_graph),
+        arena_stack_(arena_stack),
+        temporaries_(arena_stack_),
+        block_size_(RoundUp(graph_->GetBlocks().size(), BitVector::kWordBits)),
+        all_visited_nodes_(
+            &temporaries_, graph_->GetBlocks().size(), false, kArenaAllocReachabilityGraph),
+        not_post_order_visited_(
+            &temporaries_, graph_->GetBlocks().size(), false, kArenaAllocReachabilityGraph) {
+    // We can't adjust the size of the reachability graph any more without
+    // breaking our allocator invariants, so it had better be large enough.
+    CHECK_GE(reachability_graph_->NumRows(), graph_->GetBlocks().size());
+    CHECK_GE(reachability_graph_->NumColumns(), graph_->GetBlocks().size());
+    not_post_order_visited_.SetInitialBits(graph_->GetBlocks().size());
+  }
+
+  void CalculateReachability() {
+    // Calculate which blocks connect to which, using repeated DFS.
+    //
+    // Going in PostOrder should generally give memoization a good chance of
+    // hitting.
+    for (const HBasicBlock* blk : graph_->GetPostOrder()) {
+      if (blk == nullptr) {
+        continue;
+      }
+      not_post_order_visited_.ClearBit(blk->GetBlockId());
+      CalculateConnectednessOn(blk);
+      all_visited_nodes_.SetBit(blk->GetBlockId());
+    }
+    // Handle any blocks that the PostOrder walk did not visit.
+    for (auto idx : not_post_order_visited_.Indexes()) {
+      const HBasicBlock* blk = graph_->GetBlocks()[idx];
+      if (blk == nullptr) {
+        continue;
+      }
+      CalculateConnectednessOn(blk);
+      all_visited_nodes_.SetBit(blk->GetBlockId());
+    }
+  }
+
+ private:
+  void AddEdge(uint32_t source, const HBasicBlock* dest) {
+    reachability_graph_->SetBit(source, dest->GetBlockId());
+  }
+
+  // Union the reachability of 'idx' into 'update_block_idx'. This is done to
+  // implement memoization. In order to improve performance we do this in 4-byte
+  // blocks. Clang should be able to optimize this to larger blocks if possible.
+  void UnionBlock(size_t update_block_idx, size_t idx) {
+    reachability_graph_->UnionRows(update_block_idx, idx);
+  }
+
+  // Single DFS to get connectedness of a single block
+  void CalculateConnectednessOn(const HBasicBlock* const target_block) {
+    const uint32_t target_idx = target_block->GetBlockId();
+    ScopedArenaAllocator connectedness_temps(arena_stack_);
+    // Which nodes we have already discovered and either have processed or are
+    // already on the work stack.
+    ArenaBitVector discovered(
+        &connectedness_temps, graph_->GetBlocks().size(), false, kArenaAllocReachabilityGraph);
+    // The work stack. What blocks we still need to process.
+    ScopedArenaVector<const HBasicBlock*> work_stack(
+        connectedness_temps.Adapter(kArenaAllocReachabilityGraph));
+    // The number of blocks is a known upper bound, since each block is pushed
+    // at most once. Reserving up front avoids re-allocation.
+    work_stack.reserve(graph_->GetBlocks().size());
+    discovered.SetBit(target_idx);
+    work_stack.push_back(target_block);
+    // Main DFS Loop.
+    while (!work_stack.empty()) {
+      const HBasicBlock* cur = work_stack.back();
+      work_stack.pop_back();
+      // Memoization of previous runs.
+      if (all_visited_nodes_.IsBitSet(cur->GetBlockId())) {
+        DCHECK_NE(target_block, cur);
+        // Already explored from here. Just use that data.
+        UnionBlock(target_idx, cur->GetBlockId());
+        continue;
+      }
+      for (const HBasicBlock* succ : cur->GetSuccessors()) {
+        AddEdge(target_idx, succ);
+        if (!discovered.IsBitSet(succ->GetBlockId())) {
+          work_stack.push_back(succ);
+          discovered.SetBit(succ->GetBlockId());
+        }
+      }
+    }
+  }
+
+  const HGraph* graph_;
+  // The graph's reachability_graph_ on the main allocator.
+  ArenaBitVectorArray* reachability_graph_;
+  ArenaStack* arena_stack_;
+  // An allocator for temporary bit-vectors used by this algorithm. The
+  // 'Resize' of reachability_graph_ prior to the construction of this object
+  // should be the only allocation on the main allocator, so it's safe to
+  // make a sub-allocator here.
+  ScopedArenaAllocator temporaries_;
+  // The number of bit columns per row, rounded up to BitVector::kWordBits.
+  const size_t block_size_;
+  // Where we've already completely calculated connectedness.
+  ArenaBitVector all_visited_nodes_;
+  // Blocks the PostOrder walk never visited; handled in the second pass.
+  ArenaBitVector not_post_order_visited_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReachabilityAnalysisHelper);
+};
+
+void HGraph::ComputeReachabilityInformation() {
+  DCHECK_EQ(reachability_graph_.GetRawData().NumSetBits(), 0u);
+  DCHECK(reachability_graph_.IsExpandable());
+  // Reserve all the bits we'll need. This is the only allocation on the
+  // standard allocator we do here, enabling us to create a new ScopedArena for
+  // use with temporaries.
+  //
+  // reachability_graph_ acts as an |N| x |N| reachability matrix for
+  // PathBetween. The array is padded so each row starts on a
+  // BitVector::kWordBits-bit alignment for simplicity and performance,
+  // allowing us to union whole rows together without going bit-by-bit.
+  reachability_graph_.Resize(blocks_.size(), blocks_.size(), /*clear=*/false);
+  ReachabilityAnalysisHelper helper(this, &reachability_graph_, GetArenaStack());
+  helper.CalculateReachability();
+}
+
+void HGraph::ClearReachabilityInformation() {
+  reachability_graph_.Clear();
+}
+
 void HGraph::ComputeDominanceInformation() {
   DCHECK(reverse_post_order_.empty());
   reverse_post_order_.reserve(blocks_.size());