Diffstat (limited to 'compiler/optimizing/nodes.cc')
-rw-r--r--  compiler/optimizing/nodes.cc  172
1 file changed, 172 insertions(+), 0 deletions(-)
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index e2d164e1a2..1773c0770f 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -15,12 +15,19 @@
*/
#include "nodes.h"
+#include <algorithm>
#include <cfloat>
#include "art_method-inl.h"
+#include "base/arena_allocator.h"
+#include "base/arena_bit_vector.h"
#include "base/bit_utils.h"
#include "base/bit_vector-inl.h"
+#include "base/bit_vector.h"
+#include "base/iteration_range.h"
#include "base/logging.h"
+#include "base/scoped_arena_allocator.h"
+#include "base/scoped_arena_containers.h"
#include "base/stl_util.h"
#include "class_linker-inl.h"
#include "class_root-inl.h"
@@ -263,6 +270,171 @@ static bool UpdateDominatorOfSuccessor(HBasicBlock* block, HBasicBlock* successo
  }
}
+// TODO Consider moving this entirely into LoadStoreAnalysis/Elimination.
+bool HGraph::PathBetween(uint32_t source_idx, uint32_t dest_idx) const {
+  DCHECK_LT(source_idx, blocks_.size()) << "source not present in graph!";
+  DCHECK_LT(dest_idx, blocks_.size()) << "dest not present in graph!";
+  DCHECK(blocks_[source_idx] != nullptr);
+  DCHECK(blocks_[dest_idx] != nullptr);
+  return reachability_graph_.IsBitSet(source_idx, dest_idx);
+}
+
+bool HGraph::PathBetween(const HBasicBlock* source, const HBasicBlock* dest) const {
+  if (source == nullptr || dest == nullptr) {
+    return false;
+  }
+  size_t source_idx = source->GetBlockId();
+  size_t dest_idx = dest->GetBlockId();
+  return PathBetween(source_idx, dest_idx);
+}
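+
+// A minimal usage sketch (hypothetical caller; the block names are
+// illustrative). Reachability must be computed before querying and cleared
+// before the graph is mutated again:
+//
+//   graph->ComputeReachabilityInformation();
+//   if (graph->PathBetween(store_block, load_block)) {
+//     // Some path in the CFG can carry the store's effects to the load.
+//   }
+//   graph->ClearReachabilityInformation();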
+
+// This function/struct calculates the reachability of every node from every
+// other node by iteratively using DFS to find the reachability of each
+// individual block.
+//
+// In practice this is faster than the simpler Floyd-Warshall algorithm:
+// Floyd-Warshall is O(N**3), while this is O(N*(E + N)), where N is the
+// number of blocks and E is the number of edges. Since in practice each
+// block has only a few outgoing edges, we can confidently say that
+// E ~ B*N for a small constant B (~3), making this effectively O(N**2).
+// We also memoize the results as we go, allowing us to (potentially) avoid
+// walking the entire graph for every node. To make the best use of this
+// memoization we calculate the reachability of blocks in post order. This
+// means that (generally) blocks that are dominated by many other blocks and
+// dominate few blocks themselves will be examined first, making it more
+// likely that we can use our memoized results.
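+//
+// For example (illustrative numbers): with N = 1000 blocks and B ~ 3,
+// Floyd-Warshall performs on the order of 10**9 updates, while the repeated
+// DFS performs on the order of N*(3N + N) = 4*10**6 block visits, each made
+// cheap by the word-wise row unions.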
+class ReachabilityAnalysisHelper {
+ public:
+  ReachabilityAnalysisHelper(const HGraph* graph,
+                             ArenaBitVectorArray* reachability_graph,
+                             ArenaStack* arena_stack)
+      : graph_(graph),
+        reachability_graph_(reachability_graph),
+        arena_stack_(arena_stack),
+        temporaries_(arena_stack_),
+        block_size_(RoundUp(graph_->GetBlocks().size(), BitVector::kWordBits)),
+        all_visited_nodes_(
+            &temporaries_, graph_->GetBlocks().size(), false, kArenaAllocReachabilityGraph),
+        not_post_order_visited_(
+            &temporaries_, graph_->GetBlocks().size(), false, kArenaAllocReachabilityGraph) {
+    // We can't adjust the size of the reachability graph any more without
+    // breaking our allocator invariants, so it had better be large enough.
+    CHECK_GE(reachability_graph_->NumRows(), graph_->GetBlocks().size());
+    CHECK_GE(reachability_graph_->NumColumns(), graph_->GetBlocks().size());
+    not_post_order_visited_.SetInitialBits(graph_->GetBlocks().size());
+  }
+
+  void CalculateReachability() {
+    // Calculate which blocks connect to which others using repeated DFS.
+    //
+    // Visiting in post order should generally give the memoization a good
+    // chance of hitting.
+    for (const HBasicBlock* blk : graph_->GetPostOrder()) {
+      if (blk == nullptr) {
+        continue;
+      }
+      not_post_order_visited_.ClearBit(blk->GetBlockId());
+      CalculateConnectednessOn(blk);
+      all_visited_nodes_.SetBit(blk->GetBlockId());
+    }
+    // Handle any blocks the post-order traversal did not reach.
+    for (auto idx : not_post_order_visited_.Indexes()) {
+      const HBasicBlock* blk = graph_->GetBlocks()[idx];
+      if (blk == nullptr) {
+        continue;
+      }
+      CalculateConnectednessOn(blk);
+      all_visited_nodes_.SetBit(blk->GetBlockId());
+    }
+  }
+
+ private:
+  void AddEdge(uint32_t source, const HBasicBlock* dest) {
+    reachability_graph_->SetBit(source, dest->GetBlockId());
+  }
+
+  // Union the reachability of 'idx' into 'update_block_idx'. This implements
+  // the memoization. To improve performance the union is done in 4-byte
+  // blocks rather than bit-by-bit; Clang should be able to optimize this to
+  // larger blocks where possible. See the sketch after this function.
+  void UnionBlock(size_t update_block_idx, size_t idx) {
+    reachability_graph_->UnionRows(update_block_idx, idx);
+  }
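+
+  // Conceptually, the row union boils down to the following (a hypothetical
+  // sketch assuming each row is stored as an array of 32-bit words; the real
+  // storage is ArenaBitVectorArray):
+  //
+  //   for (size_t i = 0; i < words_per_row; ++i) {
+  //     rows[update_block_idx][i] |= rows[idx][i];  // 32 bits per OR.
+  //   }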
+
+  // Single DFS computing which blocks are reachable from 'target_block'.
+  void CalculateConnectednessOn(const HBasicBlock* const target_block) {
+    const uint32_t target_idx = target_block->GetBlockId();
+    ScopedArenaAllocator connectedness_temps(arena_stack_);
+    // The blocks we have already discovered, i.e. those we have either
+    // processed or already pushed onto the work stack.
+    ArenaBitVector discovered(
+        &connectedness_temps, graph_->GetBlocks().size(), false, kArenaAllocReachabilityGraph);
+    // The work stack: blocks we still need to process.
+    ScopedArenaVector<const HBasicBlock*> work_stack(
+        connectedness_temps.Adapter(kArenaAllocReachabilityGraph));
+    // The block count is a known maximum size, since each block is pushed at
+    // most once. Reserving it avoids re-allocation.
+    work_stack.reserve(graph_->GetBlocks().size());
+    discovered.SetBit(target_idx);
+    work_stack.push_back(target_block);
+    // Main DFS loop.
+    while (!work_stack.empty()) {
+      const HBasicBlock* cur = work_stack.back();
+      work_stack.pop_back();
+      // Memoization of previous runs.
+      if (all_visited_nodes_.IsBitSet(cur->GetBlockId())) {
+        DCHECK_NE(target_block, cur);
+        // Already explored from here. Just use that data.
+        UnionBlock(target_idx, cur->GetBlockId());
+        continue;
+      }
+      for (const HBasicBlock* succ : cur->GetSuccessors()) {
+        AddEdge(target_idx, succ);
+        if (!discovered.IsBitSet(succ->GetBlockId())) {
+          work_stack.push_back(succ);
+          discovered.SetBit(succ->GetBlockId());
+        }
+      }
+    }
+  }
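+
+  // Worked example of the memoization (hypothetical 4-block graph with edges
+  // A->B, B->C, C->D and post order D, C, B, A): D is processed first,
+  // leaving its row finished. The DFS from C then pops D, finds it in
+  // all_visited_nodes_, and unions D's finished row into C's row instead of
+  // re-walking D's successors. B and A likewise reuse C's and B's rows.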
+
+  const HGraph* graph_;
+  // The graph's reachability_graph_ on the main allocator.
+  ArenaBitVectorArray* reachability_graph_;
+  ArenaStack* arena_stack_;
+  // An allocator for temporary bit-vectors used by this algorithm. The
+  // 'SetBit'/'ClearBit' calls on reachability_graph_ prior to the
+  // construction of this object should be the only allocations on the main
+  // allocator, so it's safe to make a sub-allocator here.
+  ScopedArenaAllocator temporaries_;
+  // The number of columns in each row, i.e. the block count rounded up to a
+  // multiple of BitVector::kWordBits.
+  const size_t block_size_;
+  // Blocks for which connectedness has already been completely calculated.
+  ArenaBitVector all_visited_nodes_;
+  // Blocks the post-order traversal never visited; handled in a second pass.
+  ArenaBitVector not_post_order_visited_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReachabilityAnalysisHelper);
+};
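+
+// For intuition, the same memoized idea as a self-contained sketch on plain
+// STL containers (hypothetical, not ART code; 'succ' holds each block's
+// successor ids and 'post_order' is a post-order block sequence):
+//
+//   std::vector<std::vector<bool>> Reachability(
+//       const std::vector<std::vector<uint32_t>>& succ,
+//       const std::vector<uint32_t>& post_order) {
+//     const size_t n = succ.size();
+//     std::vector<std::vector<bool>> reach(n, std::vector<bool>(n, false));
+//     std::vector<bool> done(n, false);
+//     for (uint32_t blk : post_order) {
+//       std::vector<uint32_t> stack{blk};
+//       std::vector<bool> discovered(n, false);
+//       discovered[blk] = true;
+//       while (!stack.empty()) {
+//         uint32_t cur = stack.back();
+//         stack.pop_back();
+//         if (done[cur]) {  // Memoized: union the finished row.
+//           for (size_t i = 0; i < n; ++i) {
+//             reach[blk][i] = reach[blk][i] || reach[cur][i];
+//           }
+//           continue;
+//         }
+//         for (uint32_t s : succ[cur]) {
+//           reach[blk][s] = true;
+//           if (!discovered[s]) {
+//             discovered[s] = true;
+//             stack.push_back(s);
+//           }
+//         }
+//       }
+//       done[blk] = true;
+//     }
+//     return reach;
+//   }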
+
+void HGraph::ComputeReachabilityInformation() {
+  DCHECK_EQ(reachability_graph_.GetRawData().NumSetBits(), 0u);
+  DCHECK(reachability_graph_.IsExpandable());
+  // Reserve all the bits we'll need. This is the only allocation on the
+  // standard allocator we do here, enabling us to create a new ScopedArena
+  // for use with temporaries.
+  //
+  // reachability_graph_ acts as an |N| x |N| adjacency matrix for
+  // PathBetween. The array is padded so each row starts on a
+  // BitVector::kWordBits-bit alignment, for simplicity and performance,
+  // allowing us to union rows together without going bit-by-bit.
+  reachability_graph_.Resize(blocks_.size(), blocks_.size(), /*clear=*/false);
+  ReachabilityAnalysisHelper helper(this, &reachability_graph_, GetArenaStack());
+  helper.CalculateReachability();
+}
+
+void HGraph::ClearReachabilityInformation() {
+  reachability_graph_.Clear();
+}
+
void HGraph::ComputeDominanceInformation() {
  DCHECK(reverse_post_order_.empty());
  reverse_post_order_.reserve(blocks_.size());