Diffstat (limited to 'compiler/optimizing/nodes.cc')
-rw-r--r-- | compiler/optimizing/nodes.cc | 172
1 file changed, 172 insertions, 0 deletions
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index e2d164e1a2..1773c0770f 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -15,12 +15,19 @@
  */
 
 #include "nodes.h"
 
+#include <algorithm>
 #include <cfloat>
 
 #include "art_method-inl.h"
+#include "base/arena_allocator.h"
+#include "base/arena_bit_vector.h"
 #include "base/bit_utils.h"
 #include "base/bit_vector-inl.h"
+#include "base/bit_vector.h"
+#include "base/iteration_range.h"
 #include "base/logging.h"
+#include "base/scoped_arena_allocator.h"
+#include "base/scoped_arena_containers.h"
 #include "base/stl_util.h"
 #include "class_linker-inl.h"
 #include "class_root-inl.h"
@@ -263,6 +270,171 @@ static bool UpdateDominatorOfSuccessor(HBasicBlock* block, HBasicBlock* successo
   }
 }
 
+// TODO Consider moving this entirely into LoadStoreAnalysis/Elimination.
+bool HGraph::PathBetween(uint32_t source_idx, uint32_t dest_idx) const {
+  DCHECK_LT(source_idx, blocks_.size()) << "source not present in graph!";
+  DCHECK_LT(dest_idx, blocks_.size()) << "dest not present in graph!";
+  DCHECK(blocks_[source_idx] != nullptr);
+  DCHECK(blocks_[dest_idx] != nullptr);
+  return reachability_graph_.IsBitSet(source_idx, dest_idx);
+}
+
+bool HGraph::PathBetween(const HBasicBlock* source, const HBasicBlock* dest) const {
+  if (source == nullptr || dest == nullptr) {
+    return false;
+  }
+  size_t source_idx = source->GetBlockId();
+  size_t dest_idx = dest->GetBlockId();
+  return PathBetween(source_idx, dest_idx);
+}
+
+// This function/struct calculates the reachability of every node from every
+// other node by iteratively using DFS to find the reachability of each
+// individual block.
+//
+// This is in practice faster than the simpler Floyd-Warshall since, while
+// that is O(N**3), this is O(N*(E + N)) where N is the number of blocks and
+// E is the number of edges. Since in practice each block only has a few
+// outgoing edges, we can confidently say that E ~ B*N where B is a small
+// number (~3). We also memoize the results as we go, allowing us to
+// (potentially) avoid walking the entire graph for every node. To make best
+// use of this memoization we calculate the reachability of blocks in post
+// order. This means that (generally) blocks that are dominated by many other
+// blocks and dominate few blocks themselves will be examined first. This
+// makes it more likely we can use our memoized results.
+class ReachabilityAnalysisHelper {
+ public:
+  ReachabilityAnalysisHelper(const HGraph* graph,
+                             ArenaBitVectorArray* reachability_graph,
+                             ArenaStack* arena_stack)
+      : graph_(graph),
+        reachability_graph_(reachability_graph),
+        arena_stack_(arena_stack),
+        temporaries_(arena_stack_),
+        block_size_(RoundUp(graph_->GetBlocks().size(), BitVector::kWordBits)),
+        all_visited_nodes_(
+            &temporaries_, graph_->GetBlocks().size(), false, kArenaAllocReachabilityGraph),
+        not_post_order_visited_(
+            &temporaries_, graph_->GetBlocks().size(), false, kArenaAllocReachabilityGraph) {
+    // We can't adjust the size of the reachability graph any more without
+    // breaking our allocator invariants, so it had better be large enough.
+    CHECK_GE(reachability_graph_->NumRows(), graph_->GetBlocks().size());
+    CHECK_GE(reachability_graph_->NumColumns(), graph_->GetBlocks().size());
+    not_post_order_visited_.SetInitialBits(graph_->GetBlocks().size());
+  }
+
+  void CalculateReachability() {
+    // Calculate which blocks connect using repeated DFS.
+    //
+    // Going in post order should generally give memoization a good shot of
+    // hitting.
+    for (const HBasicBlock* blk : graph_->GetPostOrder()) {
+      if (blk == nullptr) {
+        continue;
+      }
+      not_post_order_visited_.ClearBit(blk->GetBlockId());
+      CalculateConnectednessOn(blk);
+      all_visited_nodes_.SetBit(blk->GetBlockId());
+    }
+    // Get all other bits.
+    for (auto idx : not_post_order_visited_.Indexes()) {
+      const HBasicBlock* blk = graph_->GetBlocks()[idx];
+      if (blk == nullptr) {
+        continue;
+      }
+      CalculateConnectednessOn(blk);
+      all_visited_nodes_.SetBit(blk->GetBlockId());
+    }
+  }
+
+ private:
+  void AddEdge(uint32_t source, const HBasicBlock* dest) {
+    reachability_graph_->SetBit(source, dest->GetBlockId());
+  }
+
+  // Union the reachability of 'idx' into 'update_block_idx'. This is done to
+  // implement memoization. In order to improve performance we do this in
+  // 4-byte blocks. Clang should be able to optimize this to larger blocks
+  // if possible.
+  void UnionBlock(size_t update_block_idx, size_t idx) {
+    reachability_graph_->UnionRows(update_block_idx, idx);
+  }
+
+  // Single DFS to get the connectedness of a single block.
+  void CalculateConnectednessOn(const HBasicBlock* const target_block) {
+    const uint32_t target_idx = target_block->GetBlockId();
+    ScopedArenaAllocator connectedness_temps(arena_stack_);
+    // Which nodes we have already discovered and either have processed or
+    // are already on the queue.
+    ArenaBitVector discovered(
+        &connectedness_temps, graph_->GetBlocks().size(), false, kArenaAllocReachabilityGraph);
+    // The work stack: which blocks we still need to process.
+    ScopedArenaVector<const HBasicBlock*> work_stack(
+        connectedness_temps.Adapter(kArenaAllocReachabilityGraph));
+    // Known max size, since otherwise we'd have blocks on the stack multiple
+    // times. Avoids re-allocation.
+    work_stack.reserve(graph_->GetBlocks().size());
+    discovered.SetBit(target_idx);
+    work_stack.push_back(target_block);
+    // Main DFS loop.
+    while (!work_stack.empty()) {
+      const HBasicBlock* cur = work_stack.back();
+      work_stack.pop_back();
+      // Memoization of previous runs.
+      if (all_visited_nodes_.IsBitSet(cur->GetBlockId())) {
+        DCHECK_NE(target_block, cur);
+        // Already explored from here. Just use that data.
+        UnionBlock(target_idx, cur->GetBlockId());
+        continue;
+      }
+      for (const HBasicBlock* succ : cur->GetSuccessors()) {
+        AddEdge(target_idx, succ);
+        if (!discovered.IsBitSet(succ->GetBlockId())) {
+          work_stack.push_back(succ);
+          discovered.SetBit(succ->GetBlockId());
+        }
+      }
+    }
+  }
+
+  const HGraph* graph_;
+  // The graph's reachability_graph_ on the main allocator.
+  ArenaBitVectorArray* reachability_graph_;
+  ArenaStack* arena_stack_;
+  // An allocator for temporary bit-vectors used by this algorithm. The
+  // SetBit/ClearBit calls on reachability_graph_ prior to the construction
+  // of this object should be the only allocations on the main allocator, so
+  // it's safe to make a sub-allocator here.
+  ScopedArenaAllocator temporaries_;
+  // Number of columns.
+  const size_t block_size_;
+  // Where we've already completely calculated connectedness.
+  ArenaBitVector all_visited_nodes_;
+  // What we never visited and need to do later.
+  ArenaBitVector not_post_order_visited_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReachabilityAnalysisHelper);
+};
+
+void HGraph::ComputeReachabilityInformation() {
+  DCHECK_EQ(reachability_graph_.GetRawData().NumSetBits(), 0u);
+  DCHECK(reachability_graph_.IsExpandable());
+  // Reserve all the bits we'll need. This is the only allocation on the
+  // standard allocator we do here, enabling us to create a new ScopedArena
+  // for use with temporaries.
+  //
+  // reachability_graph_ acts as an |N| x |N| graph for PathBetween. The
+  // array is padded so that each row starts on a BitVector::kWordBits-bit
+  // alignment, for simplicity and performance, allowing us to union rows
+  // together without going bit-by-bit.
+  reachability_graph_.Resize(blocks_.size(), blocks_.size(), /*clear=*/false);
+  ReachabilityAnalysisHelper helper(this, &reachability_graph_, GetArenaStack());
+  helper.CalculateReachability();
+}
+
+void HGraph::ClearReachabilityInformation() {
+  reachability_graph_.Clear();
+}
+
 void HGraph::ComputeDominanceInformation() {
   DCHECK(reverse_post_order_.empty());
   reverse_post_order_.reserve(blocks_.size());
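To see the shape of the algorithm outside the ART tree, here is a minimal standalone C++ sketch of the same memoized-DFS technique. It assumes the CFG is given as adjacency lists plus a precomputed post order; plain std::vector containers stand in for ART's arena-backed ArenaBitVector and ScopedArenaVector, and ComputeReachability is an illustrative name, not ART API.

#include <cstddef>
#include <vector>

// reach[a][b] == true iff some path a -> ... -> b exists in the graph.
using ReachMatrix = std::vector<std::vector<bool>>;

ReachMatrix ComputeReachability(const std::vector<std::vector<size_t>>& succ,
                                const std::vector<size_t>& post_order) {
  const size_t n = succ.size();
  ReachMatrix reach(n, std::vector<bool>(n, false));
  std::vector<bool> finalized(n, false);  // Rows that are complete and reusable.
  for (size_t node : post_order) {
    std::vector<bool> discovered(n, false);
    std::vector<size_t> work_stack;
    work_stack.reserve(n);  // Each block is pushed at most once.
    discovered[node] = true;
    work_stack.push_back(node);
    while (!work_stack.empty()) {
      size_t cur = work_stack.back();
      work_stack.pop_back();
      if (finalized[cur]) {
        // Memoization: 'cur' was fully explored in an earlier iteration, so
        // its row already holds everything reachable from it. Union that row
        // in instead of re-walking the subgraph behind it.
        for (size_t i = 0; i < n; ++i) {
          if (reach[cur][i]) reach[node][i] = true;
        }
        continue;
      }
      for (size_t s : succ[cur]) {
        reach[node][s] = true;  // 's' is reachable via the edge cur -> s.
        if (!discovered[s]) {
          discovered[s] = true;
          work_stack.push_back(s);
        }
      }
    }
    finalized[node] = true;
  }
  return reach;
}

The processing order is what makes the memoization pay off: in a post order, a block's successors are (back edges aside) finalized before the block itself, so most pops hit the finalized fast path and reuse a whole row rather than repeating a DFS.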
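As a quick usage check of the sketch above, a diamond-shaped CFG gives the same answers HGraph::PathBetween would return, where each query after precomputation is a single bit test:

#include <cassert>

int main() {
  // Diamond CFG: 0 -> {1, 2}, 1 -> 3, 2 -> 3. A valid post order is 3, 1, 2, 0.
  std::vector<std::vector<size_t>> succ = {{1, 2}, {3}, {3}, {}};
  std::vector<size_t> post_order = {3, 1, 2, 0};
  ReachMatrix reach = ComputeReachability(succ, post_order);
  assert(reach[0][3]);   // 0 reaches 3 through either arm of the diamond.
  assert(!reach[3][0]);  // No path back up: reachability is directional.
  assert(!reach[1][2]);  // Sibling arms do not reach each other.
  return 0;
}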
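The row-padding comment in ComputeReachabilityInformation is worth unpacking: rounding each row of the bit matrix up to a whole number of machine words means a row-into-row union never shifts or masks across a row boundary, so it compiles to a straight-line OR loop. A minimal sketch of that layout, assuming 64-bit words for simplicity (the diff rounds to BitVector::kWordBits, whatever that is on the target; PaddedBitMatrix is a hypothetical stand-in for ArenaBitVectorArray):

#include <cstddef>
#include <cstdint>
#include <vector>

class PaddedBitMatrix {
 public:
  PaddedBitMatrix(size_t rows, size_t cols)
      : words_per_row_((cols + 63) / 64),  // RoundUp(cols, 64) / 64.
        words_(rows * words_per_row_, 0u) {}

  void SetBit(size_t row, size_t col) {
    words_[row * words_per_row_ + col / 64] |= uint64_t{1} << (col % 64);
  }

  bool IsBitSet(size_t row, size_t col) const {
    return (words_[row * words_per_row_ + col / 64] >> (col % 64)) & 1u;
  }

  // The payoff of the padding: both rows start word-aligned, so the union is
  // just words_per_row_ word-wide ORs with no cross-row bit fiddling, which
  // the compiler is free to unroll or vectorize.
  void UnionRows(size_t dst, size_t src) {
    uint64_t* d = &words_[dst * words_per_row_];
    const uint64_t* s = &words_[src * words_per_row_];
    for (size_t w = 0; w < words_per_row_; ++w) {
      d[w] |= s[w];
    }
  }

 private:
  const size_t words_per_row_;   // Row stride in 64-bit words.
  std::vector<uint64_t> words_;  // rows * words_per_row_ words, zero-initialized.
};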