author     2025-01-30 15:48:55 +0000
committer  2025-02-11 04:53:55 -0800
commit     321bf229a512c1d93b2ad9212b7c4863d551e36c (patch)
tree       6334fdca835d0177a09e1cefe76586a22b4cd01c /compiler
parent     7c2bfb75a878a145c854ae0c2cb70864afd20df6 (diff)
Optimize FindVisitedBlockWithRecyclableSet
By adding extra bookkeeping, we can speed up GVN's ValueSet reuse
algorithm. A block's ValueSet is reused once it will not be used
again, which happens when the last of said block's dominated
blocks or successors is visited, since we visit blocks in RPO.
This lets us maintain a list of free sets ready for reuse and
skip iterating through all visited blocks. The larger the graph,
the greater the benefit.

Based on local compiles, this speeds up GVN by 12-27% and overall
compilation times by 0.6-1.6%.
Bug: 393108375
Test: art/test/testrunner/testrunner.py --host --64 -b --optimizing
Change-Id: I3731e67796a2055907b795656146aaea4f448614
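
To make the scheme concrete, here is a minimal standalone sketch of the
counter-based bookkeeping described above. It is not the ART sources (those
are in the diff below); `Block`, `FreeSetTracker`, and `MarkFreeIfDone` are
illustrative names, and the ValueSets themselves are omitted. The idea: a
block's set is read once by each block it immediately dominates and once by
each successor, so per-block counters decremented as blocks are visited in
RPO pinpoint the moment a set becomes recyclable.

    // Hypothetical, simplified model of the bookkeeping; not ART code.
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct Block {
      uint32_t id = 0;
      Block* dominator = nullptr;     // Immediate dominator; null for the entry block.
      std::vector<Block*> dominated;  // Blocks this block immediately dominates.
      std::vector<Block*> successors;
      std::vector<Block*> predecessors;
    };

    class FreeSetTracker {
     public:
      explicit FreeSetTracker(const std::vector<Block*>& rpo)
          : dominated_to_visit_(rpo.size(), 0),
            successors_to_visit_(rpo.size(), 0),
            visited_(rpo.size(), false),
            free_(rpo.size(), false) {
        for (const Block* b : rpo) {
          dominated_to_visit_[b->id] = static_cast<uint32_t>(b->dominated.size());
          successors_to_visit_[b->id] = static_cast<uint32_t>(b->successors.size());
        }
      }

      // Called once per block, in reverse post order. Visiting `block` consumes
      // one "dominated" reference on its dominator and one "successor" reference
      // on each predecessor; a block whose counters both reach zero (and which
      // has itself been visited) will never be referenced again.
      void Visit(const Block* block) {
        visited_[block->id] = true;
        if (block->dominator != nullptr) {
          --dominated_to_visit_[block->dominator->id];
          MarkFreeIfDone(block->dominator->id);
        }
        for (const Block* pred : block->predecessors) {
          --successors_to_visit_[pred->id];
          MarkFreeIfDone(pred->id);
        }
      }

      bool WillBeReferencedAgain(uint32_t id) const {
        return !visited_[id] || dominated_to_visit_[id] != 0 || successors_to_visit_[id] != 0;
      }

      bool IsFree(uint32_t id) const { return free_[id]; }

     private:
      void MarkFreeIfDone(uint32_t id) {
        if (!WillBeReferencedAgain(id)) {
          free_[id] = true;  // Its ValueSet could now be recycled by another block.
        }
      }

      std::vector<uint32_t> dominated_to_visit_;
      std::vector<uint32_t> successors_to_visit_;
      std::vector<bool> visited_;
      std::vector<bool> free_;
    };

    int main() {
      // Diamond CFG: B0 -> {B1, B2} -> B3. B0 immediately dominates B1, B2, B3.
      Block b0{0}, b1{1}, b2{2}, b3{3};
      b0.dominated = {&b1, &b2, &b3};
      b0.successors = {&b1, &b2};
      b1.dominator = &b0; b1.predecessors = {&b0}; b1.successors = {&b3};
      b2.dominator = &b0; b2.predecessors = {&b0}; b2.successors = {&b3};
      b3.dominator = &b0; b3.predecessors = {&b1, &b2};

      FreeSetTracker tracker({&b0, &b1, &b2, &b3});
      for (Block* b : {&b0, &b1, &b2, &b3}) {
        tracker.Visit(b);
        std::printf("after B%u: B0 free for reuse: %s\n",
                    static_cast<unsigned>(b->id), tracker.IsFree(0) ? "yes" : "no");
      }
      return 0;
    }

Running the sketch on the diamond prints "no" three times and "yes" after B3,
the visit at which B0's last dominated block is consumed and its set becomes
recyclable.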
Diffstat (limited to 'compiler')
-rw-r--r--  compiler/optimizing/gvn.cc | 84
1 file changed, 51 insertions, 33 deletions
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index d088e02132..a82d6c4325 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -362,9 +362,17 @@ class GlobalValueNumberer : public ValueObject {
         allocator_(graph->GetArenaStack()),
         side_effects_(side_effects),
         sets_(graph->GetBlocks().size(), nullptr, allocator_.Adapter(kArenaAllocGvn)),
+        dominated_to_visit_(graph->GetBlocks().size(), allocator_.Adapter(kArenaAllocGvn)),
+        successors_to_visit_(graph->GetBlocks().size(), allocator_.Adapter(kArenaAllocGvn)),
+        free_sets_(&allocator_, graph->GetBlocks().size(), /* expandable= */ false, kArenaAllocGvn),
         visited_blocks_(
             &allocator_, graph->GetBlocks().size(), /* expandable= */ false, kArenaAllocGvn),
-        did_optimization_(false) {}
+        did_optimization_(false) {
+    for (HBasicBlock* block : graph->GetReversePostOrder()) {
+      dominated_to_visit_[block->GetBlockId()] = block->GetDominatedBlocks().size();
+      successors_to_visit_[block->GetBlockId()] = block->GetSuccessors().size();
+    }
+  }
 
   bool Run();
 
@@ -387,23 +395,31 @@ class GlobalValueNumberer : public ValueObject {
     DCHECK(sets_[block->GetBlockId()] != nullptr)
         << "Block B" << block->GetBlockId() << " expected to have a set";
     sets_[block->GetBlockId()] = nullptr;
+    free_sets_.ClearBit(block->GetBlockId());
   }
 
   // Returns false if the GlobalValueNumberer has already visited all blocks
   // which may reference `block`.
-  bool WillBeReferencedAgain(HBasicBlock* block) const;
+  bool WillBeReferencedAgain(uint32_t block_id) const;
 
   // Iterates over visited blocks and finds one which has a ValueSet such that:
   // (a) it will not be referenced in the future, and
   // (b) it can hold a copy of `reference_set` with a reasonable load factor.
-  HBasicBlock* FindVisitedBlockWithRecyclableSet(HBasicBlock* block,
-                                                 const ValueSet& reference_set) const;
+  HBasicBlock* FindVisitedBlockWithRecyclableSet(const ValueSet& reference_set) const;
 
   // ValueSet for blocks. Initially null, but for an individual block they
   // are allocated and populated by the dominator, and updated by all blocks
   // in the path from the dominator to the block.
   ScopedArenaVector<ValueSet*> sets_;
 
+  // Extra bookkeeping to speed up GVN, indexed by block id.
+  // Number of dominated blocks left to visit.
+  ScopedArenaVector<uint32_t> dominated_to_visit_;
+  // Number of successor blocks left to visit.
+  ScopedArenaVector<uint32_t> successors_to_visit_;
+  // True iff the block's ValueSet is free to be reused by another block.
+  ArenaBitVector free_sets_;
+
   // BitVector which serves as a fast-access map from block id to
   // visited/unvisited Boolean.
   ArenaBitVector visited_blocks_;
@@ -449,7 +465,7 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
     // Try to find a basic block which will never be referenced again and whose
     // ValueSet can therefore be recycled. We will need to copy `dominator_set`
     // into the recycled set, so we pass `dominator_set` as a reference for size.
-    HBasicBlock* recyclable = FindVisitedBlockWithRecyclableSet(block, *dominator_set);
+    HBasicBlock* recyclable = FindVisitedBlockWithRecyclableSet(*dominator_set);
     if (recyclable == nullptr) {
       // No block with a suitable ValueSet found. Allocate a new one and
       // copy `dominator_set` into it.
@@ -528,54 +544,56 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
   }
 
   visited_blocks_.SetBit(block->GetBlockId());
-}
-
-bool GlobalValueNumberer::WillBeReferencedAgain(HBasicBlock* block) const {
-  DCHECK(visited_blocks_.IsBitSet(block->GetBlockId()));
-
-  for (const HBasicBlock* dominated_block : block->GetDominatedBlocks()) {
-    if (!visited_blocks_.IsBitSet(dominated_block->GetBlockId())) {
-      return true;
+
+  // Bookkeeping to mark ValueSets to be reused. We mark them as free if the dominator /
+  // predecessor will never be referenced again, and if it hasn't been reused already by this
+  // method (See the `dominator->GetSuccessors().size() == 1` case).
+  if (block->GetDominator() != nullptr) {
+    const uint32_t id = block->GetDominator()->GetBlockId();
+    dominated_to_visit_[id]--;
+    if (!WillBeReferencedAgain(id) && sets_[id] != nullptr) {
+      free_sets_.SetBit(id);
     }
   }
-
-  for (const HBasicBlock* successor : block->GetSuccessors()) {
-    if (!visited_blocks_.IsBitSet(successor->GetBlockId())) {
-      return true;
+  for (HBasicBlock* pred : predecessors) {
+    const uint32_t id = pred->GetBlockId();
+    successors_to_visit_[id]--;
+    if (!WillBeReferencedAgain(id) && sets_[id] != nullptr) {
+      free_sets_.SetBit(id);
     }
   }
+}
 
-  return false;
+bool GlobalValueNumberer::WillBeReferencedAgain(uint32_t block_id) const {
+  // Block itself hasn't been visited, or
+  // a dominated block has yet to be visited, or
+  // a successor is yet to be visited.
+  return !visited_blocks_.IsBitSet(block_id) ||
+         dominated_to_visit_[block_id] != 0 ||
+         successors_to_visit_[block_id] != 0;
 }
 
 HBasicBlock* GlobalValueNumberer::FindVisitedBlockWithRecyclableSet(
-    HBasicBlock* block, const ValueSet& reference_set) const {
+    const ValueSet& reference_set) const {
   HBasicBlock* secondary_match = nullptr;
 
-  for (size_t block_id : visited_blocks_.Indexes()) {
+  // TODO(solanes): If we keep `free_sets_` sorted by size we could do a binary search instead of a
+  // linear one. Note that while a HashMap<size, free_sets> is better for knowing if there's an
+  // exact match, that data structure is worse for the exact_match=false case.
+  for (size_t block_id : free_sets_.Indexes()) {
+    DCHECK(!WillBeReferencedAgain(block_id));
     ValueSet* current_set = sets_[block_id];
-    if (current_set == nullptr) {
-      // Set was already recycled.
-      continue;
-    }
-
-    HBasicBlock* current_block = block->GetGraph()->GetBlocks()[block_id];
+    DCHECK_NE(current_set, nullptr);
 
     // We test if `current_set` has enough buckets to store a copy of
    // `reference_set` with a reasonable load factor. If we find a set whose
     // number of buckets matches perfectly, we return right away. If we find one
     // that is larger, we return it if no perfectly-matching set is found.
-    // Note that we defer testing WillBeReferencedAgain until all other criteria
-    // have been satisfied because it might be expensive.
     if (current_set->CanHoldCopyOf(reference_set, /* exact_match= */ true)) {
-      if (!WillBeReferencedAgain(current_block)) {
-        return current_block;
-      }
+      return graph_->GetBlocks()[block_id];
     } else if (secondary_match == nullptr &&
                current_set->CanHoldCopyOf(reference_set, /* exact_match= */ false)) {
-      if (!WillBeReferencedAgain(current_block)) {
-        secondary_match = current_block;
-      }
+      secondary_match = graph_->GetBlocks()[block_id];
     }
   }
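
A note on the search this change preserves: the recycler still prefers a free
set whose bucket count matches the reference set exactly, falling back to the
first larger candidate. The following is a rough standalone sketch of that
policy, with `PickRecyclableSet` and `capacities` as hypothetical stand-ins;
the real CanHoldCopyOf check also accounts for load factor.

    #include <cstddef>
    #include <vector>

    // Hypothetical stand-in for the two-tier match: return the index of a free
    // set whose capacity equals `needed` (perfect fit, returned immediately),
    // else the first one that is at least as large, else -1.
    int PickRecyclableSet(const std::vector<std::size_t>& capacities, std::size_t needed) {
      int secondary_match = -1;
      for (std::size_t i = 0; i < capacities.size(); ++i) {
        if (capacities[i] == needed) {
          return static_cast<int>(i);  // Exact bucket-count match: stop searching.
        }
        if (secondary_match == -1 && capacities[i] > needed) {
          secondary_match = static_cast<int>(i);  // Remember the first oversized fit.
        }
      }
      return secondary_match;  // May still be -1: caller allocates a fresh set.
    }

Keeping `free_sets_` sorted by capacity, as the TODO in the diff suggests,
would turn this linear scan into a binary search for the oversized fallback,
whereas an exact-match hash map would not help the exact_match=false case.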