diff options
| author | 2024-01-30 14:46:54 +0000 | |
|---|---|---|
| committer | 2024-01-30 16:45:22 +0000 | |
| commit | a7c461a767fd6cc3babc41179e0e945e6044ffd1 (patch) | |
| tree | baa90b0120c7b33590388f45b43befd02e2eb98c | |
| parent | f5307a31f5b67f6184cbb7e8b7fab61be3725fce (diff) | |
Speed up HConstantFoldingVisitor::PropagateValue
We can speed it up in two ways:
1) Don't call it if it has exactly one element, as we will never
be able to replace its use in the if clause
2) Lazily compute the dominated blocks when needed
Compiling locally GMS, HConstantFoldingVisitor::VisitIf goes down
from 1.8% of the compile time to 0.7%. Most of this improvement
(90%+) is coming from the `1)` optimization. This is because there
are many cases where we have only one use (the if), which is in the
same block so we compute the domination to always end up not doing
the optimization.
Bug: 278626992
Test: Locally compile gms
Test: art/test/testrunner/testrunner.py --host --64 --optimizing -b
Change-Id: Ic17b4b44840c7efa0224504031bf635584850ced
| -rw-r--r-- | compiler/optimizing/constant_folding.cc | 6 | ||||
| -rw-r--r-- | compiler/optimizing/nodes.cc | 52 |
2 files changed, 35 insertions, 23 deletions
diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc index 64ef17898a..b0a65ca64d 100644 --- a/compiler/optimizing/constant_folding.cc +++ b/compiler/optimizing/constant_folding.cc @@ -229,8 +229,10 @@ void HConstantFoldingVisitor::PropagateValue(HBasicBlock* starting_block, uses_before = variable->GetUses().SizeSlow(); } - variable->ReplaceUsesDominatedBy( - starting_block->GetFirstInstruction(), constant, /* strictly_dominated= */ false); + if (!variable->GetUses().HasExactlyOneElement()) { + variable->ReplaceUsesDominatedBy( + starting_block->GetFirstInstruction(), constant, /* strictly_dominated= */ false); + } if (recording_stats) { uses_after = variable->GetUses().SizeSlow(); diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 833b7e9ca2..94f197d8f1 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -18,6 +18,7 @@ #include <algorithm> #include <cfloat> #include <functional> +#include <optional> #include "art_method-inl.h" #include "base/arena_allocator.h" @@ -1410,29 +1411,36 @@ void HInstruction::ReplaceWith(HInstruction* other) { void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement, bool strictly_dominated) { - // Get the dominated blocks first to faster calculation of domination afterwards. - HGraph* graph = GetBlock()->GetGraph(); - ArenaBitVector visited_blocks(graph->GetAllocator(), - graph->GetBlocks().size(), - /* expandable= */ false, - kArenaAllocMisc); - visited_blocks.ClearAllBits(); - ScopedArenaAllocator allocator(graph->GetArenaStack()); - ScopedArenaQueue<const HBasicBlock*> worklist(allocator.Adapter(kArenaAllocMisc)); HBasicBlock* dominator_block = dominator->GetBlock(); - worklist.push(dominator_block); + std::optional<ArenaBitVector> visited_blocks; - while (!worklist.empty()) { - const HBasicBlock* current = worklist.front(); - worklist.pop(); - visited_blocks.SetBit(current->GetBlockId()); - for (HBasicBlock* dominated : current->GetDominatedBlocks()) { - if (visited_blocks.IsBitSet(dominated->GetBlockId())) { - continue; + // Lazily compute the dominated blocks to faster calculation of domination afterwards. + auto maybe_generate_visited_blocks = [&visited_blocks, this, dominator_block]() { + if (visited_blocks.has_value()) { + return; + } + HGraph* graph = GetBlock()->GetGraph(); + visited_blocks.emplace(graph->GetAllocator(), + graph->GetBlocks().size(), + /* expandable= */ false, + kArenaAllocMisc); + visited_blocks->ClearAllBits(); + ScopedArenaAllocator allocator(graph->GetArenaStack()); + ScopedArenaQueue<const HBasicBlock*> worklist(allocator.Adapter(kArenaAllocMisc)); + worklist.push(dominator_block); + + while (!worklist.empty()) { + const HBasicBlock* current = worklist.front(); + worklist.pop(); + visited_blocks->SetBit(current->GetBlockId()); + for (HBasicBlock* dominated : current->GetDominatedBlocks()) { + if (visited_blocks->IsBitSet(dominated->GetBlockId())) { + continue; + } + worklist.push(dominated); } - worklist.push(dominated); } - } + }; const HUseList<HInstruction*>& uses = GetUses(); for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) { @@ -1448,7 +1456,8 @@ void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator, strictly_dominated ? dominator->StrictlyDominates(user) : dominator->Dominates(user); } else { // Block domination. - dominated = visited_blocks.IsBitSet(block->GetBlockId()); + maybe_generate_visited_blocks(); + dominated = visited_blocks->IsBitSet(block->GetBlockId()); } if (dominated) { @@ -1458,7 +1467,8 @@ void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator, // We do not perform this for catch phis as we don't have control flow support // for their inputs. HBasicBlock* predecessor = block->GetPredecessors()[index]; - if (visited_blocks.IsBitSet(predecessor->GetBlockId())) { + maybe_generate_visited_blocks(); + if (visited_blocks->IsBitSet(predecessor->GetBlockId())) { user->ReplaceInput(replacement, index); } } |