summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Santiago Aboy Solanes <solanes@google.com> 2024-01-30 14:46:54 +0000
committer Santiago Aboy Solanes <solanes@google.com> 2024-01-30 16:45:22 +0000
commita7c461a767fd6cc3babc41179e0e945e6044ffd1 (patch)
treebaa90b0120c7b33590388f45b43befd02e2eb98c
parentf5307a31f5b67f6184cbb7e8b7fab61be3725fce (diff)
Speed up HConstantFoldingVisitor::PropagateValue
We can speed it up in two ways: 1) Don't call it if it has exactly one element, as we will never be able to replace its use in the if clause 2) Lazily compute the dominated blocks when needed Compiling locally GMS, HConstantFoldingVisitor::VisitIf goes down from 1.8% of the compile time to 0.7%. Most of this improvement (90%+) is coming from the `1)` optimization. This is because there are many cases where we have only one use (the if), which is in the same block so we compute the domination to always end up not doing the optimization. Bug: 278626992 Test: Locally compile gms Test: art/test/testrunner/testrunner.py --host --64 --optimizing -b Change-Id: Ic17b4b44840c7efa0224504031bf635584850ced
-rw-r--r--compiler/optimizing/constant_folding.cc6
-rw-r--r--compiler/optimizing/nodes.cc52
2 files changed, 35 insertions, 23 deletions
diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc
index 64ef17898a..b0a65ca64d 100644
--- a/compiler/optimizing/constant_folding.cc
+++ b/compiler/optimizing/constant_folding.cc
@@ -229,8 +229,10 @@ void HConstantFoldingVisitor::PropagateValue(HBasicBlock* starting_block,
uses_before = variable->GetUses().SizeSlow();
}
- variable->ReplaceUsesDominatedBy(
- starting_block->GetFirstInstruction(), constant, /* strictly_dominated= */ false);
+ if (!variable->GetUses().HasExactlyOneElement()) {
+ variable->ReplaceUsesDominatedBy(
+ starting_block->GetFirstInstruction(), constant, /* strictly_dominated= */ false);
+ }
if (recording_stats) {
uses_after = variable->GetUses().SizeSlow();
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 833b7e9ca2..94f197d8f1 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -18,6 +18,7 @@
#include <algorithm>
#include <cfloat>
#include <functional>
+#include <optional>
#include "art_method-inl.h"
#include "base/arena_allocator.h"
@@ -1410,29 +1411,36 @@ void HInstruction::ReplaceWith(HInstruction* other) {
void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator,
HInstruction* replacement,
bool strictly_dominated) {
- // Get the dominated blocks first to faster calculation of domination afterwards.
- HGraph* graph = GetBlock()->GetGraph();
- ArenaBitVector visited_blocks(graph->GetAllocator(),
- graph->GetBlocks().size(),
- /* expandable= */ false,
- kArenaAllocMisc);
- visited_blocks.ClearAllBits();
- ScopedArenaAllocator allocator(graph->GetArenaStack());
- ScopedArenaQueue<const HBasicBlock*> worklist(allocator.Adapter(kArenaAllocMisc));
HBasicBlock* dominator_block = dominator->GetBlock();
- worklist.push(dominator_block);
+ std::optional<ArenaBitVector> visited_blocks;
- while (!worklist.empty()) {
- const HBasicBlock* current = worklist.front();
- worklist.pop();
- visited_blocks.SetBit(current->GetBlockId());
- for (HBasicBlock* dominated : current->GetDominatedBlocks()) {
- if (visited_blocks.IsBitSet(dominated->GetBlockId())) {
- continue;
+ // Lazily compute the dominated blocks to faster calculation of domination afterwards.
+ auto maybe_generate_visited_blocks = [&visited_blocks, this, dominator_block]() {
+ if (visited_blocks.has_value()) {
+ return;
+ }
+ HGraph* graph = GetBlock()->GetGraph();
+ visited_blocks.emplace(graph->GetAllocator(),
+ graph->GetBlocks().size(),
+ /* expandable= */ false,
+ kArenaAllocMisc);
+ visited_blocks->ClearAllBits();
+ ScopedArenaAllocator allocator(graph->GetArenaStack());
+ ScopedArenaQueue<const HBasicBlock*> worklist(allocator.Adapter(kArenaAllocMisc));
+ worklist.push(dominator_block);
+
+ while (!worklist.empty()) {
+ const HBasicBlock* current = worklist.front();
+ worklist.pop();
+ visited_blocks->SetBit(current->GetBlockId());
+ for (HBasicBlock* dominated : current->GetDominatedBlocks()) {
+ if (visited_blocks->IsBitSet(dominated->GetBlockId())) {
+ continue;
+ }
+ worklist.push(dominated);
}
- worklist.push(dominated);
}
- }
+ };
const HUseList<HInstruction*>& uses = GetUses();
for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
@@ -1448,7 +1456,8 @@ void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator,
strictly_dominated ? dominator->StrictlyDominates(user) : dominator->Dominates(user);
} else {
// Block domination.
- dominated = visited_blocks.IsBitSet(block->GetBlockId());
+ maybe_generate_visited_blocks();
+ dominated = visited_blocks->IsBitSet(block->GetBlockId());
}
if (dominated) {
@@ -1458,7 +1467,8 @@ void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator,
// We do not perform this for catch phis as we don't have control flow support
// for their inputs.
HBasicBlock* predecessor = block->GetPredecessors()[index];
- if (visited_blocks.IsBitSet(predecessor->GetBlockId())) {
+ maybe_generate_visited_blocks();
+ if (visited_blocks->IsBitSet(predecessor->GetBlockId())) {
user->ReplaceInput(replacement, index);
}
}