Fix intersecting live ranges created by instruction scheduler
When scheduling code like the following:
LOOP:
v2=phi(v0, v1)
use(v2)
v1=...
goto LOOP
the instruction scheduler can move 'v1=...' before 'use(v2)'. This
causes the live ranges of v1 and v2 to intersect and results in a MOV
instruction being created.
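This CL fixes this by adding an "other" dependency between such instructions in the scheduling graph, so the scheduler does not reorder them.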
Improvements, Pixel3:
Little CPU, arm64
micro/GCCLoops
Example12 14.1%
Example10b 11.0%
Example23 8.1%
Example24 6.6%
Example10a 5.0%
FFT workload 4.7%
Compress workload 1.2%
Little CPU, arm32
micro/GCCLoops
Example23 7.5%
Example24 4.3%
MonteCarlo workload 1.35%
Big CPU, arm32 and arm64
No significant improvements
No significant regressions (> 5%) were found.
Test: test.py --host --optimizing --jit --gtest
Test: test.py --target --optimizing --jit
Test: run-gtests.sh
Change-Id: I1e4282af18f2d51fde5325a0c00a57e8bbc4fbed
diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc
index e5ff8a8..f722cf9 100644
--- a/compiler/optimizing/scheduler.cc
+++ b/compiler/optimizing/scheduler.cc
@@ -282,6 +282,36 @@
}
}
+void SchedulingGraph::AddCrossIterationDependencies(SchedulingNode* node) {
+ for (HInstruction* instruction : node->GetInstruction()->GetInputs()) {
+ // Having a phi-function from a loop header as an input means the current node of the
+ // scheduling graph has a cross-iteration dependency because such phi-functions bring values
+ // from the previous iteration to the current iteration.
+ if (!instruction->IsLoopHeaderPhi()) {
+ continue;
+ }
+ for (HInstruction* phi_input : instruction->GetInputs()) {
+ // As the scheduling graph of the current basic block is built by
+ // processing instructions bottom-up, nullptr returned by GetNode means
+ // the instruction defining a value for the phi is either before the
+ // instruction represented by node or in a different basic block.
+ SchedulingNode* def_node = GetNode(phi_input);
+
+ // We don't create a dependency if there are uses besides the use in the phi.
+ // In such cases a register to hold phi_input is usually allocated and
+ // a MOV instruction is generated. In cases with multiple uses and no MOV
+ // instruction, a reordering that creates a MOV instruction can improve
+ // performance more than an attempt to avoid the MOV instruction.
+ if (def_node != nullptr && def_node != node && phi_input->GetUses().HasExactlyOneElement()) {
+ // We have an implicit data dependency between node and def_node.
+ // AddDataDependency cannot be used because it is for explicit data dependencies.
+ // So AddOtherDependency is used.
+ AddOtherDependency(def_node, node);
+ }
+ }
+ }
+}
+
void SchedulingGraph::AddDependencies(SchedulingNode* instruction_node,
bool is_scheduling_barrier) {
HInstruction* instruction = instruction_node->GetInstruction();
@@ -340,7 +370,11 @@
if (other_node->IsSchedulingBarrier()) {
// We have reached a scheduling barrier so we can stop further
// processing.
- DCHECK(other_node->HasOtherDependency(instruction_node));
+ //
+ // As an "other" dependency is not set up if a data dependency exists, we need to check
+ // that one of them exists.
+ DCHECK(other_node->HasOtherDependency(instruction_node)
+ || other_node->HasDataDependency(instruction_node));
break;
}
if (side_effect_dependency_analysis_.HasSideEffectDependency(other, instruction)) {
@@ -372,6 +406,8 @@
AddOtherDependency(GetNode(use.GetUser()->GetHolder()), instruction_node);
}
}
+
+ AddCrossIterationDependencies(instruction_node);
}
static const std::string InstructionTypeId(const HInstruction* instruction) {
diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h
index a97eda7..f7180a0 100644
--- a/compiler/optimizing/scheduler.h
+++ b/compiler/optimizing/scheduler.h
@@ -183,7 +183,9 @@
void AddOtherPredecessor(SchedulingNode* predecessor) {
// Check whether the predecessor has been added earlier.
- if (HasOtherDependency(predecessor)) {
+ // As an optimization of the scheduling graph, we don't need to create another dependency if
+ // there is a data dependency between scheduling nodes.
+ if (HasOtherDependency(predecessor) || HasDataDependency(predecessor)) {
return;
}
other_predecessors_.push_back(predecessor);
@@ -362,6 +364,25 @@
AddDependency(node, dependency, /*is_data_dependency*/false);
}
+ // Analyze whether the scheduling node has cross-iteration dependencies, which means it uses
+ // values defined in the previous iteration.
+ //
+ // Supported cases:
+ //
+ // L:
+ // v2 = loop_head_phi(v1)
+ // instr1(v2)
+ // v1 = instr2
+ // goto L
+ //
+ // In such cases moving instr2 before instr1 creates intersecting live ranges
+ // of v1 and v2. As a result a separate register is needed to keep the value
+ // defined by instr2 which is only used on the next iteration.
+ // If instr2 is not moved, no additional register is needed. The register
+ // used by instr1 is reused.
+ // To prevent such a situation, an "other" dependency between instr1 and instr2 must be set.
+ void AddCrossIterationDependencies(SchedulingNode* node);
+
// Add dependencies nodes for the given `SchedulingNode`: inputs, environments, and side-effects.
void AddDependencies(SchedulingNode* node, bool is_scheduling_barrier = false);