diff options
author | 2019-04-03 10:46:13 +0100 | |
---|---|---|
committer | 2019-10-29 15:40:27 +0000 | |
commit | 45217376b527cd17d758152c54960e6786288e31 (patch) | |
tree | cc2ae731f7ebfe61af74fa8150025064a0245a8a | |
parent | 8b236fac816beb18e4919e2c4260da843257a4e3 (diff) |
Fix intersecting live ranges created by instruction scheduler
When scheduling code like the following:
LOOP:
v2=phi(v0, v1)
use(v2)
v1=...
goto LOOP
the instruction scheduler can move 'v1=...' before 'use(v2)'. This
causes live ranges of v1 and v2 to intersect and results to a MOV
instruction to be created.
The CL fixes this.
Improvements, Pixel3:
Little CPU, arm64
micro/GCCLoops
Example12 14.1%
Example10b 11.0%
Example23 8.1%
Example24 6.6%
Example10a 5.0%
FFT workload 4.7%
Compress workload 1.2%
Little CPU, arm32
micro/GCCLoops
Example23 7.5%
Example24 4.3%
MonteCarlo workload 1.35%
Big CPU, arm32 and arm64
No significant improvements
No significant regressions (> 5%) are found.
Test: test.py --host --optimizing --jit --gtest
Test: test.py --target --optimizing --jit
Test: run-gtests.sh
Change-Id: I1e4282af18f2d51fde5325a0c00a57e8bbc4fbed
-rw-r--r-- | compiler/optimizing/scheduler.cc | 38 | ||||
-rw-r--r-- | compiler/optimizing/scheduler.h | 23 | ||||
-rw-r--r-- | test/706-checker-scheduler/src/Main.java | 119 |
3 files changed, 177 insertions, 3 deletions
diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc index e5ff8a886a..f722cf91a7 100644 --- a/compiler/optimizing/scheduler.cc +++ b/compiler/optimizing/scheduler.cc @@ -282,6 +282,36 @@ static bool IsBetterCandidateWithMoreLikelyDependencies(HInstruction* new_candid } } +void SchedulingGraph::AddCrossIterationDependencies(SchedulingNode* node) { + for (HInstruction* instruction : node->GetInstruction()->GetInputs()) { + // Having a phi-function from a loop header as an input means the current node of the + // scheduling graph has a cross-iteration dependency because such phi-functions bring values + // from the previous iteration to the current iteration. + if (!instruction->IsLoopHeaderPhi()) { + continue; + } + for (HInstruction* phi_input : instruction->GetInputs()) { + // As a scheduling graph of the current basic block is built by + // processing instructions bottom-up, nullptr returned by GetNode means + // an instruction defining a value for the phi is either before the + // instruction represented by node or it is in a different basic block. + SchedulingNode* def_node = GetNode(phi_input); + + // We don't create a dependency if there are uses besides the use in phi. + // In such cases a register to hold phi_input is usually allocated and + // a MOV instruction is generated. In cases with multiple uses and no MOV + // instruction, reordering creating a MOV instruction can improve + // performance more than an attempt to avoid a MOV instruction. + if (def_node != nullptr && def_node != node && phi_input->GetUses().HasExactlyOneElement()) { + // We have an implicit data dependency between node and def_node. + // AddAddDataDependency cannot be used because it is for explicit data dependencies. + // So AddOtherDependency is used. + AddOtherDependency(def_node, node); + } + } + } +} + void SchedulingGraph::AddDependencies(SchedulingNode* instruction_node, bool is_scheduling_barrier) { HInstruction* instruction = instruction_node->GetInstruction(); @@ -340,7 +370,11 @@ void SchedulingGraph::AddDependencies(SchedulingNode* instruction_node, if (other_node->IsSchedulingBarrier()) { // We have reached a scheduling barrier so we can stop further // processing. - DCHECK(other_node->HasOtherDependency(instruction_node)); + // + // As a "other" dependency is not set up if a data dependency exists, we need to check that + // one of them must exist. + DCHECK(other_node->HasOtherDependency(instruction_node) + || other_node->HasDataDependency(instruction_node)); break; } if (side_effect_dependency_analysis_.HasSideEffectDependency(other, instruction)) { @@ -372,6 +406,8 @@ void SchedulingGraph::AddDependencies(SchedulingNode* instruction_node, AddOtherDependency(GetNode(use.GetUser()->GetHolder()), instruction_node); } } + + AddCrossIterationDependencies(instruction_node); } static const std::string InstructionTypeId(const HInstruction* instruction) { diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h index a97eda7e85..f7180a02d7 100644 --- a/compiler/optimizing/scheduler.h +++ b/compiler/optimizing/scheduler.h @@ -183,7 +183,9 @@ class SchedulingNode : public DeletableArenaObject<kArenaAllocScheduler> { void AddOtherPredecessor(SchedulingNode* predecessor) { // Check whether the predecessor has been added earlier. - if (HasOtherDependency(predecessor)) { + // As an optimization of the scheduling graph, we don't need to create another dependency if + // there is a data dependency between scheduling nodes. + if (HasOtherDependency(predecessor) || HasDataDependency(predecessor)) { return; } other_predecessors_.push_back(predecessor); @@ -362,6 +364,25 @@ class SchedulingGraph : public ValueObject { AddDependency(node, dependency, /*is_data_dependency*/false); } + // Analyze whether the scheduling node has cross-iteration dependencies which mean it uses + // values defined on the previous iteration. + // + // Supported cases: + // + // L: + // v2 = loop_head_phi(v1) + // instr1(v2) + // v1 = instr2 + // goto L + // + // In such cases moving instr2 before instr1 creates intersecting live ranges + // of v1 and v2. As a result a separate register is needed to keep the value + // defined by instr2 which is only used on the next iteration. + // If instr2 is not moved, no additional register is needed. The register + // used by instr1 is reused. + // To prevent such a situation a "other" dependency between instr1 and instr2 must be set. + void AddCrossIterationDependencies(SchedulingNode* node); + // Add dependencies nodes for the given `SchedulingNode`: inputs, environments, and side-effects. void AddDependencies(SchedulingNode* node, bool is_scheduling_barrier = false); diff --git a/test/706-checker-scheduler/src/Main.java b/test/706-checker-scheduler/src/Main.java index af18193388..5a66fbbebc 100644 --- a/test/706-checker-scheduler/src/Main.java +++ b/test/706-checker-scheduler/src/Main.java @@ -322,7 +322,7 @@ public class Main { // but has more complex chains of transforming the original references: // ParameterValue --> BoundType --> NullCheck --> ArrayGet. // ParameterValue --> BoundType --> NullCheck --> IntermediateAddress --> ArraySet. - // After using LSA to analyze the orginal references, the scheduler should be able + // After using LSA to analyze the original references, the scheduler should be able // to find out that 'a' and 'b' may alias, hence unable to schedule these ArraGet/Set. /// CHECK-START-ARM64: void Main.CrossOverLoop2(java.lang.Object, java.lang.Object) scheduler (before) @@ -584,9 +584,126 @@ public class Main { } } + // Check that instructions having cross iteration dependencies are not + // reordered. + // + /// CHECK-START-{ARM,ARM64}: void Main.testCrossItersDependencies() scheduler (before) + /// CHECK: <<ID1:i\d+>> Phi [{{i\d+}},<<ID3:i\d+>>] + /// CHECK: <<ID2:i\d+>> Phi [{{i\d+}},<<ID4:i\d+>>] + // + /// CHECK: <<ID3>> Sub [<<ID1>>,<<ID2>>] + /// CHECK: <<ID4>> Add [<<ID2>>,{{i\d+}}] + + /// CHECK-START-{ARM,ARM64}: void Main.testCrossItersDependencies() scheduler (after) + /// CHECK: <<ID1:i\d+>> Phi [{{i\d+}},<<ID3:i\d+>>] + /// CHECK: <<ID2:i\d+>> Phi [{{i\d+}},<<ID4:i\d+>>] + // + /// CHECK: <<ID3>> Sub [<<ID1>>,<<ID2>>] + /// CHECK: <<ID4>> Add [<<ID2>>,{{i\d+}}] + + /// CHECK-START-ARM: void Main.testCrossItersDependencies() disassembly (after) + /// CHECK: subs + /// CHECK: add + /// CHECK: adds + /// CHECK: ldrh + /// CHECK: cmp + /// CHECK: beq + + /// CHECK-START-ARM64: void Main.testCrossItersDependencies() disassembly (after) + /// CHECK: sub + /// CHECK: add + /// CHECK: add + /// CHECK: ldrh + /// CHECK: cbz + private static void testCrossItersDependencies() { + int[] data = {1, 2, 3, 0}; + int sub = 0; + int sum = data[0]; + for (int i = 1; data[i] != 0; ++i) { + sub -= sum; + sum += data[i]; + } + expectEquals(sub, -4); + expectEquals(sum, 6); + } + + // Check instructions defining values for the next iteration don't become + // self-dependent in a scheduling graph which prevents valid reordering. + // + /// CHECK-START-{ARM,ARM64}: void Main.testNoSelfDependantSchedNode(int) scheduler (before) + /// CHECK: IntermediateAddress + /// CHECK: ArrayGet + /// CHECK: LessThanOrEqual + /// CHECK: Select + /// CHECK: IntermediateAddress + /// CHECK: ArraySet + /// CHECK: Add + + /// CHECK-START-{ARM,ARM64}: void Main.testNoSelfDependantSchedNode(int) scheduler (after) + /// CHECK: IntermediateAddress + /// CHECK: ArrayGet + /// CHECK: IntermediateAddress + /// CHECK: LessThanOrEqual + /// CHECK: Select + /// CHECK: ArraySet + /// CHECK: Add + // + // Parameter n is to prevent unrolling of the main loop. + private static void testNoSelfDependantSchedNode(int n) { + final int MAX = 2; + int[] a = {1, 2, 3}; + int[] b = new int[a.length]; + n = Math.min(n, a.length); + for (int i = 0; i < n; ++i) { + int j = a[i]; + b[i] = (j > MAX ? MAX : 0); + } + expectEquals(b[0], 0); + expectEquals(b[1], 0); + expectEquals(b[2], 2); + } + + // In case of cross iteration dependencies when a value for the next iteration is also used on + // the current iteration a MOV instruction is generated anyway. In such cases setting dependency + // between scheduling nodes will not eliminate MOV. + // In the test 'i+1' is such an example. + // The test checks that a dependency between scheduling nodes (first ArrayGet and Add) is not + // setup and Add is scheduled before ArrayGet. + // + /// CHECK-START-{ARM,ARM64}: void Main.testNonPreventingSchedulingCrossItersDeps(int) scheduler (before) + /// CHECK: IntermediateAddress + /// CHECK-NEXT: ArrayGet + /// CHECK-NEXT: Add + /// CHECK-NEXT: ArrayGet + + /// CHECK-START-{ARM,ARM64}: void Main.testNonPreventingSchedulingCrossItersDeps(int) scheduler (after) + /// CHECK: IntermediateAddress + /// CHECK-NEXT: Add + /// CHECK-NEXT: ArrayGet + /// CHECK-NEXT: ArrayGet + // + // Parameter n is to prevent unrolling of the main loop. + private static void testNonPreventingSchedulingCrossItersDeps(int n) { + int[] a = {1, 2, 3}; + n = Math.min(n, a.length); + for (int i = 0; i < n - 1; ++i) { + if (a[i] < a[i + 1]) { + int tmp = a[i]; + a[i] = a[i + 1]; + a[i + 1] = tmp; + } + } + expectEquals(a[0], 2); + expectEquals(a[1], 3); + expectEquals(a[2], 1); + } + public static void main(String[] args) { testVecSetScalars(); testVecReplicateScalar(); + testCrossItersDependencies(); + testNoSelfDependantSchedNode(3); + testNonPreventingSchedulingCrossItersDeps(3); if ((arrayAccess() + intDiv(10)) != -35) { System.out.println("FAIL"); } |