Fix intersecting live ranges created by instruction scheduler

When scheduling code like the following:
LOOP:
   v2=phi(v0, v1)
   use(v2)
   v1=...
   goto LOOP

the instruction scheduler can move 'v1=...' before 'use(v2)'. This
causes the live ranges of v1 and v2 to intersect and results in a MOV
instruction being created.

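This CL fixes this by adding a cross-iteration dependency to the
scheduling graph between the use of a loop header phi and the
instruction defining the phi's input.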

Improvements, Pixel3:
  Little CPU, arm64
    micro/GCCLoops
      Example12       14.1%
      Example10b      11.0%
      Example23       8.1%
      Example24       6.6%
      Example10a      5.0%
    FFT workload      4.7%
    Compress workload 1.2%

  Little CPU, arm32
    micro/GCCLoops
      Example23         7.5%
      Example24         4.3%
    MonteCarlo workload 1.35%

  Big CPU, arm32 and arm64
    No significant improvements

No significant regressions (> 5%) were found.

Test: test.py --host --optimizing --jit --gtest
Test: test.py --target --optimizing --jit
Test: run-gtests.sh
Change-Id: I1e4282af18f2d51fde5325a0c00a57e8bbc4fbed
diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc
index e5ff8a8..f722cf9 100644
--- a/compiler/optimizing/scheduler.cc
+++ b/compiler/optimizing/scheduler.cc
@@ -282,6 +282,36 @@
   }
 }
 
+void SchedulingGraph::AddCrossIterationDependencies(SchedulingNode* node) {
+  for (HInstruction* instruction : node->GetInstruction()->GetInputs()) {
+    // Having a phi-function from a loop header as an input means the current node of the
+    // scheduling graph has a cross-iteration dependency because such phi-functions bring values
+    // from the previous iteration to the current iteration.
+    if (!instruction->IsLoopHeaderPhi()) {
+      continue;
+    }
+    for (HInstruction* phi_input : instruction->GetInputs()) {
+      // As a scheduling graph of the current basic block is built by
+      // processing instructions bottom-up, nullptr returned by GetNode means
+      // an instruction defining a value for the phi is either before the
+      // instruction represented by node or it is in a different basic block.
+      SchedulingNode* def_node = GetNode(phi_input);
+
+      // We don't create a dependency if there are uses besides the use in phi.
+      // In such cases a register to hold phi_input is usually allocated and
+      // a MOV instruction is generated. In cases with multiple uses and no MOV
+      // instruction, a reordering that creates a MOV instruction can improve
+      // performance more than an attempt to avoid the MOV instruction.
+      if (def_node != nullptr && def_node != node && phi_input->GetUses().HasExactlyOneElement()) {
+        // We have an implicit data dependency between node and def_node.
+        // AddDataDependency cannot be used because it is for explicit data dependencies,
+        // so AddOtherDependency is used instead.
+        AddOtherDependency(def_node, node);
+      }
+    }
+  }
+}
+
 void SchedulingGraph::AddDependencies(SchedulingNode* instruction_node,
                                       bool is_scheduling_barrier) {
   HInstruction* instruction = instruction_node->GetInstruction();
@@ -340,7 +370,11 @@
       if (other_node->IsSchedulingBarrier()) {
         // We have reached a scheduling barrier so we can stop further
         // processing.
-        DCHECK(other_node->HasOtherDependency(instruction_node));
+        //
+        // As an "other" dependency is not set up if a data dependency exists, we need to check
+        // that at least one of them exists.
+        DCHECK(other_node->HasOtherDependency(instruction_node)
+               || other_node->HasDataDependency(instruction_node));
         break;
       }
       if (side_effect_dependency_analysis_.HasSideEffectDependency(other, instruction)) {
@@ -372,6 +406,8 @@
       AddOtherDependency(GetNode(use.GetUser()->GetHolder()), instruction_node);
     }
   }
+
+  AddCrossIterationDependencies(instruction_node);
 }
 
 static const std::string InstructionTypeId(const HInstruction* instruction) {