ART: Implement predicated SIMD vectorization.

This CL brings support for predicated execution for
auto-vectorizer and implements arm64 SVE vector backend.

This version passes all the VIXL simulator-runnable tests in
SVE mode with checker off (as all VecOp CHECKs need to be
adjusted for an extra input) and all tests in NEON mode.

Test: art SIMD tests on VIXL simulator.
Test: art tests on FVP (steps in test/README.arm_fvp.md)

Change-Id: Ib78bde31a15e6713d875d6668ad4458f5519605f
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 7137617..8970372 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -289,56 +289,72 @@
   }
 
   ArenaAllocator* allocator = mul->GetBlock()->GetGraph()->GetAllocator();
-
-  if (mul->HasOnlyOneNonEnvironmentUse()) {
-    HInstruction* use = mul->GetUses().front().GetUser();
-    if (use->IsVecAdd() || use->IsVecSub()) {
-      // Replace code looking like
-      //    VECMUL tmp, x, y
-      //    VECADD/SUB dst, acc, tmp
-      // with
-      //    VECMULACC dst, acc, x, y
-      // Note that we do not want to (unconditionally) perform the merge when the
-      // multiplication has multiple uses and it can be merged in all of them.
-      // Multiple uses could happen on the same control-flow path, and we would
-      // then increase the amount of work. In the future we could try to evaluate
-      // whether all uses are on different control-flow paths (using dominance and
-      // reverse-dominance information) and only perform the merge when they are.
-      HInstruction* accumulator = nullptr;
-      HVecBinaryOperation* binop = use->AsVecBinaryOperation();
-      HInstruction* binop_left = binop->GetLeft();
-      HInstruction* binop_right = binop->GetRight();
-      // This is always true since the `HVecMul` has only one use (which is checked above).
-      DCHECK_NE(binop_left, binop_right);
-      if (binop_right == mul) {
-        accumulator = binop_left;
-      } else if (use->IsVecAdd()) {
-        DCHECK_EQ(binop_left, mul);
-        accumulator = binop_right;
-      }
-
-      HInstruction::InstructionKind kind =
-          use->IsVecAdd() ? HInstruction::kAdd : HInstruction::kSub;
-      if (accumulator != nullptr) {
-        HVecMultiplyAccumulate* mulacc =
-            new (allocator) HVecMultiplyAccumulate(allocator,
-                                                   kind,
-                                                   accumulator,
-                                                   mul->GetLeft(),
-                                                   mul->GetRight(),
-                                                   binop->GetPackedType(),
-                                                   binop->GetVectorLength(),
-                                                   binop->GetDexPc());
-
-        binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
-        DCHECK(!mul->HasUses());
-        mul->GetBlock()->RemoveInstruction(mul);
-        return true;
-      }
-    }
+  if (!mul->HasOnlyOneNonEnvironmentUse()) {
+    return false;
+  }
+  HInstruction* binop = mul->GetUses().front().GetUser();
+  if (!binop->IsVecAdd() && !binop->IsVecSub()) {
+    return false;
   }
 
-  return false;
+  // Replace code looking like
+  //    VECMUL tmp, x, y
+  //    VECADD/SUB dst, acc, tmp
+  // with
+  //    VECMULACC dst, acc, x, y
+  // Note that we do not want to (unconditionally) perform the merge when the
+  // multiplication has multiple uses and it can be merged in all of them.
+  // Multiple uses could happen on the same control-flow path, and we would
+  // then increase the amount of work. In the future we could try to evaluate
+  // whether all uses are on different control-flow paths (using dominance and
+  // reverse-dominance information) and only perform the merge when they are.
+  HInstruction* accumulator = nullptr;
+  HVecBinaryOperation* vec_binop = binop->AsVecBinaryOperation();
+  HInstruction* binop_left = vec_binop->GetLeft();
+  HInstruction* binop_right = vec_binop->GetRight();
+  // This is always true since the `HVecMul` has only one use (which is checked above).
+  DCHECK_NE(binop_left, binop_right);
+  if (binop_right == mul) {
+    accumulator = binop_left;
+  } else {
+    DCHECK_EQ(binop_left, mul);
+    // Only addition is commutative.
+    if (!binop->IsVecAdd()) {
+      return false;
+    }
+    accumulator = binop_right;
+  }
+
+  DCHECK(accumulator != nullptr);
+  HInstruction::InstructionKind kind =
+      binop->IsVecAdd() ? HInstruction::kAdd : HInstruction::kSub;
+
+  bool predicated_simd = vec_binop->IsPredicated();
+  if (predicated_simd && !HVecOperation::HaveSamePredicate(vec_binop, mul)) {
+    return false;
+  }
+
+  HVecMultiplyAccumulate* mulacc =
+      new (allocator) HVecMultiplyAccumulate(allocator,
+                                             kind,
+                                             accumulator,
+                                             mul->GetLeft(),
+                                             mul->GetRight(),
+                                             vec_binop->GetPackedType(),
+                                             vec_binop->GetVectorLength(),
+                                             vec_binop->GetDexPc());
+
+
+
+  vec_binop->GetBlock()->ReplaceAndRemoveInstructionWith(vec_binop, mulacc);
+  if (predicated_simd) {
+    mulacc->SetGoverningPredicate(vec_binop->GetGoverningPredicate(),
+                                  vec_binop->GetPredicationKind());
+  }
+
+  DCHECK(!mul->HasUses());
+  mul->GetBlock()->RemoveInstruction(mul);
+  return true;
 }
 
 void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) {