ART: Fix a compiler crash for VectorizeDef() idioms.

SAD vectorization idiom (and DotProduct which was based on it)
had a bug when some instruction in the loop was visited twice
causing a compiler crash. GenerateVecOp() was called for both
the idiom root instruction and its argument in the idiom
vectorization routine; however the argument could have been
already processed by that time. It happened when two vectorization
idioms' matched patterns had a common sub-expression.

Test: test-art-target.
Test: 623-checker-loop-regressions.
Change-Id: I8823c52f8ef62377c29310f0e335b9728d11068a
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 5784707..770a011 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -2044,13 +2044,13 @@
       (reduction_type != DataType::Type::kInt32 && reduction_type != DataType::Type::kInt64)) {
     return false;
   }
-  HInstruction* q = instruction->InputAt(0);
-  HInstruction* v = instruction->InputAt(1);
+  HInstruction* acc = instruction->InputAt(0);
+  HInstruction* abs = instruction->InputAt(1);
   HInstruction* a = nullptr;
   HInstruction* b = nullptr;
-  if (v->IsAbs() &&
-      v->GetType() == reduction_type &&
-      IsSubConst2(graph_, v->InputAt(0), /*out*/ &a, /*out*/ &b)) {
+  if (abs->IsAbs() &&
+      abs->GetType() == reduction_type &&
+      IsSubConst2(graph_, abs->InputAt(0), /*out*/ &a, /*out*/ &b)) {
     DCHECK(a != nullptr && b != nullptr);
   } else {
     return false;
@@ -2076,16 +2076,16 @@
   // idiomatic operation. Sequential code uses the original scalar expressions.
   DCHECK(r != nullptr && s != nullptr);
   if (generate_code && vector_mode_ != kVector) {  // de-idiom
-    r = s = v->InputAt(0);
+    r = s = abs->InputAt(0);
   }
-  if (VectorizeUse(node, q, generate_code, sub_type, restrictions) &&
+  if (VectorizeUse(node, acc, generate_code, sub_type, restrictions) &&
       VectorizeUse(node, r, generate_code, sub_type, restrictions) &&
       VectorizeUse(node, s, generate_code, sub_type, restrictions)) {
     if (generate_code) {
       if (vector_mode_ == kVector) {
         vector_map_->Put(instruction, new (global_allocator_) HVecSADAccumulate(
             global_allocator_,
-            vector_map_->Get(q),
+            vector_map_->Get(acc),
             vector_map_->Get(r),
             vector_map_->Get(s),
             HVecOperation::ToProperType(reduction_type, is_unsigned),
@@ -2093,8 +2093,14 @@
             kNoDexPc));
         MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom);
       } else {
-        GenerateVecOp(v, vector_map_->Get(r), nullptr, reduction_type);
-        GenerateVecOp(instruction, vector_map_->Get(q), vector_map_->Get(v), reduction_type);
+        // "GenerateVecOp()" must not be called more than once for each original loop body
+        // instruction. As the SAD idiom processes both "current" instruction ("instruction")
+        // and its ABS input in one go, we must check that for the scalar case the ABS instruction
+        // has not yet been processed.
+        if (vector_map_->find(abs) == vector_map_->end()) {
+          GenerateVecOp(abs, vector_map_->Get(r), nullptr, reduction_type);
+        }
+        GenerateVecOp(instruction, vector_map_->Get(acc), vector_map_->Get(abs), reduction_type);
       }
     }
     return true;
@@ -2116,20 +2122,20 @@
     return false;
   }
 
-  HInstruction* q = instruction->InputAt(0);
-  HInstruction* v = instruction->InputAt(1);
-  if (!v->IsMul() || v->GetType() != reduction_type) {
+  HInstruction* const acc = instruction->InputAt(0);
+  HInstruction* const mul = instruction->InputAt(1);
+  if (!mul->IsMul() || mul->GetType() != reduction_type) {
     return false;
   }
 
-  HInstruction* a = v->InputAt(0);
-  HInstruction* b = v->InputAt(1);
-  HInstruction* r = a;
-  HInstruction* s = b;
-  DataType::Type op_type = GetNarrowerType(a, b);
+  HInstruction* const mul_left = mul->InputAt(0);
+  HInstruction* const mul_right = mul->InputAt(1);
+  HInstruction* r = mul_left;
+  HInstruction* s = mul_right;
+  DataType::Type op_type = GetNarrowerType(mul_left, mul_right);
   bool is_unsigned = false;
 
-  if (!IsNarrowerOperands(a, b, op_type, &r, &s, &is_unsigned)) {
+  if (!IsNarrowerOperands(mul_left, mul_right, op_type, &r, &s, &is_unsigned)) {
     return false;
   }
   op_type = HVecOperation::ToProperType(op_type, is_unsigned);
@@ -2143,17 +2149,17 @@
   // Accept dot product idiom for vectorizable operands. Vectorized code uses the shorthand
   // idiomatic operation. Sequential code uses the original scalar expressions.
   if (generate_code && vector_mode_ != kVector) {  // de-idiom
-    r = a;
-    s = b;
+    r = mul_left;
+    s = mul_right;
   }
-  if (VectorizeUse(node, q, generate_code, op_type, restrictions) &&
+  if (VectorizeUse(node, acc, generate_code, op_type, restrictions) &&
       VectorizeUse(node, r, generate_code, op_type, restrictions) &&
       VectorizeUse(node, s, generate_code, op_type, restrictions)) {
     if (generate_code) {
       if (vector_mode_ == kVector) {
         vector_map_->Put(instruction, new (global_allocator_) HVecDotProd(
             global_allocator_,
-            vector_map_->Get(q),
+            vector_map_->Get(acc),
             vector_map_->Get(r),
             vector_map_->Get(s),
             reduction_type,
@@ -2162,8 +2168,14 @@
             kNoDexPc));
         MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom);
       } else {
-        GenerateVecOp(v, vector_map_->Get(r), vector_map_->Get(s), reduction_type);
-        GenerateVecOp(instruction, vector_map_->Get(q), vector_map_->Get(v), reduction_type);
+        // "GenerateVecOp()" must not be called more than once for each original loop body
+        // instruction. As the DotProd idiom processes both "current" instruction ("instruction")
+        // and its MUL input in one go, we must check that for the scalar case the MUL instruction
+        // has not yet been processed.
+        if (vector_map_->find(mul) == vector_map_->end()) {
+          GenerateVecOp(mul, vector_map_->Get(r), vector_map_->Get(s), reduction_type);
+        }
+        GenerateVecOp(instruction, vector_map_->Get(acc), vector_map_->Get(mul), reduction_type);
       }
     }
     return true;
diff --git a/test/623-checker-loop-regressions/src/Main.java b/test/623-checker-loop-regressions/src/Main.java
index 6fa38f8..36dfc84 100644
--- a/test/623-checker-loop-regressions/src/Main.java
+++ b/test/623-checker-loop-regressions/src/Main.java
@@ -589,6 +589,205 @@
     return a[3];
   }
 
+  // Dot product and SAD vectorization idioms used to have a bug when some
+  // instruction in the loop was visited twice causing a compiler crash.
+  // It happened when two vectorization idioms' matched patterns had a common
+  // sub-expression.
+
+  // Idioms common sub-expression bug: DotProduct and ArraySet.
+  //
+  /// CHECK-START-ARM64: int Main.testDotProdAndSet(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG:       VecDotProd
+  /// CHECK-DAG:       VecStore
+  public static final int testDotProdAndSet(byte[] a, byte[] b, byte[] c) {
+    int s = 1;
+    for (int i = 0; i < b.length; i++) {
+      int temp = a[i] * b[i];
+      c[i]= (byte)temp;
+      s += temp;
+    }
+    return s - 1;
+  }
+
+  // Idioms common sub-expression bug: DotProduct and DotProduct.
+  //
+  /// CHECK-START-ARM64: int Main.testDotProdAndDotProd(byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG:       VecDotProd
+  /// CHECK-DAG:       VecDotProd
+  public static final int testDotProdAndDotProd(byte[] a, byte[] b) {
+    int s0 = 1;
+    int s1 = 1;
+    for (int i = 0; i < b.length; i++) {
+      int temp = a[i] * b[i];
+      s0 += temp;
+      s1 += temp;
+    }
+    return s0 + s1;
+  }
+
+  // Idioms common sub-expression bug: SAD and ArraySet.
+  //
+  /// CHECK-START-{ARM,ARM64}: int Main.testSADAndSet(int[], int[], int[]) loop_optimization (after)
+  /// CHECK-DAG:       VecSADAccumulate
+  /// CHECK-DAG:       VecStore
+  public static int testSADAndSet(int[] x, int[] y, int[] z) {
+    int min_length = Math.min(x.length, y.length);
+    int sad = 0;
+    for (int i = 0; i < min_length; i++) {
+      int temp = Math.abs(x[i] - y[i]);
+      z[i] = temp;
+      sad += temp;
+    }
+    return sad;
+  }
+
+  // Idioms common sub-expression bug: SAD and SAD.
+  /// CHECK-START-{ARM,ARM64}: int Main.testSADAndSAD(int[], int[]) loop_optimization (after)
+  /// CHECK-DAG:       VecSADAccumulate
+  /// CHECK-DAG:       VecSADAccumulate
+  public static final int testSADAndSAD(int[] x, int[] y) {
+    int s0 = 1;
+    int s1 = 1;
+    for (int i = 0; i < x.length; i++) {
+      int temp = Math.abs(x[i] - y[i]);
+      s0 += temp;
+      s1 += temp;
+    }
+    return s0 + s1;
+  }
+
+  // Idioms common sub-expression bug: DotProd and DotProd with extra mul.
+  //
+  /// CHECK-START-ARM64: int Main.testDotProdAndDotProdExtraMul0(byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG:       VecMul
+  /// CHECK-DAG:       VecDotProd
+  /// CHECK-DAG:       VecDotProd
+  public static final int testDotProdAndDotProdExtraMul0(byte[] a, byte[] b) {
+    int s0 = 1;
+    int s1 = 1;
+    for (int i = 0; i < b.length; i++) {
+      int temp0 = a[i] * b[i];
+      int temp1 = (byte)(temp0) * a[i];
+      s0 += temp1;
+      s1 += temp0;
+    }
+    return s0 + s1;
+  }
+
+  // Idioms common sub-expression bug: DotProd and DotProd with extra mul (reversed order).
+  //
+  /// CHECK-START-ARM64: int Main.testDotProdAndDotProdExtraMul1(byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG:       VecMul
+  /// CHECK-DAG:       VecDotProd
+  /// CHECK-DAG:       VecDotProd
+  public static final int testDotProdAndDotProdExtraMul1(byte[] a, byte[] b) {
+    int s0 = 1;
+    int s1 = 1;
+    for (int i = 0; i < b.length; i++) {
+      int temp0 = a[i] * b[i];
+      int temp1 = (byte)(temp0) * a[i];
+      s0 += temp0;
+      s1 += temp1;
+    }
+    return s0 + s1;
+  }
+
+  // Idioms common sub-expression bug: SAD and SAD with extra abs.
+  //
+  /// CHECK-START-{ARM,ARM64}: int Main.testSADAndSADExtraAbs0(int[], int[]) loop_optimization (after)
+  /// CHECK-DAG:       VecSub
+  /// CHECK-DAG:       VecAbs
+  /// CHECK-DAG:       VecSADAccumulate
+  /// CHECK-DAG:       VecSADAccumulate
+  public static final int testSADAndSADExtraAbs0(int[] x, int[] y) {
+    int s0 = 1;
+    int s1 = 1;
+    for (int i = 0; i < x.length; i++) {
+      int temp0 = Math.abs(x[i] - y[i]);
+      int temp1 = Math.abs(temp0 - y[i]);
+      s0 += temp1;
+      s1 += temp0;
+    }
+    return s0 + s1;
+  }
+
+  // Idioms common sub-expression bug: SAD and SAD with extra abs (reversed order).
+  //
+  /// CHECK-START-{ARM,ARM64}: int Main.testSADAndSADExtraAbs1(int[], int[]) loop_optimization (after)
+  /// CHECK-DAG:       VecSub
+  /// CHECK-DAG:       VecAbs
+  /// CHECK-DAG:       VecSADAccumulate
+  /// CHECK-DAG:       VecSADAccumulate
+  public static final int testSADAndSADExtraAbs1(int[] x, int[] y) {
+    int s0 = 1;
+    int s1 = 1;
+    for (int i = 0; i < x.length; i++) {
+      int temp0 = Math.abs(x[i] - y[i]);
+      int temp1 = Math.abs(temp0 - y[i]);
+      s0 += temp0;
+      s1 += temp1;
+    }
+    return s0 + s1;
+  }
+
+
+  // Idioms common sub-expression bug: SAD and DotProd combined.
+  //
+  /// CHECK-START-ARM64: int Main.testSADAndDotProdCombined0(byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG:       VecSub
+  /// CHECK-DAG:       VecSADAccumulate
+  /// CHECK-DAG:       VecDotProd
+  public static final int testSADAndDotProdCombined0(byte[] x, byte[] y) {
+    int s0 = 1;
+    int s1 = 1;
+    for (int i = 0; i < x.length; i++) {
+      int temp0 = x[i] - y[i];
+      int temp1 = Math.abs(temp0);
+      int temp2 = x[i] * (byte)(temp0);
+
+      s0 += temp1;
+      s1 += temp2;
+    }
+    return s0 + s1;
+  }
+
+  // Idioms common sub-expression bug: SAD and DotProd combined (reversed order).
+  /// CHECK-START-ARM64: int Main.testSADAndDotProdCombined1(byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG:       VecSub
+  /// CHECK-DAG:       VecSADAccumulate
+  /// CHECK-DAG:       VecDotProd
+  public static final int testSADAndDotProdCombined1(byte[] x, byte[] y) {
+    int s0 = 1;
+    int s1 = 1;
+    for (int i = 0; i < x.length; i++) {
+      int temp0 = x[i] - y[i];
+      int temp1 = Math.abs(temp0);
+      int temp2 = x[i] * (byte)(temp0);
+
+      s0 += temp2;
+      s1 += temp1;
+    }
+    return s0 + s1;
+  }
+
+  public static final int ARRAY_SIZE = 512;
+
+  private static byte[] createAndInitByteArray(int x) {
+    byte[] a = new byte[ARRAY_SIZE];
+    for (int i = 0; i < a.length; i++) {
+      a[i] = (byte)((~i) + x);
+    }
+    return a;
+  }
+
+  private static int[] createAndInitIntArray(int x) {
+    int[] a = new int[ARRAY_SIZE];
+    for (int i = 0; i < a.length; i++) {
+      a[i] = (~i) + x;
+    }
+    return a;
+  }
+
   public static void main(String[] args) {
     System.loadLibrary(args[0]);
 
@@ -780,6 +979,59 @@
 
     expectEquals(10, reductionIntoReplication());
 
+    {
+        byte[] b_a = createAndInitByteArray(1);
+        byte[] b_b = createAndInitByteArray(2);
+        byte[] b_c = createAndInitByteArray(3);
+        expectEquals(2731008, testDotProdAndSet(b_a, b_b, b_c));
+    }
+    {
+        byte[] b_a = createAndInitByteArray(1);
+        byte[] b_b = createAndInitByteArray(2);
+        expectEquals(5462018, testDotProdAndDotProd(b_a, b_b));
+    }
+    {
+        int[] i_a = createAndInitIntArray(1);
+        int[] i_b = createAndInitIntArray(2);
+        int[] i_c = createAndInitIntArray(3);
+        expectEquals(512, testSADAndSet(i_a, i_b, i_c));
+    }
+    {
+        int[] i_a = createAndInitIntArray(1);
+        int[] i_b = createAndInitIntArray(2);
+        expectEquals(1026, testSADAndSAD(i_a, i_b));
+    }
+    {
+        byte[] b_a = createAndInitByteArray(1);
+        byte[] b_b = createAndInitByteArray(2);
+        expectEquals(2731266, testDotProdAndDotProdExtraMul0(b_a, b_b));
+    }
+    {
+        byte[] b_a = createAndInitByteArray(1);
+        byte[] b_b = createAndInitByteArray(2);
+        expectEquals(2731266, testDotProdAndDotProdExtraMul1(b_a, b_b));
+    }
+    {
+        int[] i_a = createAndInitIntArray(1);
+        int[] i_b = createAndInitIntArray(2);
+        expectEquals(131330, testSADAndSADExtraAbs0(i_a, i_b));
+    }
+    {
+        int[] i_a = createAndInitIntArray(1);
+        int[] i_b = createAndInitIntArray(2);
+        expectEquals(131330, testSADAndSADExtraAbs1(i_a, i_b));
+    }
+    {
+        byte[] b_a = createAndInitByteArray(1);
+        byte[] b_b = createAndInitByteArray(2);
+        expectEquals(1278, testSADAndDotProdCombined0(b_a, b_b));
+    }
+    {
+        byte[] b_a = createAndInitByteArray(1);
+        byte[] b_b = createAndInitByteArray(2);
+        expectEquals(1278, testSADAndDotProdCombined1(b_a, b_b));
+    }
+
     System.out.println("passed");
   }