ART: Fix vectorizer HalvingAdd idiom.

IsAddConst2 function tried to extract addition chains
for the halving add idiom: (A + B) >> 1. The problem
was that regular shift right (x >> 1) was accepted for the
idiom (with {A: x, B: 0}) and not processed as a shift - which
broke the assumptions on shifts right and operand signedness.
This CL fixes that.

Test: 646-checker-simd-hadd.
Test: test-art-target.

Change-Id: Icf71e1a8e8c54e68114d7d5d6c4aa8a47ea5234d
diff --git a/compiler/optimizing/load_store_analysis.cc b/compiler/optimizing/load_store_analysis.cc
index 7d7bb94..7a67fc5 100644
--- a/compiler/optimizing/load_store_analysis.cc
+++ b/compiler/optimizing/load_store_analysis.cc
@@ -30,10 +30,6 @@
   return std::max(l1, l2) <= std::min(h1, h2);
 }
 
-static bool IsAddOrSub(const HInstruction* instruction) {
-  return instruction->IsAdd() || instruction->IsSub();
-}
-
 static bool CanBinaryOpAndIndexAlias(const HBinaryOperation* idx1,
                                      const size_t vector_length1,
                                      const HInstruction* idx2,
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 770a011..b72fbb0 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -305,7 +305,8 @@
                         /*out*/ HInstruction** a,
                         /*out*/ HInstruction** b,
                         /*out*/ int64_t* c) {
-  if (IsAddConst(instruction, a, b, c) && *a != nullptr) {
+  // We want an actual add/sub and not the trivial case where {b: 0, c: 0}.
+  if (IsAddOrSub(instruction) && IsAddConst(instruction, a, b, c) && *a != nullptr) {
     if (*b == nullptr) {
       // Constant is usually already present, unless accumulated.
       *b = graph->GetConstant(instruction->GetType(), (*c));
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 7ed5bca..611e0e7 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -8121,6 +8121,10 @@
   return instruction;
 }
 
+inline bool IsAddOrSub(const HInstruction* instruction) {
+  return instruction->IsAdd() || instruction->IsSub();
+}
+
 void RemoveEnvironmentUses(HInstruction* instruction);
 bool HasEnvironmentUsedByOthers(HInstruction* instruction);
 void ResetEnvironmentInputRecords(HInstruction* instruction);
diff --git a/test/646-checker-simd-hadd/expected.txt b/test/646-checker-simd-hadd/expected.txt
index 93b1f4f..e2b68e7 100644
--- a/test/646-checker-simd-hadd/expected.txt
+++ b/test/646-checker-simd-hadd/expected.txt
@@ -4,3 +4,4 @@
 HaddByte passed
 HaddShort passed
 HaddChar passed
+HaddOther passed
diff --git a/test/646-checker-simd-hadd/src/HaddOther.java b/test/646-checker-simd-hadd/src/HaddOther.java
new file mode 100644
index 0000000..0339f24
--- /dev/null
+++ b/test/646-checker-simd-hadd/src/HaddOther.java
@@ -0,0 +1,184 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class HaddOther {
+  private static final int N = 2 * 1024;
+  private static final int M = N + 31;
+
+  //  Should be just shift right, not halving add.
+  //
+  /// CHECK-START-{ARM,ARM64}: void HaddOther.test_no_hadd_short2short(short[], short[]) loop_optimization (after)
+  /// CHECK: VecShr
+
+  /// CHECK-START-{ARM,ARM64}: void HaddOther.test_no_hadd_short2short(short[], short[]) loop_optimization (after)
+  /// CHECK-NOT: VecHalvingAdd
+  private static void test_no_hadd_short2short(short[] a, short[] out) {
+    int min_length = Math.min(out.length, a.length);
+    for (int i = 0; i < min_length; i++) {
+      out[i] = (short) (a[i] >> 1);
+    }
+  }
+
+  //  This loop is not vectorized: shift right with a signed type is not supported.
+  //
+  /// CHECK-START-{ARM,ARM64}: void HaddOther.test_no_hadd_short2short_logical(short[], short[]) loop_optimization (after)
+  /// CHECK-NOT: VecLoad
+  /// CHECK-NOT: VecHalvingAdd
+  private static void test_no_hadd_short2short_logical(short[] a, short[] out) {
+    int min_length = Math.min(out.length, a.length);
+    for (int i = 0; i < min_length; i++) {
+      out[i] = (short) (a[i] >>> 1);
+    }
+  }
+
+  //  This loop is not vectorized: mismatched packed type size.
+  //
+  /// CHECK-START-{ARM,ARM64}: void HaddOther.test_no_hadd_int2short(int[], short[]) loop_optimization (after)
+  /// CHECK-NOT: VecLoad
+  /// CHECK-NOT: VecHalvingAdd
+  private static void test_no_hadd_int2short(int[] a, short[] out) {
+    int min_length = Math.min(out.length, a.length);
+    for (int i = 0; i < min_length; i++) {
+      out[i] = (short) (a[i] >> 1);
+    }
+  }
+
+  //  Should be just shift right, not halving add.
+  //
+  /// CHECK-START-{ARM,ARM64}: void HaddOther.test_no_hadd_int2int(int[], int[]) loop_optimization (after)
+  /// CHECK: VecShr
+
+  /// CHECK-START-{ARM,ARM64}: void HaddOther.test_no_hadd_int2int(int[], int[]) loop_optimization (after)
+  /// CHECK-NOT: VecHalvingAdd
+  private static void test_no_hadd_int2int(int[] a, int[] out) {
+    int min_length = Math.min(out.length, a.length);
+    for (int i = 0; i < min_length; i++) {
+      out[i] = a[i] >> 1;
+    }
+  }
+
+  //  Should be just add and shift right, not halving add.
+  //
+  /// CHECK-START-{ARM,ARM64}: void HaddOther.test_no_hadd_sum_casted(short[], short[], short[]) loop_optimization (after)
+  /// CHECK: VecAdd
+  /// CHECK: VecShr
+
+  /// CHECK-START-{ARM,ARM64}: void HaddOther.test_no_hadd_sum_casted(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-NOT: VecHalvingAdd
+  private static void test_no_hadd_sum_casted(short[] a, short[] b, short[] out) {
+    int min_length = Math.min(out.length, Math.min(a.length, b.length));
+    for (int i = 0; i < min_length; i++) {
+      out[i] = (short) (((short) (a[i] + b[i])) >> 1);
+    }
+  }
+
+  //  This loop is not vectorized: mismatched packed type size.
+  //
+  /// CHECK-START-{ARM,ARM64}: void HaddOther.test_no_hadd_sum_casted_ints(int[], int[], int[]) loop_optimization (after)
+  /// CHECK-NOT: VecLoad
+  /// CHECK-NOT: VecHalvingAdd
+  private static void test_no_hadd_sum_casted_ints(int[] a, int[] b, int[] out) {
+    int min_length = Math.min(out.length, Math.min(a.length, b.length));
+    for (int i = 0; i < min_length; i++) {
+      out[i] = (short) ((short) (a[i] + b[i]) >> 1);
+    }
+  }
+
+  //  Should be an add, followed by a halving add.
+  //
+  /// CHECK-START-{ARM,ARM64}: void HaddOther.test_no_hadd_sum_casted_plus_const(short[], short[], short[]) loop_optimization (after)
+  /// CHECK: VecAdd
+  /// CHECK: VecHalvingAdd
+  private static void test_no_hadd_sum_casted_plus_const(short[] a, short[] b, short[] out) {
+    int min_length = Math.min(out.length, Math.min(a.length, b.length));
+    for (int i = 0; i < min_length; i++) {
+      out[i] = (short) (((short) (a[i] + b[i]) + 1) >> 1);
+    }
+  }
+
+  public static void main() {
+    short[] sA = new short[M];
+    short[] sB = new short[M];
+    short[] sOut = new short[M];
+    int[] iA = new int[M];
+    int[] iB = new int[M];
+    int[] iOut = new int[M];
+
+    // Some interesting values.
+    short[] interesting = {
+      (short) 0x0000,
+      (short) 0x0001,
+      (short) 0x0002,
+      (short) 0x1234,
+      (short) 0x8000,
+      (short) 0x8001,
+      (short) 0x7fff,
+      (short) 0xffff
+    };
+    // Initialize cross-values to test all cases, and also
+    // set up some extra values to exercise the cleanup loop.
+    for (int i = 0; i < M; i++) {
+      sA[i] = (short) i;
+      sB[i] = interesting[i & 7];
+      iA[i] = i;
+      iB[i] = interesting[i & 7];
+    }
+
+    test_no_hadd_short2short(sA, sOut);
+    for (int i = 0; i < M; i++) {
+      short e = (short) (sA[i] >> 1);
+      expectEquals(e, sOut[i]);
+    }
+    test_no_hadd_short2short_logical(sA, sOut);
+    for (int i = 0; i < M; i++) {
+      short e = (short) (sA[i] >>> 1);
+      expectEquals(e, sOut[i]);
+    }
+    test_no_hadd_int2short(iA, sOut);
+    for (int i = 0; i < M; i++) {
+      short e = (short) (iA[i] >> 1);
+      expectEquals(e, sOut[i]);
+    }
+    test_no_hadd_int2int(iA, iOut);
+    for (int i = 0; i < M; i++) {
+      int e = iA[i] >> 1;
+      expectEquals(e, iOut[i]);
+    }
+    test_no_hadd_sum_casted(sA, sB, sOut);
+    for (int i = 0; i < M; i++) {
+      short e = (short) (((short) (sA[i] + sB[i])) >> 1);
+      expectEquals(e, sOut[i]);
+    }
+    test_no_hadd_sum_casted_ints(iA, iB, iOut);
+    for (int i = 0; i < M; i++) {
+      int e = (short) ((short) (iA[i] + iB[i]) >> 1);
+      expectEquals(e, iOut[i]);
+    }
+    test_no_hadd_sum_casted_plus_const(sA, sB, sOut);
+    for (int i = 0; i < M; i++) {
+      short e = (short) (((short) (sA[i] + sB[i]) + 1) >> 1);
+      expectEquals(e, sOut[i]);
+    }
+
+    System.out.println("HaddOther passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/646-checker-simd-hadd/src/Main.java b/test/646-checker-simd-hadd/src/Main.java
index c5ec7a7..1fb1de2 100644
--- a/test/646-checker-simd-hadd/src/Main.java
+++ b/test/646-checker-simd-hadd/src/Main.java
@@ -22,5 +22,6 @@
     HaddByte.main();
     HaddShort.main();
     HaddChar.main();
+    HaddOther.main();
   }
 }