Made idiom recognition more robust. Rationale: Recognition is now more robust with respect to operation order or even cancelling constants. Test: test-art-target, test-art-host Change-Id: I4e920150e20e1453bb081e3f0ddcda8f1c605672

commit: 5f80500bba3df590f0bfffea2ebe2adee7c40763 [log] [tgz]
author: Aart Bik <ajcbik@google.com> Tue May 16 16:42:41 2017 -0700
committer: Aart Bik <ajcbik@google.com> Thu May 18 09:30:36 2017 -0700
tree: ed18a25cc6dd29acea8641069295d81c3390d88d
parent: 536e54a6af85127d7f17037360deb08e0a7dc4eb [diff]
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 4067aa3..963df5a 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc

@@ -173,6 +173,51 @@
   return false;
 }
 
+// Detect up to two instructions a and b, and an acccumulated constant c.
+static bool IsAddConstHelper(HInstruction* instruction,
+                             /*out*/ HInstruction** a,
+                             /*out*/ HInstruction** b,
+                             /*out*/ int64_t* c,
+                             int32_t depth) {
+  static constexpr int32_t kMaxDepth = 8;  // don't search too deep
+  int64_t value = 0;
+  if (IsInt64AndGet(instruction, &value)) {
+    *c += value;
+    return true;
+  } else if (instruction->IsAdd() && depth <= kMaxDepth) {
+    return IsAddConstHelper(instruction->InputAt(0), a, b, c, depth + 1) &&
+           IsAddConstHelper(instruction->InputAt(1), a, b, c, depth + 1);
+  } else if (*a == nullptr) {
+    *a = instruction;
+    return true;
+  } else if (*b == nullptr) {
+    *b = instruction;
+    return true;
+  }
+  return false;  // too many non-const operands
+}
+
+// Detect a + b + c for an optional constant c.
+static bool IsAddConst(HInstruction* instruction,
+                       /*out*/ HInstruction** a,
+                       /*out*/ HInstruction** b,
+                       /*out*/ int64_t* c) {
+  if (instruction->IsAdd()) {
+    // Try to find a + b and accumulated c.
+    if (IsAddConstHelper(instruction->InputAt(0), a, b, c, /*depth*/ 0) &&
+        IsAddConstHelper(instruction->InputAt(1), a, b, c, /*depth*/ 0) &&
+        *b != nullptr) {
+      return true;
+    }
+    // Found a + b.
+    *a = instruction->InputAt(0);
+    *b = instruction->InputAt(1);
+    *c = 0;
+    return true;
+  }
+  return false;
+}
+
 // Test vector restrictions.
 static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) {
   return (restrictions & tested) != 0;
@@ -1215,24 +1260,23 @@
   // Test for top level arithmetic shift right x >> 1 or logical shift right x >>> 1
   // (note whether the sign bit in higher precision is shifted in has no effect
   // on the narrow precision computed by the idiom).
-  int64_t value = 0;
+  int64_t distance = 0;
   if ((instruction->IsShr() ||
        instruction->IsUShr()) &&
-      IsInt64AndGet(instruction->InputAt(1), /*out*/ &value) && value == 1) {
-    //
-    // TODO: make following code less sensitive to associativity and commutativity differences.
-    //
-    HInstruction* x = instruction->InputAt(0);
-    // Test for an optional rounding part (x + 1) >> 1.
-    bool is_rounded = false;
-    if (x->IsAdd() && IsInt64AndGet(x->InputAt(1), /*out*/ &value) && value == 1) {
-      x = x->InputAt(0);
-      is_rounded = true;
-    }
-    // Test for a core addition (a + b) >> 1 (possibly rounded), either unsigned or signed.
-    if (x->IsAdd()) {
-      HInstruction* a = x->InputAt(0);
-      HInstruction* b = x->InputAt(1);
+      IsInt64AndGet(instruction->InputAt(1), /*out*/ &distance) && distance == 1) {
+    // Test for (a + b + c) >> 1 for optional constant c.
+    HInstruction* a = nullptr;
+    HInstruction* b = nullptr;
+    int64_t       c = 0;
+    if (IsAddConst(instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) {
+      // Accept c == 1 (rounded) or c == 0 (not rounded).
+      bool is_rounded = false;
+      if (c == 1) {
+        is_rounded = true;
+      } else if (c != 0) {
+        return false;
+      }
+      // Accept consistent zero or sign extension on operands a and b.
       HInstruction* r = nullptr;
       HInstruction* s = nullptr;
       bool is_unsigned = false;

diff --git a/test/646-checker-hadd-short/src/Main.java b/test/646-checker-hadd-short/src/Main.java
index db495f6..4e6b4bd 100644
--- a/test/646-checker-hadd-short/src/Main.java
+++ b/test/646-checker-hadd-short/src/Main.java

@@ -49,6 +49,34 @@
     }
   }
 
+  /// CHECK-START: void Main.halving_add_signed_alt(short[], short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<I10:i\d+>>  IntConstant 10                      loop:none
+  /// CHECK-DAG: <<M10:i\d+>>  IntConstant -10                     loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add1:i\d+>> Add [<<Get1>>,<<I10>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add2:i\d+>> Add [<<Get2>>,<<M10>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add3:i\d+>> Add [<<Add1>>,<<Add2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Shr:i\d+>>  Shr [<<Add3>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.halving_add_signed_alt(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void halving_add_signed_alt(short[] b1, short[] b2, short[] bo) {
+    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int i = 0; i < min_length; i++) {
+      // Cancelling constant computations do not confuse recognition.
+      bo[i] = (short) (((b1[i] + 10) + (b2[i] - 10)) >> 1);
+    }
+  }
+
   /// CHECK-START: void Main.halving_add_unsigned(short[], short[], short[]) loop_optimization (before)
   /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
   /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                   loop:none
@@ -99,6 +127,59 @@
     }
   }
 
+  /// CHECK-START: void Main.rounding_halving_add_signed_alt(short[], short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add1:i\d+>> Add [<<Get1>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add2:i\d+>> Add [<<Add1>>,<<Get2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Shr:i\d+>>  Shr [<<Add2>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.rounding_halving_add_signed_alt(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void rounding_halving_add_signed_alt(short[] b1, short[] b2, short[] bo) {
+    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int i = 0; i < min_length; i++) {
+      // Slightly different order in idiom does not confuse recognition.
+      bo[i] = (short) (((1 + b1[i]) + b2[i]) >> 1);
+    }
+  }
+
+  /// CHECK-START: void Main.rounding_halving_add_signed_alt2(short[], short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<I10:i\d+>>  IntConstant 10                      loop:none
+  /// CHECK-DAG: <<M9:i\d+>>   IntConstant -9                      loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add1:i\d+>> Add [<<Get1>>,<<I10>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add2:i\d+>> Add [<<Get2>>,<<M9>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add3:i\d+>> Add [<<Add1>>,<<Add2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Shr:i\d+>>  Shr [<<Add3>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.rounding_halving_add_signed_alt2(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void rounding_halving_add_signed_alt2(short[] b1, short[] b2, short[] bo) {
+    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int i = 0; i < min_length; i++) {
+      // Computations that cancel to adding 1 also do not confuse recognition.
+      bo[i] = (short) (((b1[i] + 10) + (b2[i] - 9)) >> 1);
+    }
+  }
+
   /// CHECK-START: void Main.rounding_halving_add_unsigned(short[], short[], short[]) loop_optimization (before)
   /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
   /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                   loop:none
@@ -126,6 +207,34 @@
     }
   }
 
+  /// CHECK-START: void Main.rounding_halving_add_unsigned_alt(short[], short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                   loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<UMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<UMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add1:i\d+>> Add [<<And2>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add2:i\d+>> Add [<<And1>>,<<Add1>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Shr:i\d+>>  Shr [<<Add2>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.rounding_halving_add_unsigned_alt(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void rounding_halving_add_unsigned_alt(short[] b1, short[] b2, short[] bo) {
+    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int i = 0; i < min_length; i++) {
+      // Slightly different order in idiom does not confuse recognition.
+      bo[i] = (short) ((b1[i] & 0xffff) + ((b2[i] & 0xffff) + 1) >> 1);
+    }
+  }
+
   /// CHECK-START: void Main.halving_add_signed_constant(short[], short[]) loop_optimization (before)
   /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
   /// CHECK-DAG: <<SMAX:i\d+>> IntConstant 32767                   loop:none
@@ -200,6 +309,11 @@
       short e = (short) ((sB1[i] + sB2[i]) >> 1);
       expectEquals(e, sBo[i]);
     }
+    halving_add_signed_alt(sB1, sB2, sBo);
+    for (int i = 0; i < M; i++) {
+      short e = (short) ((sB1[i] + sB2[i]) >> 1);
+      expectEquals(e, sBo[i]);
+    }
     halving_add_unsigned(sB1, sB2, sBo);
     for (int i = 0; i < M; i++) {
       short e = (short) (((sB1[i] & 0xffff) + (sB2[i] & 0xffff)) >> 1);
@@ -210,11 +324,26 @@
       short e = (short) ((sB1[i] + sB2[i] + 1) >> 1);
       expectEquals(e, sBo[i]);
     }
+    rounding_halving_add_signed_alt(sB1, sB2, sBo);
+    for (int i = 0; i < M; i++) {
+      short e = (short) ((sB1[i] + sB2[i] + 1) >> 1);
+      expectEquals(e, sBo[i]);
+    }
+    rounding_halving_add_signed_alt2(sB1, sB2, sBo);
+    for (int i = 0; i < M; i++) {
+      short e = (short) ((sB1[i] + sB2[i] + 1) >> 1);
+      expectEquals(e, sBo[i]);
+    }
     rounding_halving_add_unsigned(sB1, sB2, sBo);
     for (int i = 0; i < M; i++) {
       short e = (short) (((sB1[i] & 0xffff) + (sB2[i] & 0xffff) + 1) >> 1);
       expectEquals(e, sBo[i]);
     }
+    rounding_halving_add_unsigned_alt(sB1, sB2, sBo);
+    for (int i = 0; i < M; i++) {
+      short e = (short) (((sB1[i] & 0xffff) + (sB2[i] & 0xffff) + 1) >> 1);
+      expectEquals(e, sBo[i]);
+    }
     halving_add_signed_constant(sB1, sBo);
     for (int i = 0; i < M; i++) {
       short e = (short) ((sB1[i] + 0x7fff) >> 1);
commit	5f80500bba3df590f0bfffea2ebe2adee7c40763	[log] [tgz]
author	Aart Bik <ajcbik@google.com>	Tue May 16 16:42:41 2017 -0700
committer	Aart Bik <ajcbik@google.com>	Thu May 18 09:30:36 2017 -0700
tree	ed18a25cc6dd29acea8641069295d81c3390d88d
parent	536e54a6af85127d7f17037360deb08e0a7dc4eb [diff]