Made idiom recognition more robust.
Rationale:
Recognition is now more robust with respect to
operation order or even cancelling constants.
Test: test-art-target, test-art-host
Change-Id: I4e920150e20e1453bb081e3f0ddcda8f1c605672
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 4067aa3..963df5a 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -173,6 +173,51 @@
return false;
}
+// Detect up to two instructions a and b, and an acccumulated constant c.
+static bool IsAddConstHelper(HInstruction* instruction,
+ /*out*/ HInstruction** a,
+ /*out*/ HInstruction** b,
+ /*out*/ int64_t* c,
+ int32_t depth) {
+ static constexpr int32_t kMaxDepth = 8; // don't search too deep
+ int64_t value = 0;
+ if (IsInt64AndGet(instruction, &value)) {
+ *c += value;
+ return true;
+ } else if (instruction->IsAdd() && depth <= kMaxDepth) {
+ return IsAddConstHelper(instruction->InputAt(0), a, b, c, depth + 1) &&
+ IsAddConstHelper(instruction->InputAt(1), a, b, c, depth + 1);
+ } else if (*a == nullptr) {
+ *a = instruction;
+ return true;
+ } else if (*b == nullptr) {
+ *b = instruction;
+ return true;
+ }
+ return false; // too many non-const operands
+}
+
+// Detect a + b + c for an optional constant c.
+static bool IsAddConst(HInstruction* instruction,
+ /*out*/ HInstruction** a,
+ /*out*/ HInstruction** b,
+ /*out*/ int64_t* c) {
+ if (instruction->IsAdd()) {
+ // Try to find a + b and accumulated c.
+ if (IsAddConstHelper(instruction->InputAt(0), a, b, c, /*depth*/ 0) &&
+ IsAddConstHelper(instruction->InputAt(1), a, b, c, /*depth*/ 0) &&
+ *b != nullptr) {
+ return true;
+ }
+ // Found a + b.
+ *a = instruction->InputAt(0);
+ *b = instruction->InputAt(1);
+ *c = 0;
+ return true;
+ }
+ return false;
+}
+
// Test vector restrictions.
static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) {
return (restrictions & tested) != 0;
@@ -1215,24 +1260,23 @@
// Test for top level arithmetic shift right x >> 1 or logical shift right x >>> 1
// (note whether the sign bit in higher precision is shifted in has no effect
// on the narrow precision computed by the idiom).
- int64_t value = 0;
+ int64_t distance = 0;
if ((instruction->IsShr() ||
instruction->IsUShr()) &&
- IsInt64AndGet(instruction->InputAt(1), /*out*/ &value) && value == 1) {
- //
- // TODO: make following code less sensitive to associativity and commutativity differences.
- //
- HInstruction* x = instruction->InputAt(0);
- // Test for an optional rounding part (x + 1) >> 1.
- bool is_rounded = false;
- if (x->IsAdd() && IsInt64AndGet(x->InputAt(1), /*out*/ &value) && value == 1) {
- x = x->InputAt(0);
- is_rounded = true;
- }
- // Test for a core addition (a + b) >> 1 (possibly rounded), either unsigned or signed.
- if (x->IsAdd()) {
- HInstruction* a = x->InputAt(0);
- HInstruction* b = x->InputAt(1);
+ IsInt64AndGet(instruction->InputAt(1), /*out*/ &distance) && distance == 1) {
+ // Test for (a + b + c) >> 1 for optional constant c.
+ HInstruction* a = nullptr;
+ HInstruction* b = nullptr;
+ int64_t c = 0;
+ if (IsAddConst(instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) {
+ // Accept c == 1 (rounded) or c == 0 (not rounded).
+ bool is_rounded = false;
+ if (c == 1) {
+ is_rounded = true;
+ } else if (c != 0) {
+ return false;
+ }
+ // Accept consistent zero or sign extension on operands a and b.
HInstruction* r = nullptr;
HInstruction* s = nullptr;
bool is_unsigned = false;
diff --git a/test/646-checker-hadd-short/src/Main.java b/test/646-checker-hadd-short/src/Main.java
index db495f6..4e6b4bd 100644
--- a/test/646-checker-hadd-short/src/Main.java
+++ b/test/646-checker-hadd-short/src/Main.java
@@ -49,6 +49,34 @@
}
}
+ /// CHECK-START: void Main.halving_add_signed_alt(short[], short[], short[]) loop_optimization (before)
+ /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<I10:i\d+>> IntConstant 10 loop:none
+ /// CHECK-DAG: <<M10:i\d+>> IntConstant -10 loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add1:i\d+>> Add [<<Get1>>,<<I10>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add2:i\d+>> Add [<<Get2>>,<<M10>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add3:i\d+>> Add [<<Add1>>,<<Add2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Shr:i\d+>> Shr [<<Add3>>,<<I1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv:s\d+>> TypeConversion [<<Shr>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: void Main.halving_add_signed_alt(short[], short[], short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>> outer_loop:none
+ private static void halving_add_signed_alt(short[] b1, short[] b2, short[] bo) {
+ int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+ for (int i = 0; i < min_length; i++) {
+ // Cancelling constant computations do not confuse recognition.
+ bo[i] = (short) (((b1[i] + 10) + (b2[i] - 10)) >> 1);
+ }
+ }
+
/// CHECK-START: void Main.halving_add_unsigned(short[], short[], short[]) loop_optimization (before)
/// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none
/// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535 loop:none
@@ -99,6 +127,59 @@
}
}
+ /// CHECK-START: void Main.rounding_halving_add_signed_alt(short[], short[], short[]) loop_optimization (before)
+ /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add1:i\d+>> Add [<<Get1>>,<<I1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add2:i\d+>> Add [<<Add1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Shr:i\d+>> Shr [<<Add2>>,<<I1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv:s\d+>> TypeConversion [<<Shr>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: void Main.rounding_halving_add_signed_alt(short[], short[], short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>> outer_loop:none
+ private static void rounding_halving_add_signed_alt(short[] b1, short[] b2, short[] bo) {
+ int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+ for (int i = 0; i < min_length; i++) {
+ // Slightly different order in idiom does not confuse recognition.
+ bo[i] = (short) (((1 + b1[i]) + b2[i]) >> 1);
+ }
+ }
+
+ /// CHECK-START: void Main.rounding_halving_add_signed_alt2(short[], short[], short[]) loop_optimization (before)
+ /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<I10:i\d+>> IntConstant 10 loop:none
+ /// CHECK-DAG: <<M9:i\d+>> IntConstant -9 loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add1:i\d+>> Add [<<Get1>>,<<I10>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add2:i\d+>> Add [<<Get2>>,<<M9>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add3:i\d+>> Add [<<Add1>>,<<Add2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Shr:i\d+>> Shr [<<Add3>>,<<I1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv:s\d+>> TypeConversion [<<Shr>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: void Main.rounding_halving_add_signed_alt2(short[], short[], short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>> outer_loop:none
+ private static void rounding_halving_add_signed_alt2(short[] b1, short[] b2, short[] bo) {
+ int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+ for (int i = 0; i < min_length; i++) {
+ // Computations that cancel to adding 1 also do not confuse recognition.
+ bo[i] = (short) (((b1[i] + 10) + (b2[i] - 9)) >> 1);
+ }
+ }
+
/// CHECK-START: void Main.rounding_halving_add_unsigned(short[], short[], short[]) loop_optimization (before)
/// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none
/// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535 loop:none
@@ -126,6 +207,34 @@
}
}
+ /// CHECK-START: void Main.rounding_halving_add_unsigned_alt(short[], short[], short[]) loop_optimization (before)
+ /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535 loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<UMAX>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<UMAX>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add1:i\d+>> Add [<<And2>>,<<I1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add2:i\d+>> Add [<<And1>>,<<Add1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Shr:i\d+>> Shr [<<Add2>>,<<I1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv:s\d+>> TypeConversion [<<Shr>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: void Main.rounding_halving_add_unsigned_alt(short[], short[], short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>> outer_loop:none
+ private static void rounding_halving_add_unsigned_alt(short[] b1, short[] b2, short[] bo) {
+ int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+ for (int i = 0; i < min_length; i++) {
+ // Slightly different order in idiom does not confuse recognition.
+ bo[i] = (short) ((b1[i] & 0xffff) + ((b2[i] & 0xffff) + 1) >> 1);
+ }
+ }
+
/// CHECK-START: void Main.halving_add_signed_constant(short[], short[]) loop_optimization (before)
/// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none
/// CHECK-DAG: <<SMAX:i\d+>> IntConstant 32767 loop:none
@@ -200,6 +309,11 @@
short e = (short) ((sB1[i] + sB2[i]) >> 1);
expectEquals(e, sBo[i]);
}
+ halving_add_signed_alt(sB1, sB2, sBo);
+ for (int i = 0; i < M; i++) {
+ short e = (short) ((sB1[i] + sB2[i]) >> 1);
+ expectEquals(e, sBo[i]);
+ }
halving_add_unsigned(sB1, sB2, sBo);
for (int i = 0; i < M; i++) {
short e = (short) (((sB1[i] & 0xffff) + (sB2[i] & 0xffff)) >> 1);
@@ -210,11 +324,26 @@
short e = (short) ((sB1[i] + sB2[i] + 1) >> 1);
expectEquals(e, sBo[i]);
}
+ rounding_halving_add_signed_alt(sB1, sB2, sBo);
+ for (int i = 0; i < M; i++) {
+ short e = (short) ((sB1[i] + sB2[i] + 1) >> 1);
+ expectEquals(e, sBo[i]);
+ }
+ rounding_halving_add_signed_alt2(sB1, sB2, sBo);
+ for (int i = 0; i < M; i++) {
+ short e = (short) ((sB1[i] + sB2[i] + 1) >> 1);
+ expectEquals(e, sBo[i]);
+ }
rounding_halving_add_unsigned(sB1, sB2, sBo);
for (int i = 0; i < M; i++) {
short e = (short) (((sB1[i] & 0xffff) + (sB2[i] & 0xffff) + 1) >> 1);
expectEquals(e, sBo[i]);
}
+ rounding_halving_add_unsigned_alt(sB1, sB2, sBo);
+ for (int i = 0; i < M; i++) {
+ short e = (short) (((sB1[i] & 0xffff) + (sB2[i] & 0xffff) + 1) >> 1);
+ expectEquals(e, sBo[i]);
+ }
halving_add_signed_constant(sB1, sBo);
for (int i = 0; i < M; i++) {
short e = (short) ((sB1[i] + 0x7fff) >> 1);