diff options
| -rw-r--r-- | compiler/optimizing/loop_optimization.cc | 76 | ||||
| -rw-r--r-- | test/646-checker-hadd-short/src/Main.java | 129 |
2 files changed, 189 insertions, 16 deletions
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 4067aa3468..963df5a938 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -173,6 +173,51 @@ static bool IsZeroExtensionAndGet(HInstruction* instruction, return false; } +// Detect up to two instructions a and b, and an acccumulated constant c. +static bool IsAddConstHelper(HInstruction* instruction, + /*out*/ HInstruction** a, + /*out*/ HInstruction** b, + /*out*/ int64_t* c, + int32_t depth) { + static constexpr int32_t kMaxDepth = 8; // don't search too deep + int64_t value = 0; + if (IsInt64AndGet(instruction, &value)) { + *c += value; + return true; + } else if (instruction->IsAdd() && depth <= kMaxDepth) { + return IsAddConstHelper(instruction->InputAt(0), a, b, c, depth + 1) && + IsAddConstHelper(instruction->InputAt(1), a, b, c, depth + 1); + } else if (*a == nullptr) { + *a = instruction; + return true; + } else if (*b == nullptr) { + *b = instruction; + return true; + } + return false; // too many non-const operands +} + +// Detect a + b + c for an optional constant c. +static bool IsAddConst(HInstruction* instruction, + /*out*/ HInstruction** a, + /*out*/ HInstruction** b, + /*out*/ int64_t* c) { + if (instruction->IsAdd()) { + // Try to find a + b and accumulated c. + if (IsAddConstHelper(instruction->InputAt(0), a, b, c, /*depth*/ 0) && + IsAddConstHelper(instruction->InputAt(1), a, b, c, /*depth*/ 0) && + *b != nullptr) { + return true; + } + // Found a + b. + *a = instruction->InputAt(0); + *b = instruction->InputAt(1); + *c = 0; + return true; + } + return false; +} + // Test vector restrictions. static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) { return (restrictions & tested) != 0; @@ -1215,24 +1260,23 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, // Test for top level arithmetic shift right x >> 1 or logical shift right x >>> 1 // (note whether the sign bit in higher precision is shifted in has no effect // on the narrow precision computed by the idiom). - int64_t value = 0; + int64_t distance = 0; if ((instruction->IsShr() || instruction->IsUShr()) && - IsInt64AndGet(instruction->InputAt(1), /*out*/ &value) && value == 1) { - // - // TODO: make following code less sensitive to associativity and commutativity differences. - // - HInstruction* x = instruction->InputAt(0); - // Test for an optional rounding part (x + 1) >> 1. - bool is_rounded = false; - if (x->IsAdd() && IsInt64AndGet(x->InputAt(1), /*out*/ &value) && value == 1) { - x = x->InputAt(0); - is_rounded = true; - } - // Test for a core addition (a + b) >> 1 (possibly rounded), either unsigned or signed. - if (x->IsAdd()) { - HInstruction* a = x->InputAt(0); - HInstruction* b = x->InputAt(1); + IsInt64AndGet(instruction->InputAt(1), /*out*/ &distance) && distance == 1) { + // Test for (a + b + c) >> 1 for optional constant c. + HInstruction* a = nullptr; + HInstruction* b = nullptr; + int64_t c = 0; + if (IsAddConst(instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) { + // Accept c == 1 (rounded) or c == 0 (not rounded). + bool is_rounded = false; + if (c == 1) { + is_rounded = true; + } else if (c != 0) { + return false; + } + // Accept consistent zero or sign extension on operands a and b. HInstruction* r = nullptr; HInstruction* s = nullptr; bool is_unsigned = false; diff --git a/test/646-checker-hadd-short/src/Main.java b/test/646-checker-hadd-short/src/Main.java index db495f6433..4e6b4bde8b 100644 --- a/test/646-checker-hadd-short/src/Main.java +++ b/test/646-checker-hadd-short/src/Main.java @@ -49,6 +49,34 @@ public class Main { } } + /// CHECK-START: void Main.halving_add_signed_alt(short[], short[], short[]) loop_optimization (before) + /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<I10:i\d+>> IntConstant 10 loop:none + /// CHECK-DAG: <<M10:i\d+>> IntConstant -10 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Add1:i\d+>> Add [<<Get1>>,<<I10>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Add2:i\d+>> Add [<<Get2>>,<<M10>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Add3:i\d+>> Add [<<Add1>>,<<Add2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Shr:i\d+>> Shr [<<Add3>>,<<I1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv:s\d+>> TypeConversion [<<Shr>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.halving_add_signed_alt(short[], short[], short[]) loop_optimization (after) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>> outer_loop:none + private static void halving_add_signed_alt(short[] b1, short[] b2, short[] bo) { + int min_length = Math.min(bo.length, Math.min(b1.length, b2.length)); + for (int i = 0; i < min_length; i++) { + // Cancelling constant computations do not confuse recognition. + bo[i] = (short) (((b1[i] + 10) + (b2[i] - 10)) >> 1); + } + } + /// CHECK-START: void Main.halving_add_unsigned(short[], short[], short[]) loop_optimization (before) /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535 loop:none @@ -99,6 +127,59 @@ public class Main { } } + /// CHECK-START: void Main.rounding_halving_add_signed_alt(short[], short[], short[]) loop_optimization (before) + /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Add1:i\d+>> Add [<<Get1>>,<<I1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Add2:i\d+>> Add [<<Add1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Shr:i\d+>> Shr [<<Add2>>,<<I1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv:s\d+>> TypeConversion [<<Shr>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.rounding_halving_add_signed_alt(short[], short[], short[]) loop_optimization (after) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>> outer_loop:none + private static void rounding_halving_add_signed_alt(short[] b1, short[] b2, short[] bo) { + int min_length = Math.min(bo.length, Math.min(b1.length, b2.length)); + for (int i = 0; i < min_length; i++) { + // Slightly different order in idiom does not confuse recognition. + bo[i] = (short) (((1 + b1[i]) + b2[i]) >> 1); + } + } + + /// CHECK-START: void Main.rounding_halving_add_signed_alt2(short[], short[], short[]) loop_optimization (before) + /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<I10:i\d+>> IntConstant 10 loop:none + /// CHECK-DAG: <<M9:i\d+>> IntConstant -9 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Add1:i\d+>> Add [<<Get1>>,<<I10>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Add2:i\d+>> Add [<<Get2>>,<<M9>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Add3:i\d+>> Add [<<Add1>>,<<Add2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Shr:i\d+>> Shr [<<Add3>>,<<I1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv:s\d+>> TypeConversion [<<Shr>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.rounding_halving_add_signed_alt2(short[], short[], short[]) loop_optimization (after) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>> outer_loop:none + private static void rounding_halving_add_signed_alt2(short[] b1, short[] b2, short[] bo) { + int min_length = Math.min(bo.length, Math.min(b1.length, b2.length)); + for (int i = 0; i < min_length; i++) { + // Computations that cancel to adding 1 also do not confuse recognition. + bo[i] = (short) (((b1[i] + 10) + (b2[i] - 9)) >> 1); + } + } + /// CHECK-START: void Main.rounding_halving_add_unsigned(short[], short[], short[]) loop_optimization (before) /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535 loop:none @@ -126,6 +207,34 @@ public class Main { } } + /// CHECK-START: void Main.rounding_halving_add_unsigned_alt(short[], short[], short[]) loop_optimization (before) + /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<UMAX>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<UMAX>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Add1:i\d+>> Add [<<And2>>,<<I1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Add2:i\d+>> Add [<<And1>>,<<Add1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Shr:i\d+>> Shr [<<Add2>>,<<I1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv:s\d+>> TypeConversion [<<Shr>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.rounding_halving_add_unsigned_alt(short[], short[], short[]) loop_optimization (after) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>> outer_loop:none + private static void rounding_halving_add_unsigned_alt(short[] b1, short[] b2, short[] bo) { + int min_length = Math.min(bo.length, Math.min(b1.length, b2.length)); + for (int i = 0; i < min_length; i++) { + // Slightly different order in idiom does not confuse recognition. + bo[i] = (short) ((b1[i] & 0xffff) + ((b2[i] & 0xffff) + 1) >> 1); + } + } + /// CHECK-START: void Main.halving_add_signed_constant(short[], short[]) loop_optimization (before) /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none /// CHECK-DAG: <<SMAX:i\d+>> IntConstant 32767 loop:none @@ -200,6 +309,11 @@ public class Main { short e = (short) ((sB1[i] + sB2[i]) >> 1); expectEquals(e, sBo[i]); } + halving_add_signed_alt(sB1, sB2, sBo); + for (int i = 0; i < M; i++) { + short e = (short) ((sB1[i] + sB2[i]) >> 1); + expectEquals(e, sBo[i]); + } halving_add_unsigned(sB1, sB2, sBo); for (int i = 0; i < M; i++) { short e = (short) (((sB1[i] & 0xffff) + (sB2[i] & 0xffff)) >> 1); @@ -210,11 +324,26 @@ public class Main { short e = (short) ((sB1[i] + sB2[i] + 1) >> 1); expectEquals(e, sBo[i]); } + rounding_halving_add_signed_alt(sB1, sB2, sBo); + for (int i = 0; i < M; i++) { + short e = (short) ((sB1[i] + sB2[i] + 1) >> 1); + expectEquals(e, sBo[i]); + } + rounding_halving_add_signed_alt2(sB1, sB2, sBo); + for (int i = 0; i < M; i++) { + short e = (short) ((sB1[i] + sB2[i] + 1) >> 1); + expectEquals(e, sBo[i]); + } rounding_halving_add_unsigned(sB1, sB2, sBo); for (int i = 0; i < M; i++) { short e = (short) (((sB1[i] & 0xffff) + (sB2[i] & 0xffff) + 1) >> 1); expectEquals(e, sBo[i]); } + rounding_halving_add_unsigned_alt(sB1, sB2, sBo); + for (int i = 0; i < M; i++) { + short e = (short) (((sB1[i] & 0xffff) + (sB2[i] & 0xffff) + 1) >> 1); + expectEquals(e, sBo[i]); + } halving_add_signed_constant(sB1, sBo); for (int i = 0; i < M; i++) { short e = (short) ((sB1[i] + 0x7fff) >> 1); |