diff options
| author | 2018-03-19 23:09:46 +0000 | |
|---|---|---|
| committer | 2018-03-19 23:09:46 +0000 | |
| commit | 753ce1bcf458ad6c6fbb41689901943d44e7738e (patch) | |
| tree | c3cb5e24f950d895dff23a850f7de3662ff016ca | |
| parent | f191ae7c99e02fbb9e2bba60e7ce3e90a5e08336 (diff) | |
| parent | 5a3927662861e626615d9ae78e65c0645d71474b (diff) | |
Merge "Recognize signed saturation in single clipping."
| -rw-r--r-- | compiler/optimizing/loop_optimization.cc | 108 | ||||
| -rw-r--r-- | test/678-checker-simd-saturation/src/Main.java | 257 |
2 files changed, 337 insertions, 28 deletions
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index e1fb7ac17e..758aca2d0c 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -358,40 +358,92 @@ static HInstruction* FindClippee(HInstruction* instruction, return instruction; } -// Accept various saturated addition forms. -static bool IsSaturatedAdd(DataType::Type type, int64_t lo, int64_t hi, bool is_unsigned) { - // MIN(r + s, 255) => SAT_ADD_unsigned - // MAX(MIN(r + s, 127), -128) => SAT_ADD_signed etc. +// Set value range for type (or fail). +static bool CanSetRange(DataType::Type type, + /*out*/ int64_t* uhi, + /*out*/ int64_t* slo, + /*out*/ int64_t* shi) { if (DataType::Size(type) == 1) { - return is_unsigned - ? (lo <= 0 && hi == std::numeric_limits<uint8_t>::max()) - : (lo == std::numeric_limits<int8_t>::min() && - hi == std::numeric_limits<int8_t>::max()); + *uhi = std::numeric_limits<uint8_t>::max(); + *slo = std::numeric_limits<int8_t>::min(); + *shi = std::numeric_limits<int8_t>::max(); + return true; } else if (DataType::Size(type) == 2) { - return is_unsigned - ? (lo <= 0 && hi == std::numeric_limits<uint16_t>::max()) - : (lo == std::numeric_limits<int16_t>::min() && - hi == std::numeric_limits<int16_t>::max()); + *uhi = std::numeric_limits<uint16_t>::max(); + *slo = std::numeric_limits<int16_t>::min(); + *shi = std::numeric_limits<int16_t>::max(); + return true; } return false; } +// Accept various saturated addition forms. +static bool IsSaturatedAdd(HInstruction* clippee, + DataType::Type type, + int64_t lo, + int64_t hi, + bool is_unsigned) { + int64_t ulo = 0, uhi = 0, slo = 0, shi = 0; + if (!CanSetRange(type, &uhi, &slo, &shi)) { + return false; + } + // Tighten the range for signed single clipping on constant. 
+ if (!is_unsigned) { + int64_t c = 0; + HInstruction* notused = nullptr; + if (IsAddConst(clippee, &notused, &c)) { + // For c in proper range and narrower operand r: + // MIN(r + c, 127) c > 0 + // or MAX(r + c, -128) c < 0 (and possibly redundant bound). + if (0 < c && c <= shi && hi == shi) { + if (lo <= (slo + c)) { + return true; + } + } else if (slo <= c && c < 0 && lo == slo) { + if (hi >= (shi + c)) { + return true; + } + } + } + } + // Detect for narrower operands r and s: + // MIN(r + s, 255) => SAT_ADD_unsigned + // MAX(MIN(r + s, 127), -128) => SAT_ADD_signed. + return is_unsigned ? (lo <= ulo && hi == uhi) : (lo == slo && hi == shi); +} + // Accept various saturated subtraction forms. -static bool IsSaturatedSub(DataType::Type type, int64_t lo, int64_t hi, bool is_unsigned) { - // MAX(r - s, 0) => SAT_SUB_unsigned - // MIN(MAX(r - s, -128), 127) => SAT_ADD_signed etc. - if (DataType::Size(type) == 1) { - return is_unsigned - ? (lo == 0 && hi >= std::numeric_limits<uint8_t>::max()) - : (lo == std::numeric_limits<int8_t>::min() && - hi == std::numeric_limits<int8_t>::max()); - } else if (DataType::Size(type) == 2) { - return is_unsigned - ? (lo == 0 && hi >= std::numeric_limits<uint16_t>::min()) - : (lo == std::numeric_limits<int16_t>::min() && - hi == std::numeric_limits<int16_t>::max()); +static bool IsSaturatedSub(HInstruction* clippee, + DataType::Type type, + int64_t lo, + int64_t hi, + bool is_unsigned) { + int64_t ulo = 0, uhi = 0, slo = 0, shi = 0; + if (!CanSetRange(type, &uhi, &slo, &shi)) { + return false; } - return false; + // Tighten the range for signed single clipping on constant. + if (!is_unsigned) { + int64_t c = 0; + if (IsInt64AndGet(clippee->InputAt(0), /*out*/ &c)) { + // For c in proper range and narrower operand r: + // MIN(c - r, 127) c > 0 + // or MAX(c - r, -128) c < 0 (and possibly redundant bound). 
+ if (0 < c && c <= shi && hi == shi) { + if (lo <= (c - shi)) { + return true; + } + } else if (slo <= c && c < 0 && lo == slo) { + if (hi >= (c - slo)) { + return true; + } + } + } + } + // Detect for narrower operands r and s: + // MAX(r - s, 0) => SAT_SUB_unsigned + // MIN(MAX(r - s, -128), 127) => SAT_ADD_signed. + return is_unsigned ? (lo == ulo && hi >= uhi) : (lo == slo && hi == shi); } // Detect reductions of the following forms, @@ -1909,8 +1961,8 @@ bool HLoopOptimization::VectorizeSaturationIdiom(LoopNode* node, HInstruction* s = nullptr; bool is_unsigned = false; if (IsNarrowerOperands(clippee->InputAt(0), clippee->InputAt(1), type, &r, &s, &is_unsigned) && - (is_add ? IsSaturatedAdd(type, lo, hi, is_unsigned) - : IsSaturatedSub(type, lo, hi, is_unsigned))) { + (is_add ? IsSaturatedAdd(clippee, type, lo, hi, is_unsigned) + : IsSaturatedSub(clippee, type, lo, hi, is_unsigned))) { DCHECK(r != nullptr); DCHECK(s != nullptr); } else { diff --git a/test/678-checker-simd-saturation/src/Main.java b/test/678-checker-simd-saturation/src/Main.java index 33a6f5ec80..d123cc2e25 100644 --- a/test/678-checker-simd-saturation/src/Main.java +++ b/test/678-checker-simd-saturation/src/Main.java @@ -200,6 +200,110 @@ public class Main { } // + // Single clipping signed 8-bit saturation. 
+ // + + /// CHECK-START-{ARM,ARM64}: void Main.satAddPConstSByte(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Get1:d\d+>> VecReplicateScalar loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Add:d\d+>> VecSaturationAdd [<<Get2>>,<<Get1>>] packed_type:Int8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Add>>] loop:<<Loop>> outer_loop:none + public static void satAddPConstSByte(byte[] a, byte[] b) { + int n = Math.min(a.length, b.length); + for (int i = 0; i < n; i++) { + b[i] = (byte) Math.min(a[i] + 15, 127); + } + } + + /// CHECK-START-{ARM,ARM64}: void Main.satAddNConstSByte(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Get1:d\d+>> VecReplicateScalar loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Add:d\d+>> VecSaturationAdd [<<Get2>>,<<Get1>>] packed_type:Int8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Add>>] loop:<<Loop>> outer_loop:none + public static void satAddNConstSByte(byte[] a, byte[] b) { + int n = Math.min(a.length, b.length); + for (int i = 0; i < n; i++) { + b[i] = (byte) Math.max(a[i] - 15, -128); + } + } + + /// CHECK-START-{ARM,ARM64}: void Main.satSubPConstSByte(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Get1:d\d+>> VecReplicateScalar loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Add:d\d+>> VecSaturationSub [<<Get1>>,<<Get2>>] packed_type:Int8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Add>>] loop:<<Loop>> outer_loop:none + public static void satSubPConstSByte(byte[] a, byte[] b) { + int n = Math.min(a.length, b.length); + for (int i = 0; i < n; i++) { + b[i] = (byte) Math.min(15 - a[i], 127); + } + } + + /// CHECK-START-{ARM,ARM64}: void Main.satSubNConstSByte(byte[], 
byte[]) loop_optimization (after) + /// CHECK-DAG: <<Get1:d\d+>> VecReplicateScalar loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Add:d\d+>> VecSaturationSub [<<Get1>>,<<Get2>>] packed_type:Int8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Add>>] loop:<<Loop>> outer_loop:none + public static void satSubNConstSByte(byte[] a, byte[] b) { + int n = Math.min(a.length, b.length); + for (int i = 0; i < n; i++) { + b[i] = (byte) Math.max(-15 - a[i], -128); + } + } + + // + // Single clipping signed 16-bit saturation. + // + + /// CHECK-START-{ARM,ARM64}: void Main.satAddPConstSShort(short[], short[]) loop_optimization (after) + /// CHECK-DAG: <<Get1:d\d+>> VecReplicateScalar loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Add:d\d+>> VecSaturationAdd [<<Get2>>,<<Get1>>] packed_type:Int16 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Add>>] loop:<<Loop>> outer_loop:none + public static void satAddPConstSShort(short[] a, short[] b) { + int n = Math.min(a.length, b.length); + for (int i = 0; i < n; i++) { + b[i] = (short) Math.min(a[i] + 15, 32767); + } + } + + /// CHECK-START-{ARM,ARM64}: void Main.satAddNConstSShort(short[], short[]) loop_optimization (after) + /// CHECK-DAG: <<Get1:d\d+>> VecReplicateScalar loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Add:d\d+>> VecSaturationAdd [<<Get2>>,<<Get1>>] packed_type:Int16 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Add>>] loop:<<Loop>> outer_loop:none + public static void satAddNConstSShort(short[] a, short[] b) { + int n = Math.min(a.length, b.length); + for (int i = 0; i < n; i++) { + b[i] = (short) Math.max(a[i] - 15, -32768); + } + } + + /// CHECK-START-{ARM,ARM64}: void 
Main.satSubPConstSShort(short[], short[]) loop_optimization (after) + /// CHECK-DAG: <<Get1:d\d+>> VecReplicateScalar loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Add:d\d+>> VecSaturationSub [<<Get1>>,<<Get2>>] packed_type:Int16 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Add>>] loop:<<Loop>> outer_loop:none + public static void satSubPConstSShort(short[] a, short[] b) { + int n = Math.min(a.length, b.length); + for (int i = 0; i < n; i++) { + b[i] = (short) Math.min(15 - a[i], 32767); + } + } + + /// CHECK-START-{ARM,ARM64}: void Main.satSubNConstSShort(short[], short[]) loop_optimization (after) + /// CHECK-DAG: <<Get1:d\d+>> VecReplicateScalar loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Add:d\d+>> VecSaturationSub [<<Get1>>,<<Get2>>] packed_type:Int16 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Add>>] loop:<<Loop>> outer_loop:none + public static void satSubNConstSShort(short[] a, short[] b) { + int n = Math.min(a.length, b.length); + for (int i = 0; i < n; i++) { + b[i] = (short) Math.max(-15 - a[i], -32768); + } + } + + // // Alternatives. 
// @@ -257,6 +361,87 @@ public class Main { } } + /// CHECK-START-{ARM,ARM64}: void Main.usatAlt1(short[], short[], short[]) loop_optimization (after) + /// CHECK-DAG: <<Get1:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Add:d\d+>> VecSaturationAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Add>>] loop:<<Loop>> outer_loop:none + public static void usatAlt1(short[] a, short[] b, short[] c) { + int n = Math.min(a.length, Math.min(b.length, c.length)); + for (int i = 0; i < n; i++) { + int t = (0xffff & a[i]) + (0xffff & b[i]); + c[i] = (short) (t <= 65535 ? t : 65535); + } + } + + /// CHECK-START-{ARM,ARM64}: void Main.usatAlt2(short[], short[], short[]) loop_optimization (after) + /// CHECK-DAG: <<Get1:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Add:d\d+>> VecSaturationAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Add>>] loop:<<Loop>> outer_loop:none + public static void usatAlt2(short[] a, short[] b, short[] c) { + int n = Math.min(a.length, Math.min(b.length, c.length)); + for (int i = 0; i < n; i++) { + int t = (a[i] & 0xffff) + (b[i] & 0xffff); + c[i] = (short) (t < 65535 ? 
t : 65535); + } + } + + /// CHECK-START-{ARM,ARM64}: void Main.usatAlt3(short[], short[], short[]) loop_optimization (after) + /// CHECK-DAG: <<Get1:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Add:d\d+>> VecSaturationAdd [<<Get2>>,<<Get1>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Add>>] loop:<<Loop>> outer_loop:none + public static void usatAlt3(short[] a, short[] b, short[] c) { + int n = Math.min(a.length, Math.min(b.length, c.length)); + for (int i = 0; i < n; i++) { + int x = (a[i] & 0xffff); + int y = (b[i] & 0xffff); + int t = y + x ; + if (t >= 65535) t = 65535; + c[i] = (short) t; + } + } + + /// CHECK-START-{ARM,ARM64}: void Main.usatAlt4(short[], short[], short[]) loop_optimization (after) + /// CHECK-DAG: <<Get1:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Add:d\d+>> VecSaturationAdd [<<Get2>>,<<Get1>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Add>>] loop:<<Loop>> outer_loop:none + public static void usatAlt4(short[] a, short[] b, short[] c) { + int n = Math.min(a.length, Math.min(b.length, c.length)); + for (int i = 0; i < n; i++) { + int x = (a[i] & 0xffff); + int y = (b[i] & 0xffff); + int t = y + x ; + if (t > 65535) t = 65535; + c[i] = (short) t; + } + } + + /// CHECK-START-{ARM,ARM64}: void Main.satRedundantClip(short[], short[]) loop_optimization (after) + /// CHECK-DAG: <<Get1:d\d+>> VecReplicateScalar loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Add:d\d+>> VecSaturationAdd [<<Get2>>,<<Get1>>] packed_type:Int16 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore 
[{{l\d+}},<<Phi>>,<<Add>>] loop:<<Loop>> outer_loop:none + public static void satRedundantClip(short[] a, short[] b) { + int n = Math.min(a.length, b.length); + for (int i = 0; i < n; i++) { + // Max clipping redundant. + b[i] = (short) Math.max(Math.min(a[i] + 15, 32767), -32768 + 15); + } + } + + /// CHECK-START: void Main.satNonRedundantClip(short[], short[]) loop_optimization (after) + /// CHECK-NOT: VecSaturationAdd + public static void satNonRedundantClip(short[] a, short[] b) { + int n = Math.min(a.length, b.length); + for (int i = 0; i < n; i++) { + // Max clipping not redundant (one off). + b[i] = (short) Math.max(Math.min(a[i] + 15, 32767), -32768 + 16); + } + } + // // Test drivers. // @@ -297,6 +482,27 @@ public class Main { byte e = (byte) Math.max(Math.min(b1[i] - b2[i], 127), -128); expectEquals(e, out[i]); } + // Single clipping. + satAddPConstSByte(b1, out); + for (int i = 0; i < m; i++) { + byte e = (byte) Math.min(b1[i] + 15, 127); + expectEquals(e, out[i]); + } + satAddNConstSByte(b1, out); + for (int i = 0; i < m; i++) { + byte e = (byte) Math.max(b1[i] - 15, -128); + expectEquals(e, out[i]); + } + satSubPConstSByte(b1, out); + for (int i = 0; i < m; i++) { + byte e = (byte) Math.min(15 - b1[i], 127); + expectEquals(e, out[i]); + } + satSubNConstSByte(b1, out); + for (int i = 0; i < m; i++) { + byte e = (byte) Math.max(-15 - b1[i], -128); + expectEquals(e, out[i]); + } } private static void test16Bit() { @@ -357,6 +563,27 @@ public class Main { short e = (short) Math.max(Math.min(s1[i] - s2[i], 32767), -32768); expectEquals(e, out[i]); } + // Single clipping. 
+ satAddPConstSShort(s1, out); + for (int i = 0; i < m; i++) { + short e = (short) Math.min(s1[i] + 15, 32767); + expectEquals(e, out[i]); + } + satAddNConstSShort(s1, out); + for (int i = 0; i < m; i++) { + short e = (short) Math.max(s1[i] - 15, -32768); + expectEquals(e, out[i]); + } + satSubPConstSShort(s1, out); + for (int i = 0; i < m; i++) { + short e = (short) Math.min(15 - s1[i], 32767); + expectEquals(e, out[i]); + } + satSubNConstSShort(s1, out); + for (int i = 0; i < m; i++) { + short e = (short) Math.max(-15 - s1[i], -32768); + expectEquals(e, out[i]); + } // Alternatives. satAlt1(s1, s2, out); for (int i = 0; i < m; i++) { @@ -373,6 +600,36 @@ public class Main { short e = (short) Math.max(Math.min(s1[i] + s2[i], 32767), -32768); expectEquals(e, out[i]); } + usatAlt1(s1, s2, out); + for (int i = 0; i < m; i++) { + short e = (short) Math.min((s1[i] & 0xffff) + (s2[i] & 0xffff), 65535); + expectEquals(e, out[i]); + } + usatAlt2(s1, s2, out); + for (int i = 0; i < m; i++) { + short e = (short) Math.min((s1[i] & 0xffff) + (s2[i] & 0xffff), 65535); + expectEquals(e, out[i]); + } + usatAlt3(s1, s2, out); + for (int i = 0; i < m; i++) { + short e = (short) Math.min((s1[i] & 0xffff) + (s2[i] & 0xffff), 65535); + expectEquals(e, out[i]); + } + usatAlt4(s1, s2, out); + for (int i = 0; i < m; i++) { + short e = (short) Math.min((s1[i] & 0xffff) + (s2[i] & 0xffff), 65535); + expectEquals(e, out[i]); + } + satRedundantClip(s1, out); + for (int i = 0; i < m; i++) { + short e = (short) Math.min(s1[i] + 15, 32767); + expectEquals(e, out[i]); + } + satNonRedundantClip(s1, out); + for (int i = 0; i < m; i++) { + short e = (short) Math.max(Math.min(s1[i] + 15, 32767), -32752); + expectEquals(e, out[i]); + } } public static void main(String[] args) { |