diff options
author | 2017-05-18 14:45:27 -0700 | |
---|---|---|
committer | 2017-05-18 17:18:24 -0700 | |
commit | 636e870d55c1739e2318c2180fac349683dbfa97 (patch) | |
tree | 6c726b0b918e26aba5b5f9ec1bc900045ef2c3e3 | |
parent | de31d084f7d64c94911aef927798559d39759f95 (diff) |
Support for narrow operands in "dangerous" operations.
Rationale:
Under strict conditions, even operations that are sensitive
to higher-order bits can be vectorized by carefully inspecting
the operands. This enables more vectorization, as demonstrated
by the removal of quite a few TODOs.
Test: test-art-target, test-art-host
Change-Id: I2b0fda6a182da9aed9ce1708a53eaf0b7e1c9146
-rw-r--r-- | compiler/optimizing/loop_optimization.cc | 146 | ||||
-rw-r--r-- | compiler/optimizing/loop_optimization.h | 6 | ||||
-rw-r--r-- | test/623-checker-loop-regressions/src/Main.java | 40 | ||||
-rw-r--r-- | test/640-checker-byte-simd/src/Main.java | 12 | ||||
-rw-r--r-- | test/640-checker-char-simd/src/Main.java | 12 | ||||
-rw-r--r-- | test/640-checker-double-simd/src/Main.java | 4 | ||||
-rw-r--r-- | test/640-checker-int-simd/src/Main.java | 12 | ||||
-rw-r--r-- | test/640-checker-long-simd/src/Main.java | 12 | ||||
-rw-r--r-- | test/640-checker-short-simd/src/Main.java | 12 | ||||
-rw-r--r-- | test/645-checker-abs-simd/src/Main.java | 106 | ||||
-rw-r--r-- | test/651-checker-byte-simd-minmax/src/Main.java | 76 | ||||
-rw-r--r-- | test/651-checker-char-simd-minmax/src/Main.java | 18 | ||||
-rw-r--r-- | test/651-checker-double-simd-minmax/src/Main.java | 2 | ||||
-rw-r--r-- | test/651-checker-int-simd-minmax/src/Main.java | 4 | ||||
-rw-r--r-- | test/651-checker-short-simd-minmax/src/Main.java | 76 |
15 files changed, 446 insertions, 92 deletions
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 963df5a938..94787c99b2 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -173,6 +173,39 @@ static bool IsZeroExtensionAndGet(HInstruction* instruction, return false; } +// Detect situations with same-extension narrower operands. +// Returns true on success and sets is_unsigned accordingly. +static bool IsNarrowerOperands(HInstruction* a, + HInstruction* b, + Primitive::Type type, + /*out*/ HInstruction** r, + /*out*/ HInstruction** s, + /*out*/ bool* is_unsigned) { + if (IsSignExtensionAndGet(a, type, r) && IsSignExtensionAndGet(b, type, s)) { + *is_unsigned = false; + return true; + } else if (IsZeroExtensionAndGet(a, type, r) && IsZeroExtensionAndGet(b, type, s)) { + *is_unsigned = true; + return true; + } + return false; +} + +// As above, single operand. +static bool IsNarrowerOperand(HInstruction* a, + Primitive::Type type, + /*out*/ HInstruction** r, + /*out*/ bool* is_unsigned) { + if (IsSignExtensionAndGet(a, type, r)) { + *is_unsigned = false; + return true; + } else if (IsZeroExtensionAndGet(a, type, r)) { + *is_unsigned = true; + return true; + } + return false; +} + // Detect up to two instructions a and b, and an acccumulated constant c. static bool IsAddConstHelper(HInstruction* instruction, /*out*/ HInstruction** a, @@ -756,7 +789,7 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, return !IsUsedOutsideLoop(node->loop_info, instruction) && !instruction->DoesAnyWrite(); } -// TODO: more operations and intrinsics, detect saturation arithmetic, etc. +// TODO: saturation arithmetic. bool HLoopOptimization::VectorizeUse(LoopNode* node, HInstruction* instruction, bool generate_code, @@ -867,25 +900,38 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, return true; } // Deal with vector restrictions. 
+ HInstruction* opa = instruction->InputAt(0); + HInstruction* opb = instruction->InputAt(1); + HInstruction* r = opa; + bool is_unsigned = false; if ((HasVectorRestrictions(restrictions, kNoShift)) || (instruction->IsShr() && HasVectorRestrictions(restrictions, kNoShr))) { return false; // unsupported instruction - } else if ((instruction->IsShr() || instruction->IsUShr()) && - HasVectorRestrictions(restrictions, kNoHiBits)) { - return false; // hibits may impact lobits; TODO: we can do better! + } else if (HasVectorRestrictions(restrictions, kNoHiBits)) { + // Shifts right need extra care to account for higher order bits. + // TODO: less likely shr/unsigned and ushr/signed can by flipping signess. + if (instruction->IsShr() && + (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) { + return false; // reject, unless all operands are sign-extension narrower + } else if (instruction->IsUShr() && + (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || !is_unsigned)) { + return false; // reject, unless all operands are zero-extension narrower + } } // Accept shift operator for vectorizable/invariant operands. // TODO: accept symbolic, albeit loop invariant shift factors. - HInstruction* opa = instruction->InputAt(0); - HInstruction* opb = instruction->InputAt(1); + DCHECK(r != nullptr); + if (generate_code && vector_mode_ != kVector) { // de-idiom + r = opa; + } int64_t distance = 0; - if (VectorizeUse(node, opa, generate_code, type, restrictions) && + if (VectorizeUse(node, r, generate_code, type, restrictions) && IsInt64AndGet(opb, /*out*/ &distance)) { // Restrict shift distance to packed data type width. 
int64_t max_distance = Primitive::ComponentSize(type) * 8; if (0 <= distance && distance < max_distance) { if (generate_code) { - GenerateVecOp(instruction, vector_map_->Get(opa), opb, type); + GenerateVecOp(instruction, vector_map_->Get(r), opb, type); } return true; } @@ -899,16 +945,23 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, case Intrinsics::kMathAbsFloat: case Intrinsics::kMathAbsDouble: { // Deal with vector restrictions. - if (HasVectorRestrictions(restrictions, kNoAbs) || - HasVectorRestrictions(restrictions, kNoHiBits)) { - // TODO: we can do better for some hibits cases. + HInstruction* opa = instruction->InputAt(0); + HInstruction* r = opa; + bool is_unsigned = false; + if (HasVectorRestrictions(restrictions, kNoAbs)) { return false; + } else if (HasVectorRestrictions(restrictions, kNoHiBits) && + (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) { + return false; // reject, unless operand is sign-extension narrower } // Accept ABS(x) for vectorizable operand. - HInstruction* opa = instruction->InputAt(0); - if (VectorizeUse(node, opa, generate_code, type, restrictions)) { + DCHECK(r != nullptr); + if (generate_code && vector_mode_ != kVector) { // de-idiom + r = opa; + } + if (VectorizeUse(node, r, generate_code, type, restrictions)) { if (generate_code) { - GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type); + GenerateVecOp(instruction, vector_map_->Get(r), nullptr, type); } return true; } @@ -923,18 +976,28 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, case Intrinsics::kMathMaxFloatFloat: case Intrinsics::kMathMaxDoubleDouble: { // Deal with vector restrictions. - if (HasVectorRestrictions(restrictions, kNoMinMax) || - HasVectorRestrictions(restrictions, kNoHiBits)) { - // TODO: we can do better for some hibits cases. 
+ HInstruction* opa = instruction->InputAt(0); + HInstruction* opb = instruction->InputAt(1); + HInstruction* r = opa; + HInstruction* s = opb; + bool is_unsigned = false; + if (HasVectorRestrictions(restrictions, kNoMinMax)) { return false; + } else if (HasVectorRestrictions(restrictions, kNoHiBits) && + !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) { + return false; // reject, unless all operands are same-extension narrower } // Accept MIN/MAX(x, y) for vectorizable operands. - HInstruction* opa = instruction->InputAt(0); - HInstruction* opb = instruction->InputAt(1); - if (VectorizeUse(node, opa, generate_code, type, restrictions) && - VectorizeUse(node, opb, generate_code, type, restrictions)) { + DCHECK(r != nullptr && s != nullptr); + if (generate_code && vector_mode_ != kVector) { // de-idiom + r = opa; + s = opb; + } + if (VectorizeUse(node, r, generate_code, type, restrictions) && + VectorizeUse(node, s, generate_code, type, restrictions)) { if (generate_code) { - GenerateVecOp(instruction, vector_map_->Get(opa), vector_map_->Get(opb), type); + GenerateVecOp( + instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned); } return true; } @@ -959,11 +1022,11 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: - *restrictions |= kNoDiv | kNoAbs; + *restrictions |= kNoDiv; return TrySetVectorLength(16); case Primitive::kPrimChar: case Primitive::kPrimShort: - *restrictions |= kNoDiv | kNoAbs; + *restrictions |= kNoDiv; return TrySetVectorLength(8); case Primitive::kPrimInt: *restrictions |= kNoDiv; @@ -1098,13 +1161,14 @@ void HLoopOptimization::GenerateVecMem(HInstruction* org, void HLoopOptimization::GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, - Primitive::Type type) { + Primitive::Type type, + bool is_unsigned) { if (vector_mode_ == kSequential) { - // Scalar code follows implicit integral promotion. 
- if (type == Primitive::kPrimBoolean || - type == Primitive::kPrimByte || - type == Primitive::kPrimChar || - type == Primitive::kPrimShort) { + // Non-converting scalar code follows implicit integral promotion. + if (!org->IsTypeConversion() && (type == Primitive::kPrimBoolean || + type == Primitive::kPrimByte || + type == Primitive::kPrimChar || + type == Primitive::kPrimShort)) { type = Primitive::kPrimInt; } } @@ -1185,7 +1249,6 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, case Intrinsics::kMathMinLongLong: case Intrinsics::kMathMinFloatFloat: case Intrinsics::kMathMinDoubleDouble: { - bool is_unsigned = false; // TODO: detect unsigned versions vector = new (global_allocator_) HVecMin(global_allocator_, opa, opb, type, vector_length_, is_unsigned); break; @@ -1194,7 +1257,6 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, case Intrinsics::kMathMaxLongLong: case Intrinsics::kMathMaxFloatFloat: case Intrinsics::kMathMaxDoubleDouble: { - bool is_unsigned = false; // TODO: detect unsigned versions vector = new (global_allocator_) HVecMax(global_allocator_, opa, opb, type, vector_length_, is_unsigned); break; @@ -1258,7 +1320,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, Primitive::Type type, uint64_t restrictions) { // Test for top level arithmetic shift right x >> 1 or logical shift right x >>> 1 - // (note whether the sign bit in higher precision is shifted in has no effect + // (note whether the sign bit in wider precision is shifted in has no effect // on the narrow precision computed by the idiom). int64_t distance = 0; if ((instruction->IsShr() || @@ -1269,6 +1331,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, HInstruction* b = nullptr; int64_t c = 0; if (IsAddConst(instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) { + DCHECK(a != nullptr && b != nullptr); // Accept c == 1 (rounded) or c == 0 (not rounded). 
bool is_rounded = false; if (c == 1) { @@ -1280,11 +1343,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, HInstruction* r = nullptr; HInstruction* s = nullptr; bool is_unsigned = false; - if (IsZeroExtensionAndGet(a, type, &r) && IsZeroExtensionAndGet(b, type, &s)) { - is_unsigned = true; - } else if (IsSignExtensionAndGet(a, type, &r) && IsSignExtensionAndGet(b, type, &s)) { - is_unsigned = false; - } else { + if (!IsNarrowerOperands(a, b, type, &r, &s, &is_unsigned)) { return false; } // Deal with vector restrictions. @@ -1295,6 +1354,10 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, // Accept recognized halving add for vectorizable operands. Vectorized code uses the // shorthand idiomatic operation. Sequential code uses the original scalar expressions. DCHECK(r != nullptr && s != nullptr); + if (generate_code && vector_mode_ != kVector) { // de-idiom + r = instruction->InputAt(0); + s = instruction->InputAt(1); + } if (VectorizeUse(node, r, generate_code, type, restrictions) && VectorizeUse(node, s, generate_code, type, restrictions)) { if (generate_code) { @@ -1308,12 +1371,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, is_unsigned, is_rounded)); } else { - VectorizeUse(node, instruction->InputAt(0), generate_code, type, restrictions); - VectorizeUse(node, instruction->InputAt(1), generate_code, type, restrictions); - GenerateVecOp(instruction, - vector_map_->Get(instruction->InputAt(0)), - vector_map_->Get(instruction->InputAt(1)), - type); + GenerateVecOp(instruction, vector_map_->Get(r), vector_map_->Get(s), type); } } return true; diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index 6d5978d337..35298d4076 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -137,7 +137,11 @@ class HLoopOptimization : public HOptimization { HInstruction* opa, HInstruction* opb, Primitive::Type type); - void 
GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, Primitive::Type type); + void GenerateVecOp(HInstruction* org, + HInstruction* opa, + HInstruction* opb, + Primitive::Type type, + bool is_unsigned = false); // Vectorization idioms. bool VectorizeHalvingAddIdiom(LoopNode* node, diff --git a/test/623-checker-loop-regressions/src/Main.java b/test/623-checker-loop-regressions/src/Main.java index 520e7c367c..3a2145bf2b 100644 --- a/test/623-checker-loop-regressions/src/Main.java +++ b/test/623-checker-loop-regressions/src/Main.java @@ -351,6 +351,35 @@ public class Main { } } + /// CHECK-START: void Main.typeConv(byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<One:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:b\d+>> ArrayGet [{{l\d+}},<<Phi>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Add:i\d+>> Add [<<Get>>,<<One>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv:b\d+>> TypeConversion [<<Add>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.typeConv(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<One:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<One>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi loop:<<Loop1:B\d+>> outer_loop:none + /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: <<Vadd:d\d+>> VecAdd [<<Load>>,<<Repl>>] loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi1>>,<<Vadd>>] loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi loop:<<Loop2:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:b\d+>> ArrayGet [{{l\d+}},<<Phi2>>] loop:<<Loop2>> outer_loop:none + /// CHECK-DAG: <<Add:i\d+>> Add [<<Get>>,<<One>>] loop:<<Loop2>> outer_loop:none + /// CHECK-DAG: <<Cnv:b\d+>> TypeConversion [<<Add>>] 
loop:<<Loop2>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi2>>,<<Cnv>>] loop:<<Loop2>> outer_loop:none + // + // Scalar code in cleanup loop uses correct byte type on array get and type conversion. + private static void typeConv(byte[] a, byte[] b) { + int len = Math.min(a.length, b.length); + for (int i = 0; i < len; i++) { + a[i] = (byte) (b[i] + 1); + } + } + public static void main(String[] args) { expectEquals(10, earlyExitFirst(-1)); for (int i = 0; i <= 10; i++) { @@ -453,6 +482,17 @@ public class Main { expectEquals(40, bt[i]); } + byte[] b1 = new byte[259]; // few extra iterations + byte[] b2 = new byte[259]; + for (int i = 0; i < 259; i++) { + b1[i] = 0; + b2[i] = (byte) i; + } + typeConv(b1, b2); + for (int i = 0; i < 259; i++) { + expectEquals((byte)(i + 1), b1[i]); + } + System.out.println("passed"); } diff --git a/test/640-checker-byte-simd/src/Main.java b/test/640-checker-byte-simd/src/Main.java index 10b20b83b0..21d71e8a13 100644 --- a/test/640-checker-byte-simd/src/Main.java +++ b/test/640-checker-byte-simd/src/Main.java @@ -135,8 +135,10 @@ public class Main { /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after) - // - // TODO: fill in when supported + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecShr loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none static void sar2() { for (int i = 0; i < 128; i++) a[i] >>= 2; @@ -147,9 +149,9 @@ public class Main { /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // - /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after) - // - // TODO: fill in when supported + // TODO: would need signess flip. 
+ /// CHECK-START: void Main.shr2() loop_optimization (after) + /// CHECK-NOT: VecUShr static void shr2() { for (int i = 0; i < 128; i++) a[i] >>>= 2; diff --git a/test/640-checker-char-simd/src/Main.java b/test/640-checker-char-simd/src/Main.java index 0628b36003..89d4b6b84e 100644 --- a/test/640-checker-char-simd/src/Main.java +++ b/test/640-checker-char-simd/src/Main.java @@ -134,9 +134,9 @@ public class Main { /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // - /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after) - // - // TODO: fill in when supported + // TODO: would need signess flip. + /// CHECK-START: void Main.sar2() loop_optimization (after) + /// CHECK-NOT: VecShr static void sar2() { for (int i = 0; i < 128; i++) a[i] >>= 2; @@ -148,8 +148,10 @@ public class Main { /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after) - // - // TODO: fill in when supported + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecUShr loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none static void shr2() { for (int i = 0; i < 128; i++) a[i] >>>= 2; diff --git a/test/640-checker-double-simd/src/Main.java b/test/640-checker-double-simd/src/Main.java index 0d4f87a6cd..5709b5dab8 100644 --- a/test/640-checker-double-simd/src/Main.java +++ b/test/640-checker-double-simd/src/Main.java @@ -122,8 +122,10 @@ public class Main { /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // /// CHECK-START-ARM64: void Main.conv(long[]) loop_optimization (after) + /// CHECK-NOT: VecLoad + /// CHECK-NOT: VecStore // - // TODO: fill in when supported + // TODO: fill in when long2double is supported static void conv(long[] b) { for (int i = 0; i < 128; i++) a[i] = b[i]; diff --git a/test/640-checker-int-simd/src/Main.java 
b/test/640-checker-int-simd/src/Main.java index 97048eb951..9ee553c469 100644 --- a/test/640-checker-int-simd/src/Main.java +++ b/test/640-checker-int-simd/src/Main.java @@ -136,8 +136,10 @@ public class Main { /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after) - // - // TODO: fill in when supported + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecShr loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none static void sar2() { for (int i = 0; i < 128; i++) a[i] >>= 2; @@ -149,8 +151,10 @@ public class Main { /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after) - // - // TODO: fill in when supported + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecUShr loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none static void shr2() { for (int i = 0; i < 128; i++) a[i] >>>= 2; diff --git a/test/640-checker-long-simd/src/Main.java b/test/640-checker-long-simd/src/Main.java index e42c716d19..8f6af9d012 100644 --- a/test/640-checker-long-simd/src/Main.java +++ b/test/640-checker-long-simd/src/Main.java @@ -134,8 +134,10 @@ public class Main { /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after) - // - // TODO: fill in when supported + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecShr loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none static void sar2() { for (int i = 0; i < 128; i++) a[i] >>= 2; @@ -147,8 +149,10 @@ public class Main { /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // /// CHECK-START-ARM64: void Main.shr2() 
loop_optimization (after) - // - // TODO: fill in when supported + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecUShr loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none static void shr2() { for (int i = 0; i < 128; i++) a[i] >>>= 2; diff --git a/test/640-checker-short-simd/src/Main.java b/test/640-checker-short-simd/src/Main.java index 241f8e6eea..f62c726c05 100644 --- a/test/640-checker-short-simd/src/Main.java +++ b/test/640-checker-short-simd/src/Main.java @@ -135,8 +135,10 @@ public class Main { /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after) - // - // TODO: fill in when supported + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecShr loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none static void sar2() { for (int i = 0; i < 128; i++) a[i] >>= 2; @@ -147,9 +149,9 @@ public class Main { /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // - /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after) - // - // TODO: fill in when supported + // TODO: would need signess flip. 
+ /// CHECK-START: void Main.shr2() loop_optimization (after) + /// CHECK-NOT: VecUShr static void shr2() { for (int i = 0; i < 128; i++) a[i] >>>= 2; diff --git a/test/645-checker-abs-simd/src/Main.java b/test/645-checker-abs-simd/src/Main.java index 76850abded..5a63d9f539 100644 --- a/test/645-checker-abs-simd/src/Main.java +++ b/test/645-checker-abs-simd/src/Main.java @@ -22,6 +22,67 @@ public class Main { private static final int SPQUIET = 1 << 22; private static final long DPQUIET = 1L << 51; + /// CHECK-START: void Main.doitByte(byte[]) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.doitByte(byte[]) loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop1:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: VecAbs loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: Phi loop:<<Loop2:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop2>> outer_loop:none + /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop2>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop2>> outer_loop:none + // + /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>" + private static void doitByte(byte[] x) { + for (int i = 0; i < x.length; i++) { + x[i] = (byte) Math.abs(x[i]); + } + } + + /// CHECK-START: void Main.doitChar(char[]) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START: void Main.doitChar(char[]) loop_optimization (after) + /// CHECK-NOT: 
VecAbs + private static void doitChar(char[] x) { + // Basically a nop due to zero extension. + for (int i = 0; i < x.length; i++) { + x[i] = (char) Math.abs(x[i]); + } + } + + /// CHECK-START: void Main.doitShort(short[]) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.doitShort(short[]) loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop1:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: VecAbs loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: Phi loop:<<Loop2:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop2>> outer_loop:none + /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop2>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop2>> outer_loop:none + // + /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>" + private static void doitShort(short[] x) { + for (int i = 0; i < x.length; i++) { + x[i] = (short) Math.abs(x[i]); + } + } + /// CHECK-START: void Main.doitInt(int[]) loop_optimization (before) /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none @@ -52,8 +113,16 @@ public class Main { /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // /// CHECK-START-ARM64: void Main.doitLong(long[]) loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop1:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: VecAbs loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: Phi loop:<<Loop2:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop2>> outer_loop:none + /// CHECK-DAG: InvokeStaticOrDirect 
intrinsic:MathAbsLong loop:<<Loop2>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop2>> outer_loop:none // - // TODO: Not supported yet. + /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>" private static void doitLong(long[] x) { for (int i = 0; i < x.length; i++) { x[i] = Math.abs(x[i]); @@ -90,8 +159,16 @@ public class Main { /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // /// CHECK-START-ARM64: void Main.doitDouble(double[]) loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop1:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: VecAbs loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: Phi loop:<<Loop2:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop2>> outer_loop:none + /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsDouble loop:<<Loop2>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop2>> outer_loop:none // - // TODO: Not supported yet. + /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>" private static void doitDouble(double[] x) { for (int i = 0; i < x.length; i++) { x[i] = Math.abs(x[i]); @@ -99,6 +176,31 @@ public class Main { } public static void main(String[] args) { + // Bytes, chars, shorts. + byte[] xb = new byte[256]; + for (int i = 0; i < 256; i++) { + xb[i] = (byte) i; + } + doitByte(xb); + for (int i = 0; i < 256; i++) { + expectEquals32((byte) Math.abs((byte) i), xb[i]); + } + char[] xc = new char[1024 * 64]; + for (int i = 0; i < 1024 * 64; i++) { + xc[i] = (char) i; + } + doitChar(xc); + for (int i = 0; i < 1024 *64; i++) { + expectEquals32((char) Math.abs((char) i), xc[i]); + } + short[] xs = new short[1024 * 64]; + for (int i = 0; i < 1024 * 64; i++) { + xs[i] = (short) i; + } + doitShort(xs); + for (int i = 0; i < 1024 * 64; i++) { + expectEquals32((short) Math.abs((short) i), xs[i]); + } // Set up minint32, maxint32 and some others. 
int[] xi = new int[8]; xi[0] = 0x80000000; diff --git a/test/651-checker-byte-simd-minmax/src/Main.java b/test/651-checker-byte-simd-minmax/src/Main.java index 8211ace741..fe4580784a 100644 --- a/test/651-checker-byte-simd-minmax/src/Main.java +++ b/test/651-checker-byte-simd-minmax/src/Main.java @@ -27,9 +27,12 @@ public class Main { /// CHECK-DAG: <<Cnv:b\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none // - // TODO: narrow type vectorization. - /// CHECK-START: void Main.doitMin(byte[], byte[], byte[]) loop_optimization (after) - /// CHECK-NOT: VecMin + /// CHECK-START-ARM64: void Main.doitMin(byte[], byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none private static void doitMin(byte[] x, byte[] y, byte[] z) { int min = Math.min(x.length, Math.min(y.length, z.length)); for (int i = 0; i < min; i++) { @@ -37,6 +40,30 @@ public class Main { } } + /// CHECK-START-ARM64: void Main.doitMinUnsigned(byte[], byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<I255:i\d+>> IntConstant 255 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<I255>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<I255>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<And1>>,<<And2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none + 
/// CHECK-DAG: <<Cnv:b\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.doitMinUnsigned(byte[], byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none + private static void doitMinUnsigned(byte[] x, byte[] y, byte[] z) { + int min = Math.min(x.length, Math.min(y.length, z.length)); + for (int i = 0; i < min; i++) { + x[i] = (byte) Math.min(y[i] & 0xff, z[i] & 0xff); + } + } + /// CHECK-START: void Main.doitMax(byte[], byte[], byte[]) loop_optimization (before) /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: <<Get1:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none @@ -45,9 +72,12 @@ public class Main { /// CHECK-DAG: <<Cnv:b\d+>> TypeConversion [<<Max>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none // - // TODO: narrow type vectorization. 
- /// CHECK-START: void Main.doitMax(byte[], byte[], byte[]) loop_optimization (after) - /// CHECK-NOT: VecMax + /// CHECK-START-ARM64: void Main.doitMax(byte[], byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Max:d\d+>> VecMax [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>> outer_loop:none private static void doitMax(byte[] x, byte[] y, byte[] z) { int min = Math.min(x.length, Math.min(y.length, z.length)); for (int i = 0; i < min; i++) { @@ -55,6 +85,30 @@ public class Main { } } + /// CHECK-START-ARM64: void Main.doitMaxUnsigned(byte[], byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<I255:i\d+>> IntConstant 255 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<I255>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<I255>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Max:i\d+>> InvokeStaticOrDirect [<<And1>>,<<And2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv:b\d+>> TypeConversion [<<Max>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.doitMaxUnsigned(byte[], byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Max:d\d+>> VecMax [<<Get1>>,<<Get2>>] 
unsigned:true loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>> outer_loop:none + private static void doitMaxUnsigned(byte[] x, byte[] y, byte[] z) { + int min = Math.min(x.length, Math.min(y.length, z.length)); + for (int i = 0; i < min; i++) { + x[i] = (byte) Math.max(y[i] & 0xff, z[i] & 0xff); + } + } + public static void main(String[] args) { // Initialize cross-values for all possible values. int total = 256 * 256; @@ -77,11 +131,21 @@ public class Main { byte expected = (byte) Math.min(y[i], z[i]); expectEquals(expected, x[i]); } + doitMinUnsigned(x, y, z); + for (int i = 0; i < total; i++) { + byte expected = (byte) Math.min(y[i] & 0xff, z[i] & 0xff); + expectEquals(expected, x[i]); + } doitMax(x, y, z); for (int i = 0; i < total; i++) { byte expected = (byte) Math.max(y[i], z[i]); expectEquals(expected, x[i]); } + doitMaxUnsigned(x, y, z); + for (int i = 0; i < total; i++) { + byte expected = (byte) Math.max(y[i] & 0xff, z[i] & 0xff); + expectEquals(expected, x[i]); + } System.out.println("passed"); } diff --git a/test/651-checker-char-simd-minmax/src/Main.java b/test/651-checker-char-simd-minmax/src/Main.java index 5ce7b94bf4..e2998dadf6 100644 --- a/test/651-checker-char-simd-minmax/src/Main.java +++ b/test/651-checker-char-simd-minmax/src/Main.java @@ -27,9 +27,12 @@ public class Main { /// CHECK-DAG: <<Cnv:c\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none // - // TODO: narrow type vectorization. 
- /// CHECK-START: void Main.doitMin(char[], char[], char[]) loop_optimization (after) - /// CHECK-NOT: VecMin + /// CHECK-START-ARM64: void Main.doitMin(char[], char[], char[]) loop_optimization (after) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none private static void doitMin(char[] x, char[] y, char[] z) { int min = Math.min(x.length, Math.min(y.length, z.length)); for (int i = 0; i < min; i++) { @@ -45,9 +48,12 @@ public class Main { /// CHECK-DAG: <<Cnv:c\d+>> TypeConversion [<<Max>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none // - // TODO: narrow type vectorization. - /// CHECK-START: void Main.doitMax(char[], char[], char[]) loop_optimization (after) - /// CHECK-NOT: VecMax + /// CHECK-START-ARM64: void Main.doitMax(char[], char[], char[]) loop_optimization (after) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Max:d\d+>> VecMax [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>> outer_loop:none private static void doitMax(char[] x, char[] y, char[] z) { int min = Math.min(x.length, Math.min(y.length, z.length)); for (int i = 0; i < min; i++) { diff --git a/test/651-checker-double-simd-minmax/src/Main.java b/test/651-checker-double-simd-minmax/src/Main.java index e1711aef60..cf04f85906 100644 --- a/test/651-checker-double-simd-minmax/src/Main.java +++ 
b/test/651-checker-double-simd-minmax/src/Main.java @@ -48,7 +48,7 @@ public class Main { /// CHECK-DAG: <<Max:d\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxDoubleDouble loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>> outer_loop:none // - // TODO-x86: 0.0 vs -0.0? + // TODO x86: 0.0 vs -0.0? // /// CHECK-START-ARM64: void Main.doitMax(double[], double[], double[]) loop_optimization (after) /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none diff --git a/test/651-checker-int-simd-minmax/src/Main.java b/test/651-checker-int-simd-minmax/src/Main.java index 4e05a9ded3..6cee7b5484 100644 --- a/test/651-checker-int-simd-minmax/src/Main.java +++ b/test/651-checker-int-simd-minmax/src/Main.java @@ -30,7 +30,7 @@ public class Main { /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none - /// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none private static void doitMin(int[] x, int[] y, int[] z) { int min = Math.min(x.length, Math.min(y.length, z.length)); @@ -50,7 +50,7 @@ public class Main { /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none - /// CHECK-DAG: <<Max:d\d+>> VecMax [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Max:d\d+>> VecMax [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>> outer_loop:none private static void doitMax(int[] x, int[] y, int[] z) { int min = Math.min(x.length, 
Math.min(y.length, z.length)); diff --git a/test/651-checker-short-simd-minmax/src/Main.java b/test/651-checker-short-simd-minmax/src/Main.java index f34f5264c1..7cbadaf52a 100644 --- a/test/651-checker-short-simd-minmax/src/Main.java +++ b/test/651-checker-short-simd-minmax/src/Main.java @@ -27,9 +27,12 @@ public class Main { /// CHECK-DAG: <<Cnv:s\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none // - // TODO: narrow type vectorization. - /// CHECK-START: void Main.doitMin(short[], short[], short[]) loop_optimization (after) - /// CHECK-NOT: VecMin + /// CHECK-START-ARM64: void Main.doitMin(short[], short[], short[]) loop_optimization (after) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none private static void doitMin(short[] x, short[] y, short[] z) { int min = Math.min(x.length, Math.min(y.length, z.length)); for (int i = 0; i < min; i++) { @@ -37,6 +40,30 @@ public class Main { } } + /// CHECK-START-ARM64: void Main.doitMinUnsigned(short[], short[], short[]) loop_optimization (before) + /// CHECK-DAG: <<IMAX:i\d+>> IntConstant 65535 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<IMAX>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<IMAX>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<And1>>,<<And2>>] intrinsic:MathMinIntInt loop:<<Loop>> 
outer_loop:none + /// CHECK-DAG: <<Cnv:s\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.doitMinUnsigned(short[], short[], short[]) loop_optimization (after) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none + private static void doitMinUnsigned(short[] x, short[] y, short[] z) { + int min = Math.min(x.length, Math.min(y.length, z.length)); + for (int i = 0; i < min; i++) { + x[i] = (short) Math.min(y[i] & 0xffff, z[i] & 0xffff); + } + } + /// CHECK-START: void Main.doitMax(short[], short[], short[]) loop_optimization (before) /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: <<Get1:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none @@ -45,9 +72,12 @@ public class Main { /// CHECK-DAG: <<Cnv:s\d+>> TypeConversion [<<Max>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none // - // TODO: narrow type vectorization. 
- /// CHECK-START: void Main.doitMax(short[], short[], short[]) loop_optimization (after) - /// CHECK-NOT: VecMax + /// CHECK-START-ARM64: void Main.doitMax(short[], short[], short[]) loop_optimization (after) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Max:d\d+>> VecMax [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>> outer_loop:none private static void doitMax(short[] x, short[] y, short[] z) { int min = Math.min(x.length, Math.min(y.length, z.length)); for (int i = 0; i < min; i++) { @@ -55,6 +85,30 @@ public class Main { } } + /// CHECK-START-ARM64: void Main.doitMaxUnsigned(short[], short[], short[]) loop_optimization (before) + /// CHECK-DAG: <<IMAX:i\d+>> IntConstant 65535 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<IMAX>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<IMAX>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Max:i\d+>> InvokeStaticOrDirect [<<And1>>,<<And2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv:s\d+>> TypeConversion [<<Max>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.doitMaxUnsigned(short[], short[], short[]) loop_optimization (after) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Max:d\d+>> VecMax 
[<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>> outer_loop:none + private static void doitMaxUnsigned(short[] x, short[] y, short[] z) { + int min = Math.min(x.length, Math.min(y.length, z.length)); + for (int i = 0; i < min; i++) { + x[i] = (short) Math.max(y[i] & 0xffff, z[i] & 0xffff); + } + } + public static void main(String[] args) { short[] interesting = { (short) 0x0000, (short) 0x0001, (short) 0x007f, @@ -91,11 +145,21 @@ public class Main { short expected = (short) Math.min(y[i], z[i]); expectEquals(expected, x[i]); } + doitMinUnsigned(x, y, z); + for (int i = 0; i < total; i++) { + short expected = (short) Math.min(y[i] & 0xffff, z[i] & 0xffff); + expectEquals(expected, x[i]); + } doitMax(x, y, z); for (int i = 0; i < total; i++) { short expected = (short) Math.max(y[i], z[i]); expectEquals(expected, x[i]); } + doitMaxUnsigned(x, y, z); + for (int i = 0; i < total; i++) { + short expected = (short) Math.max(y[i] & 0xffff, z[i] & 0xffff); + expectEquals(expected, x[i]); + } System.out.println("passed"); } |