Refined add/sub analysis vis-a-vis SIMD idioms.
Rationale:
Slightly more general detection of + and - with
constants ensures fewer cases go undetected.
Bug: b/74026074
Test: test-art-host,target
Change-Id: Ie5bb2dd10294436a27487e5a1ddc77d9e2dd2303
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 1d83815..71e24de 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -227,6 +227,7 @@
/*out*/ HInstruction** r,
/*out*/ HInstruction** s,
/*out*/ bool* is_unsigned) {
+ DCHECK(a != nullptr && b != nullptr);
// Look for a matching sign extension.
DataType::Type stype = HVecOperation::ToSignedType(type);
if (IsSignExtensionAndGet(a, stype, r) && IsSignExtensionAndGet(b, stype, s)) {
@@ -247,6 +248,7 @@
DataType::Type type,
/*out*/ HInstruction** r,
/*out*/ bool* is_unsigned) {
+ DCHECK(a != nullptr);
// Look for a matching sign extension.
DataType::Type stype = HVecOperation::ToSignedType(type);
if (IsSignExtensionAndGet(a, stype, r)) {
@@ -270,20 +272,28 @@
return vl >> (DataType::SizeShift(other_type) - DataType::SizeShift(vector_type));
}
-// Detect up to two instructions a and b, and an acccumulated constant c.
-static bool IsAddConstHelper(HInstruction* instruction,
- /*out*/ HInstruction** a,
- /*out*/ HInstruction** b,
- /*out*/ int64_t* c,
- int32_t depth) {
- static constexpr int32_t kMaxDepth = 8; // don't search too deep
+// Detect up to two added operands a and b and an accumulated constant c.
+static bool IsAddConst(HInstruction* instruction,
+ /*out*/ HInstruction** a,
+ /*out*/ HInstruction** b,
+ /*out*/ int64_t* c,
+ int32_t depth = 8) { // don't search too deep
int64_t value = 0;
+ // Enter add/sub while still within reasonable depth.
+ if (depth > 0) {
+ if (instruction->IsAdd()) {
+ return IsAddConst(instruction->InputAt(0), a, b, c, depth - 1) &&
+ IsAddConst(instruction->InputAt(1), a, b, c, depth - 1);
+ } else if (instruction->IsSub() &&
+ IsInt64AndGet(instruction->InputAt(1), &value)) {
+ *c -= value;
+ return IsAddConst(instruction->InputAt(0), a, b, c, depth - 1);
+ }
+ }
+ // Otherwise, deal with leaf nodes.
if (IsInt64AndGet(instruction, &value)) {
*c += value;
return true;
- } else if (instruction->IsAdd() && depth <= kMaxDepth) {
- return IsAddConstHelper(instruction->InputAt(0), a, b, c, depth + 1) &&
- IsAddConstHelper(instruction->InputAt(1), a, b, c, depth + 1);
} else if (*a == nullptr) {
*a = instruction;
return true;
@@ -291,42 +301,40 @@
*b = instruction;
return true;
}
- return false; // too many non-const operands
+ return false; // too many operands
}
-// Detect a + b + c for an optional constant c.
-static bool IsAddConst(HInstruction* instruction,
- /*out*/ HInstruction** a,
- /*out*/ HInstruction** b,
- /*out*/ int64_t* c) {
- if (instruction->IsAdd()) {
- // Try to find a + b and accumulated c.
- if (IsAddConstHelper(instruction->InputAt(0), a, b, c, /*depth*/ 0) &&
- IsAddConstHelper(instruction->InputAt(1), a, b, c, /*depth*/ 0) &&
- *b != nullptr) {
- return true;
+// Detect a + b + c with optional constant c.
+static bool IsAddConst2(HGraph* graph,
+ HInstruction* instruction,
+ /*out*/ HInstruction** a,
+ /*out*/ HInstruction** b,
+ /*out*/ int64_t* c) {
+ if (IsAddConst(instruction, a, b, c) && *a != nullptr) {
+ if (*b == nullptr) {
+ // Constant is usually already present, unless accumulated.
+ *b = graph->GetConstant(instruction->GetType(), (*c));
+ *c = 0;
}
- // Found a + b.
- *a = instruction->InputAt(0);
- *b = instruction->InputAt(1);
- *c = 0;
return true;
}
return false;
}
-// Detect a + c for constant c.
-static bool IsAddConst(HInstruction* instruction,
- /*out*/ HInstruction** a,
- /*out*/ int64_t* c) {
- if (instruction->IsAdd()) {
- if (IsInt64AndGet(instruction->InputAt(0), c)) {
- *a = instruction->InputAt(1);
- return true;
- } else if (IsInt64AndGet(instruction->InputAt(1), c)) {
- *a = instruction->InputAt(0);
- return true;
- }
+// Detect a direct a - b or a hidden a - (-c).
+static bool IsSubConst2(HGraph* graph,
+ HInstruction* instruction,
+ /*out*/ HInstruction** a,
+ /*out*/ HInstruction** b) {
+ int64_t c = 0;
+ if (instruction->IsSub()) {
+ *a = instruction->InputAt(0);
+ *b = instruction->InputAt(1);
+ return true;
+ } else if (IsAddConst(instruction, a, b, &c) && *a != nullptr && *b == nullptr) {
+ // Constant for the hidden subtraction.
+ *b = graph->GetConstant(instruction->GetType(), -c);
+ return true;
}
return false;
}
@@ -378,7 +386,8 @@
}
// Accept various saturated addition forms.
-static bool IsSaturatedAdd(HInstruction* clippee,
+static bool IsSaturatedAdd(HInstruction* a,
+ HInstruction* b,
DataType::Type type,
int64_t lo,
int64_t hi,
@@ -390,8 +399,7 @@
// Tighten the range for signed single clipping on constant.
if (!is_unsigned) {
int64_t c = 0;
- HInstruction* notused = nullptr;
- if (IsAddConst(clippee, ¬used, &c)) {
+ if (IsInt64AndGet(a, &c) || IsInt64AndGet(b, &c)) {
// For c in proper range and narrower operand r:
// MIN(r + c, 127) c > 0
// or MAX(r + c, -128) c < 0 (and possibly redundant bound).
@@ -413,7 +421,7 @@
}
// Accept various saturated subtraction forms.
-static bool IsSaturatedSub(HInstruction* clippee,
+static bool IsSaturatedSub(HInstruction* a,
DataType::Type type,
int64_t lo,
int64_t hi,
@@ -425,7 +433,7 @@
// Tighten the range for signed single clipping on constant.
if (!is_unsigned) {
int64_t c = 0;
- if (IsInt64AndGet(clippee->InputAt(0), /*out*/ &c)) {
+ if (IsInt64AndGet(a, /*out*/ &c)) {
// For c in proper range and narrower operand r:
// MIN(c - r, 127) c > 0
// or MAX(c - r, -128) c < 0 (and possibly redundant bound).
@@ -1521,8 +1529,7 @@
return false; // reject, unless all operands are same-extension narrower
}
// Accept MIN/MAX(x, y) for vectorizable operands.
- DCHECK(r != nullptr);
- DCHECK(s != nullptr);
+ DCHECK(r != nullptr && s != nullptr);
if (generate_code && vector_mode_ != kVector) { // de-idiom
r = opa;
s = opb;
@@ -2026,31 +2033,37 @@
instruction->GetType() != DataType::Type::kInt64) {
return false;
}
- // Clipped addition or subtraction?
+ // Clipped addition or subtraction on narrower operands? We will try both
+ // formats since, e.g., x+c can be interpreted as x+c and x-(-c), depending
+ // on what clipping values are used, to get the most benefit.
int64_t lo = std::numeric_limits<int64_t>::min();
int64_t hi = std::numeric_limits<int64_t>::max();
HInstruction* clippee = FindClippee(instruction, &lo, &hi);
- bool is_add = true;
- if (clippee->IsAdd()) {
- is_add = true;
- } else if (clippee->IsSub()) {
- is_add = false;
- } else {
- return false; // clippee is not add/sub
- }
- // Addition or subtraction on narrower operands?
+ HInstruction* a = nullptr;
+ HInstruction* b = nullptr;
HInstruction* r = nullptr;
HInstruction* s = nullptr;
bool is_unsigned = false;
- if (IsNarrowerOperands(clippee->InputAt(0), clippee->InputAt(1), type, &r, &s, &is_unsigned) &&
- (is_add ? IsSaturatedAdd(clippee, type, lo, hi, is_unsigned)
- : IsSaturatedSub(clippee, type, lo, hi, is_unsigned))) {
- DCHECK(r != nullptr);
- DCHECK(s != nullptr);
+ bool is_add = true;
+ int64_t c = 0;
+ // First try for saturated addition.
+ if (IsAddConst2(graph_, clippee, /*out*/ &a, /*out*/ &b, /*out*/ &c) && c == 0 &&
+ IsNarrowerOperands(a, b, type, &r, &s, &is_unsigned) &&
+ IsSaturatedAdd(r, s, type, lo, hi, is_unsigned)) {
+ is_add = true;
} else {
- return false;
+ // Then try again for saturated subtraction.
+ a = b = r = s = nullptr;
+ if (IsSubConst2(graph_, clippee, /*out*/ &a, /*out*/ &b) &&
+ IsNarrowerOperands(a, b, type, &r, &s, &is_unsigned) &&
+ IsSaturatedSub(r, type, lo, hi, is_unsigned)) {
+ is_add = false;
+ } else {
+ return false;
+ }
}
// Accept saturation idiom for vectorizable operands.
+ DCHECK(r != nullptr && s != nullptr);
if (generate_code && vector_mode_ != kVector) { // de-idiom
r = instruction->InputAt(0);
s = instruction->InputAt(1);
@@ -2101,8 +2114,7 @@
HInstruction* a = nullptr;
HInstruction* b = nullptr;
int64_t c = 0;
- if (IsAddConst(instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) {
- DCHECK(a != nullptr && b != nullptr);
+ if (IsAddConst2(graph_, instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) {
// Accept c == 1 (rounded) or c == 0 (not rounded).
bool is_rounded = false;
if (c == 1) {
@@ -2124,8 +2136,7 @@
}
// Accept recognized halving add for vectorizable operands. Vectorized code uses the
// shorthand idiomatic operation. Sequential code uses the original scalar expressions.
- DCHECK(r != nullptr);
- DCHECK(s != nullptr);
+ DCHECK(r != nullptr && s != nullptr);
if (generate_code && vector_mode_ != kVector) { // de-idiom
r = instruction->InputAt(0);
s = instruction->InputAt(1);
@@ -2175,19 +2186,11 @@
HInstruction* v = instruction->InputAt(1);
HInstruction* a = nullptr;
HInstruction* b = nullptr;
- if (v->GetType() == reduction_type && v->IsAbs()) {
- HInstruction* x = v->InputAt(0);
- if (x->GetType() == reduction_type) {
- int64_t c = 0;
- if (x->IsSub()) {
- a = x->InputAt(0);
- b = x->InputAt(1);
- } else if (IsAddConst(x, /*out*/ &a, /*out*/ &c)) {
- b = graph_->GetConstant(reduction_type, -c); // hidden SUB!
- }
- }
- }
- if (a == nullptr || b == nullptr) {
+ if (v->IsAbs() &&
+ v->GetType() == reduction_type &&
+ IsSubConst2(graph_, v->InputAt(0), /*out*/ &a, /*out*/ &b)) {
+ DCHECK(a != nullptr && b != nullptr);
+ } else {
return false;
}
// Accept same-type or consistent sign extension for narrower-type on operands a and b.
@@ -2220,8 +2223,7 @@
}
// Accept SAD idiom for vectorizable operands. Vectorized code uses the shorthand
// idiomatic operation. Sequential code uses the original scalar expressions.
- DCHECK(r != nullptr);
- DCHECK(s != nullptr);
+ DCHECK(r != nullptr && s != nullptr);
if (generate_code && vector_mode_ != kVector) { // de-idiom
r = s = v->InputAt(0);
}
diff --git a/test/646-checker-hadd-short/src/Main.java b/test/646-checker-hadd-short/src/Main.java
index 85c2fca..c09da81 100644
--- a/test/646-checker-hadd-short/src/Main.java
+++ b/test/646-checker-hadd-short/src/Main.java
@@ -26,6 +26,10 @@
static short[] sB2 = new short[M];
static short[] sBo = new short[M];
+ private static int $inline$mone() {
+ return -1;
+ }
+
/// CHECK-START: void Main.halving_add_signed(short[], short[], short[]) loop_optimization (before)
/// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none
/// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
@@ -184,6 +188,35 @@
}
}
+ /// CHECK-START: void Main.rounding_halving_add_signed_alt3(short[], short[], short[]) loop_optimization (before)
+ /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<M1:i\d+>> IntConstant -1 loop:none
+ /// CHECK-DAG: <<I9:i\d+>> IntConstant 9 loop:none
+ /// CHECK-DAG: <<M9:i\d+>> IntConstant -9 loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get1:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add1:i\d+>> Add [<<Get1>>,<<I9>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add2:i\d+>> Add [<<Get2>>,<<M9>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add3:i\d+>> Add [<<Add1>>,<<Add2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Add3>>,<<M1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Shr:i\d+>> Shr [<<Sub>>,<<I1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv:s\d+>> TypeConversion [<<Shr>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.rounding_halving_add_signed_alt3(short[], short[], short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:true loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>> outer_loop:none
+ private static void rounding_halving_add_signed_alt3(short[] b1, short[] b2, short[] bo) {
+ int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+ for (int i = 0; i < min_length; i++) {
+ // Computations that cancel to adding 1 also do not confuse recognition.
+ bo[i] = (short) (((b1[i] + 9) + (b2[i] - 9) - $inline$mone()) >> 1);
+ }
+ }
+
/// CHECK-START: void Main.rounding_halving_add_unsigned(short[], short[], short[]) instruction_simplifier (before)
/// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none
/// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535 loop:none
@@ -366,6 +399,11 @@
short e = (short) ((sB1[i] + sB2[i] + 1) >> 1);
expectEquals(e, sBo[i]);
}
+ rounding_halving_add_signed_alt3(sB1, sB2, sBo);
+ for (int i = 0; i < M; i++) {
+ short e = (short) ((sB1[i] + sB2[i] + 1) >> 1);
+ expectEquals(e, sBo[i]);
+ }
rounding_halving_add_unsigned(sB1, sB2, sBo);
for (int i = 0; i < M; i++) {
short e = (short) (((sB1[i] & 0xffff) + (sB2[i] & 0xffff) + 1) >> 1);
diff --git a/test/660-checker-simd-sad-short/src/Main.java b/test/660-checker-simd-sad-short/src/Main.java
index 8a44d9e..77c9e53 100644
--- a/test/660-checker-simd-sad-short/src/Main.java
+++ b/test/660-checker-simd-sad-short/src/Main.java
@@ -19,6 +19,10 @@
*/
public class Main {
+ private static int $inline$seven() {
+ return 7;
+ }
+
// TODO: lower precision still coming, b/64091002
private static short sadShort2Short(short[] s1, short[] s2) {
@@ -153,6 +157,102 @@
return sad;
}
+ /// CHECK-START: int Main.sadShort2IntConstant1(short[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Cons:i\d+>> IntConstant -7 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add:i\d+>> Add [<<Get1>>,<<Cons>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> Abs [<<Add>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-{ARM64,MIPS64}: int Main.sadShort2IntConstant1(short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Cons:i\d+>> IntConstant 7 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<Cons>>] loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Rep>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ private static int sadShort2IntConstant1(short[] s) {
+ int sad = 0;
+ for (int i = 0; i < s.length; i++) {
+ sad += Math.abs(s[i] - 7); // s[i] + -7
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadShort2IntConstant2(short[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Cons:i\d+>> IntConstant 7 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get1>>,<<Cons>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> Abs [<<Sub>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-{ARM64,MIPS64}: int Main.sadShort2IntConstant2(short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Cons:i\d+>> IntConstant 7 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<Cons>>] loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Rep>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ private static int sadShort2IntConstant2(short[] s) {
+ int sad = 0;
+ for (int i = 0; i < s.length; i++) {
+ sad += Math.abs(s[i] - $inline$seven()); // s[i] - 7
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadShort2IntConstant3(short[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Cons:i\d+>> IntConstant 7 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add:i\d+>> Add [<<Get1>>,<<Cons>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> Abs [<<Add>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-{ARM64,MIPS64}: int Main.sadShort2IntConstant3(short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Cons:i\d+>> IntConstant -7 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<Cons>>] loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Rep>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ private static int sadShort2IntConstant3(short[] s) {
+ int sad = 0;
+ for (int i = 0; i < s.length; i++) {
+ sad += Math.abs(s[i] + $inline$seven()); // hidden s[i] - (-7)
+ }
+ return sad;
+ }
+
/// CHECK-START: long Main.sadShort2Long(short[], short[]) loop_optimization (before)
/// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
/// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
@@ -243,6 +343,9 @@
expectEquals(65535, sadShort2IntAlt(s2, s1));
expectEquals(65535, sadShort2IntAlt2(s1, s2));
expectEquals(65535, sadShort2IntAlt2(s2, s1));
+ expectEquals(32880, sadShort2IntConstant1(s1));
+ expectEquals(32880, sadShort2IntConstant2(s1));
+ expectEquals(32866, sadShort2IntConstant3(s1));
expectEquals(65535L, sadShort2Long(s1, s2));
expectEquals(65535L, sadShort2Long(s2, s1));
expectEquals(65536L, sadShort2LongAt1(s1, s2));
@@ -279,6 +382,9 @@
expectEquals(1291788, sadShort2Int(s1, s2));
expectEquals(1291788, sadShort2IntAlt(s1, s2));
expectEquals(1291788, sadShort2IntAlt2(s1, s2));
+ expectEquals(823907, sadShort2IntConstant1(s1));
+ expectEquals(823907, sadShort2IntConstant2(s1));
+ expectEquals(823953, sadShort2IntConstant3(s1));
expectEquals(1291788L, sadShort2Long(s1, s2));
expectEquals(1291789L, sadShort2LongAt1(s1, s2));
diff --git a/test/678-checker-simd-saturation/src/Main.java b/test/678-checker-simd-saturation/src/Main.java
index d123cc2..decc691 100644
--- a/test/678-checker-simd-saturation/src/Main.java
+++ b/test/678-checker-simd-saturation/src/Main.java
@@ -19,6 +19,14 @@
*/
public class Main {
+ static final int $inline$p15() {
+ return 15;
+ }
+
+ static final int $inline$m15() {
+ return -15;
+ }
+
//
// Direct min-max.
//
@@ -230,8 +238,8 @@
/// CHECK-START-{ARM,ARM64}: void Main.satSubPConstSByte(byte[], byte[]) loop_optimization (after)
/// CHECK-DAG: <<Get1:d\d+>> VecReplicateScalar loop:none
/// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: <<Add:d\d+>> VecSaturationSub [<<Get1>>,<<Get2>>] packed_type:Int8 loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Add>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:d\d+>> VecSaturationSub [<<Get1>>,<<Get2>>] packed_type:Int8 loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Sub>>] loop:<<Loop>> outer_loop:none
public static void satSubPConstSByte(byte[] a, byte[] b) {
int n = Math.min(a.length, b.length);
for (int i = 0; i < n; i++) {
@@ -242,8 +250,8 @@
/// CHECK-START-{ARM,ARM64}: void Main.satSubNConstSByte(byte[], byte[]) loop_optimization (after)
/// CHECK-DAG: <<Get1:d\d+>> VecReplicateScalar loop:none
/// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: <<Add:d\d+>> VecSaturationSub [<<Get1>>,<<Get2>>] packed_type:Int8 loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Add>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:d\d+>> VecSaturationSub [<<Get1>>,<<Get2>>] packed_type:Int8 loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Sub>>] loop:<<Loop>> outer_loop:none
public static void satSubNConstSByte(byte[] a, byte[] b) {
int n = Math.min(a.length, b.length);
for (int i = 0; i < n; i++) {
@@ -282,8 +290,8 @@
/// CHECK-START-{ARM,ARM64}: void Main.satSubPConstSShort(short[], short[]) loop_optimization (after)
/// CHECK-DAG: <<Get1:d\d+>> VecReplicateScalar loop:none
/// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: <<Add:d\d+>> VecSaturationSub [<<Get1>>,<<Get2>>] packed_type:Int16 loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Add>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:d\d+>> VecSaturationSub [<<Get1>>,<<Get2>>] packed_type:Int16 loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Sub>>] loop:<<Loop>> outer_loop:none
public static void satSubPConstSShort(short[] a, short[] b) {
int n = Math.min(a.length, b.length);
for (int i = 0; i < n; i++) {
@@ -294,8 +302,8 @@
/// CHECK-START-{ARM,ARM64}: void Main.satSubNConstSShort(short[], short[]) loop_optimization (after)
/// CHECK-DAG: <<Get1:d\d+>> VecReplicateScalar loop:none
/// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: <<Add:d\d+>> VecSaturationSub [<<Get1>>,<<Get2>>] packed_type:Int16 loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Add>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:d\d+>> VecSaturationSub [<<Get1>>,<<Get2>>] packed_type:Int16 loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Sub>>] loop:<<Loop>> outer_loop:none
public static void satSubNConstSShort(short[] a, short[] b) {
int n = Math.min(a.length, b.length);
for (int i = 0; i < n; i++) {
@@ -304,7 +312,59 @@
}
//
- // Alternatives.
+ // Alternatives 8-bit clipping.
+ //
+
+ /// CHECK-START-{ARM,ARM64}: void Main.usatAddConst(byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: <<Get1:d\d+>> VecReplicateScalar loop:none
+ /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Add:d\d+>> VecSaturationAdd [<<Get2>>,<<Get1>>] packed_type:Uint8 loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Add>>] loop:<<Loop>> outer_loop:none
+ public static void usatAddConst(byte[] a, byte[] b) {
+ int n = Math.min(a.length, b.length);
+ for (int i = 0; i < n; i++) {
+ b[i] = (byte) Math.min((a[i] & 0xff) + $inline$p15(), 255);
+ }
+ }
+
+ /// CHECK-START-{ARM,ARM64}: void Main.usatAddConstAlt(byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: <<Get1:d\d+>> VecReplicateScalar loop:none
+ /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Add:d\d+>> VecSaturationAdd [<<Get2>>,<<Get1>>] packed_type:Uint8 loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Add>>] loop:<<Loop>> outer_loop:none
+ public static void usatAddConstAlt(byte[] a, byte[] b) {
+ int n = Math.min(a.length, b.length);
+ for (int i = 0; i < n; i++) {
+ b[i] = (byte) Math.min((a[i] & 0xff) - $inline$m15(), 255);
+ }
+ }
+
+ /// CHECK-START-{ARM,ARM64}: void Main.usatSubConst(byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: <<Get1:d\d+>> VecReplicateScalar loop:none
+ /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Sub:d\d+>> VecSaturationSub [<<Get2>>,<<Get1>>] packed_type:Uint8 loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Sub>>] loop:<<Loop>> outer_loop:none
+ public static void usatSubConst(byte[] a, byte[] b) {
+ int n = Math.min(a.length, b.length);
+ for (int i = 0; i < n; i++) {
+ b[i] = (byte) Math.max((a[i] & 0xff) - $inline$p15(), 0);
+ }
+ }
+
+ /// CHECK-START-{ARM,ARM64}: void Main.usatSubConstAlt(byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: <<Get1:d\d+>> VecReplicateScalar loop:none
+ /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Sub:d\d+>> VecSaturationSub [<<Get2>>,<<Get1>>] packed_type:Uint8 loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Sub>>] loop:<<Loop>> outer_loop:none
+ public static void usatSubConstAlt(byte[] a, byte[] b) {
+ int n = Math.min(a.length, b.length);
+ for (int i = 0; i < n; i++) {
+ b[i] = (byte) Math.max((a[i] & 0xff) + $inline$m15(), 0);
+ }
+ }
+
+ //
+ // Alternatives 16-bit clipping.
//
/// CHECK-START: void Main.satAlt1(short[], short[], short[]) loop_optimization (before)
@@ -442,6 +502,34 @@
}
}
+ /// CHECK-START-{ARM,ARM64}: void Main.usatSubConst(short[], short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Get1:d\d+>> VecReplicateScalar loop:none
+ /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Sub:d\d+>> VecSaturationSub [<<Get2>>,<<Get1>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Sub>>] loop:<<Loop>> outer_loop:none
+ public static void usatSubConst(short[] a, short[] b) {
+ int n = Math.min(a.length, b.length);
+ for (int i = 0; i < n; i++) {
+ int t = a[i] & 0xffff;
+ int s = t - $inline$p15();
+ b[i] = (short)(s > 0 ? s : 0);
+ }
+ }
+
+ /// CHECK-START-{ARM,ARM64}: void Main.usatSubConstAlt(short[], short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Get1:d\d+>> VecReplicateScalar loop:none
+ /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Phi:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Sub:d\d+>> VecSaturationSub [<<Get2>>,<<Get1>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Sub>>] loop:<<Loop>> outer_loop:none
+ public static void usatSubConstAlt(short[] a, short[] b) {
+ int n = Math.min(a.length, b.length);
+ for (int i = 0; i < n; i++) {
+ int t = a[i] & 0xffff;
+ int s = t + $inline$m15();
+ b[i] = (short)(s > 0 ? s : 0);
+ }
+ }
+
//
// Test drivers.
//
@@ -503,6 +591,27 @@
byte e = (byte) Math.max(-15 - b1[i], -128);
expectEquals(e, out[i]);
}
+ // Alternatives.
+ usatAddConst(b1, out);
+ for (int i = 0; i < m; i++) {
+ byte e = (byte) Math.min((b1[i] & 0xff) + 15, 255);
+ expectEquals(e, out[i]);
+ }
+ usatAddConstAlt(b1, out);
+ for (int i = 0; i < m; i++) {
+ byte e = (byte) Math.min((b1[i] & 0xff) + 15, 255);
+ expectEquals(e, out[i]);
+ }
+ usatSubConst(b1, out);
+ for (int i = 0; i < m; i++) {
+ byte e = (byte) Math.max((b1[i] & 0xff) - 15, 0);
+ expectEquals(e, out[i]);
+ }
+ usatSubConstAlt(b1, out);
+ for (int i = 0; i < m; i++) {
+ byte e = (byte) Math.max((b1[i] & 0xff) - 15, 0);
+ expectEquals(e, out[i]);
+ }
}
private static void test16Bit() {
@@ -630,6 +739,16 @@
short e = (short) Math.max(Math.min(s1[i] + 15, 32767), -32752);
expectEquals(e, out[i]);
}
+ usatSubConst(s1, out);
+ for (int i = 0; i < m; i++) {
+ short e = (short) Math.max((s1[i] & 0xffff) - 15, 0);
+ expectEquals(e, out[i]);
+ }
+ usatSubConstAlt(s1, out);
+ for (int i = 0; i < m; i++) {
+ short e = (short) Math.max((s1[i] & 0xffff) - 15, 0);
+ expectEquals(e, out[i]);
+ }
}
public static void main(String[] args) {