Vectorization of saturation arithmetic.
Rationale:
Because faster is better.
Bug: b/74026074
Test: test-art-host,target
Change-Id: Ifa970a62cef1c0b8bb1c593f629d8c724f1ffe0e
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index d3b081e..abd644a 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -36,6 +36,10 @@
// No loop unrolling factor (just one copy of the loop-body).
static constexpr uint32_t kNoUnrollingFactor = 1;
+// Values that indicate unbounded end.
+static constexpr int64_t kNoLo = std::numeric_limits<int64_t>::min();
+static constexpr int64_t kNoHi = std::numeric_limits<int64_t>::max();
+
//
// Static helpers.
//
@@ -331,6 +335,74 @@
return false;
}
+// Detect clipped [lo, hi] range for nested MIN-MAX operations on a clippee,
+// such as MIN(hi, MAX(lo, clippee)) for an arbitrary clippee expression.
+// Example: MIN(10, MIN(20, MAX(0, x))) yields [0, 10] with clippee x.
+static bool IsClipped(HInstruction* instruction,
+ /*out*/ int64_t* lo,
+ /*out*/ int64_t* hi,
+ /*out*/ HInstruction** clippee) {
+ // Recurse into MIN-MAX expressions and 'tighten' the range [lo, hi].
+ if (instruction->IsMin() || instruction->IsMax()) {
+ // Find MIN-MAX(const, ..) or MIN-MAX(.., const).
+ for (int i = 0; i < 2; i++) {
+ int64_t c = 0;
+ if (IsInt64AndGet(instruction->InputAt(i), &c)) {
+ if (instruction->IsMin()) {
+ *hi = std::min(*hi, c);
+ } else {
+ *lo = std::max(*lo, c);
+ }
+ return IsClipped(instruction->InputAt(1 - i), lo, hi, clippee);
+ }
+ }
+ // Recursion fails at any MIN/MAX that does not have one constant
+ // argument, e.g. MIN(x, y) or MAX(2 * x, f()).
+ return false;
+ }
+ // Recursion ends in any other expression. At this point we record the leaf
+ // expression as the clippee and report success on the range [lo, hi].
+ DCHECK(*clippee == nullptr);
+ *clippee = instruction;
+ return true;
+}
+
+// Accept various saturated addition forms.
+static bool IsSaturatedAdd(DataType::Type type, int64_t lo, int64_t hi, bool is_unsigned) {
+ // MIN(r + s, 255) => SAT_ADD_unsigned
+ // MAX(MIN(r + s, 127), -128) => SAT_ADD_signed etc.
+ if (DataType::Size(type) == 1) {
+ return is_unsigned
+ ? (lo <= 0 && hi == std::numeric_limits<uint8_t>::max())
+ : (lo == std::numeric_limits<int8_t>::min() &&
+ hi == std::numeric_limits<int8_t>::max());
+ } else if (DataType::Size(type) == 2) {
+ return is_unsigned
+ ? (lo <= 0 && hi == std::numeric_limits<uint16_t>::max())
+ : (lo == std::numeric_limits<int16_t>::min() &&
+ hi == std::numeric_limits<int16_t>::max());
+ }
+ return false;
+}
+
+// Accept various saturated subtraction forms.
+static bool IsSaturatedSub(DataType::Type type, int64_t lo, int64_t hi, bool is_unsigned) {
+ // MAX(r - s, 0) => SAT_SUB_unsigned
+ // MIN(MAX(r - s, -128), 127) => SAT_ADD_signed etc.
+ if (DataType::Size(type) == 1) {
+ return is_unsigned
+ ? (lo == 0 && hi >= std::numeric_limits<uint8_t>::max())
+ : (lo == std::numeric_limits<int8_t>::min() &&
+ hi == std::numeric_limits<int8_t>::max());
+ } else if (DataType::Size(type) == 2) {
+ return is_unsigned
+ ? (lo == 0 && hi >= std::numeric_limits<uint16_t>::min())
+ : (lo == std::numeric_limits<int16_t>::min() &&
+ hi == std::numeric_limits<int16_t>::max());
+ }
+ return false;
+}
+
// Detect reductions of the following forms,
// x = x_phi + ..
// x = x_phi - ..
@@ -1109,7 +1181,6 @@
return !IsUsedOutsideLoop(node->loop_info, instruction) && !instruction->DoesAnyWrite();
}
-// TODO: saturation arithmetic.
bool HLoopOptimization::VectorizeUse(LoopNode* node,
HInstruction* instruction,
bool generate_code,
@@ -1308,6 +1379,10 @@
return true;
}
} else if (instruction->IsMin() || instruction->IsMax()) {
+ // Recognize saturation arithmetic.
+ if (VectorizeSaturationIdiom(node, instruction, generate_code, type, restrictions)) {
+ return true;
+ }
// Deal with vector restrictions.
HInstruction* opa = instruction->InputAt(0);
HInstruction* opb = instruction->InputAt(1);
@@ -1439,11 +1514,11 @@
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoSaturation;
return TrySetVectorLength(16);
case DataType::Type::kUint16:
case DataType::Type::kInt16:
- *restrictions |= kNoDiv | kNoStringCharAt;
+ *restrictions |= kNoDiv | kNoSaturation | kNoStringCharAt;
return TrySetVectorLength(8);
case DataType::Type::kInt32:
*restrictions |= kNoDiv;
@@ -1468,11 +1543,11 @@
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoSaturation;
return TrySetVectorLength(16);
case DataType::Type::kUint16:
case DataType::Type::kInt16:
- *restrictions |= kNoDiv | kNoStringCharAt;
+ *restrictions |= kNoDiv | kNoSaturation | kNoStringCharAt;
return TrySetVectorLength(8);
case DataType::Type::kInt32:
*restrictions |= kNoDiv;
@@ -1811,6 +1886,73 @@
// Vectorization idioms.
//
+// Method recognizes single and double clipping saturation arithmetic.
+bool HLoopOptimization::VectorizeSaturationIdiom(LoopNode* node,
+ HInstruction* instruction,
+ bool generate_code,
+ DataType::Type type,
+ uint64_t restrictions) {
+ // Deal with vector restrictions.
+ if (HasVectorRestrictions(restrictions, kNoSaturation)) {
+ return false;
+ }
+ // Search for clipping of a clippee.
+ int64_t lo = kNoLo;
+ int64_t hi = kNoHi;
+ HInstruction* clippee = nullptr;
+ if (!IsClipped(instruction, &lo, &hi, &clippee)) {
+ return false;
+ }
+ CHECK(clippee != nullptr);
+ // Clipped addition or subtraction?
+ bool is_add = true;
+ if (clippee->IsAdd()) {
+ is_add = true;
+ } else if (clippee->IsSub()) {
+ is_add = false;
+ } else {
+ return false; // clippee is not add/sub
+ }
+ // Addition or subtraction on narrower operands?
+ HInstruction* r = nullptr;
+ HInstruction* s = nullptr;
+ bool is_unsigned = false;
+ if (IsNarrowerOperands(clippee->InputAt(0), clippee->InputAt(1), type, &r, &s, &is_unsigned) &&
+ (is_add ? IsSaturatedAdd(type, lo, hi, is_unsigned)
+ : IsSaturatedSub(type, lo, hi, is_unsigned))) {
+ DCHECK(r != nullptr);
+ DCHECK(s != nullptr);
+ } else {
+ return false;
+ }
+ // Accept saturation idiom for vectorizable operands.
+ if (generate_code && vector_mode_ != kVector) { // de-idiom
+ r = instruction->InputAt(0);
+ s = instruction->InputAt(1);
+ restrictions &= ~(kNoHiBits | kNoMinMax); // allow narrow MIN/MAX in seq
+ }
+ if (VectorizeUse(node, r, generate_code, type, restrictions) &&
+ VectorizeUse(node, s, generate_code, type, restrictions)) {
+ if (generate_code) {
+ if (vector_mode_ == kVector) {
+ DataType::Type vtype = HVecOperation::ToProperType(type, is_unsigned);
+ HInstruction* op1 = vector_map_->Get(r);
+ HInstruction* op2 = vector_map_->Get(s);
+ vector_map_->Put(instruction, is_add
+ ? reinterpret_cast<HInstruction*>(new (global_allocator_) HVecSaturationAdd(
+ global_allocator_, op1, op2, vtype, vector_length_, kNoDexPc))
+ : reinterpret_cast<HInstruction*>(new (global_allocator_) HVecSaturationSub(
+ global_allocator_, op1, op2, vtype, vector_length_, kNoDexPc)));
+ MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom);
+ } else {
+ GenerateVecOp(instruction, vector_map_->Get(r), vector_map_->Get(s), type);
+ }
+ }
+ return true;
+ }
+ return false;
+}
+
// Method recognizes the following idioms:
// rounding halving add (a + b + 1) >> 1 for unsigned/signed operands a, b
// truncated halving add (a + b) >> 1 for unsigned/signed operands a, b