Implement halving add idiom (with checker tests).
Rationale:
First of several idioms that map to very efficient SIMD instructions.
Note that the is-zero-ext and is-sign-ext are general-purpose utilities
that will be widely used in the vectorizer to detect low precision
idioms, so expect that code to be shared with many CLs to come.
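For illustration, the kind of scalar loop the new recognizer targets
(a hypothetical C++ transliteration of the Java pattern; the checker
tests exercise the real Java loops, and the "& 0xff" masks are the
zero extensions the new utilities detect):

  // Rounded unsigned halving add over bytes: operands are widened,
  // averaged with rounding, then narrowed back to a byte. Maps to
  // URHADD.16B on ARM64 and PAVGB on x86/x86-64.
  void RoundedHalvingAddU8(const int8_t* a, const int8_t* b,
                           int8_t* out, int n) {
    for (int i = 0; i < n; i++) {
      out[i] = static_cast<int8_t>(((a[i] & 0xff) + (b[i] & 0xff) + 1) >> 1);
    }
  }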
Test: test-art-host, test-art-target
Change-Id: If7dc2926c72a2e4b5cea15c44ef68cf5503e9be9
diff --git a/compiler/optimizing/code_generator_vector_arm.cc b/compiler/optimizing/code_generator_vector_arm.cc
index e7f7b30..6e82123 100644
--- a/compiler/optimizing/code_generator_vector_arm.cc
+++ b/compiler/optimizing/code_generator_vector_arm.cc
@@ -124,6 +124,14 @@
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderARM::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderARM::VisitVecSub(HVecSub* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -148,6 +156,22 @@
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderARM::VisitVecMin(HVecMin* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecMin(HVecMin* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecMax(HVecMax* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecMax(HVecMax* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderARM::VisitVecAnd(HVecAnd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 0923920..2dfccff 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -318,6 +318,47 @@
}
}
+void LocationsBuilderARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ instruction->IsRounded()
+ ? __ Urhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
+ : __ Uhadd(dst.V16B(), lhs.V16B(), rhs.V16B());
+ } else {
+ instruction->IsRounded()
+ ? __ Srhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
+ : __ Shadd(dst.V16B(), lhs.V16B(), rhs.V16B());
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ instruction->IsRounded()
+ ? __ Urhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
+ : __ Uhadd(dst.V8H(), lhs.V8H(), rhs.V8H());
+ } else {
+ instruction->IsRounded()
+ ? __ Srhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
+ : __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H());
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderARM64::VisitVecSub(HVecSub* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -420,6 +461,22 @@
}
}
+void LocationsBuilderARM64::VisitVecMin(HVecMin* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) {
+ LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
+}
+
+void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) {
+ LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
+}
+
void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
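As a reference for the four variants selected above, the per-lane semantics
of SHADD/SRHADD/UHADD/URHADD can be modeled in scalar code as follows (a
sketch for byte lanes only; the hardware keeps its intermediate in lane
width plus one extra bit, so the wider C++ type is merely a modeling device):

  #include <cstdint>

  int8_t ShaddLane(int8_t x, int8_t y)     { return static_cast<int8_t>((int32_t{x} + y) >> 1); }
  int8_t SrhaddLane(int8_t x, int8_t y)    { return static_cast<int8_t>((int32_t{x} + y + 1) >> 1); }
  uint8_t UhaddLane(uint8_t x, uint8_t y)  { return static_cast<uint8_t>((uint32_t{x} + y) >> 1); }
  uint8_t UrhaddLane(uint8_t x, uint8_t y) { return static_cast<uint8_t>((uint32_t{x} + y + 1) >> 1); }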
diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc
index 74fa584..990178b 100644
--- a/compiler/optimizing/code_generator_vector_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc
@@ -124,6 +124,14 @@
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderARMVIXL::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderARMVIXL::VisitVecSub(HVecSub* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -148,6 +156,22 @@
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderARMVIXL::VisitVecMin(HVecMin* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecMin(HVecMin* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecMax(HVecMax* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecMax(HVecMax* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderARMVIXL::VisitVecAnd(HVecAnd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc
index 6969abd..8ea1ca7 100644
--- a/compiler/optimizing/code_generator_vector_mips.cc
+++ b/compiler/optimizing/code_generator_vector_mips.cc
@@ -124,6 +124,14 @@
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderMIPS::VisitVecSub(HVecSub* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -148,6 +156,22 @@
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderMIPS::VisitVecMin(HVecMin* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecMin(HVecMin* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecMax(HVecMax* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecMax(HVecMax* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderMIPS::VisitVecAnd(HVecAnd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc
index 87118ce..a484bb4 100644
--- a/compiler/optimizing/code_generator_vector_mips64.cc
+++ b/compiler/optimizing/code_generator_vector_mips64.cc
@@ -124,6 +124,14 @@
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderMIPS64::VisitVecSub(HVecSub* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -148,6 +156,22 @@
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderMIPS64::VisitVecMin(HVecMin* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecMin(HVecMin* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecMax(HVecMax* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecMax(HVecMax* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderMIPS64::VisitVecAnd(HVecAnd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 8dabb4d..a86d060 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -350,6 +350,35 @@
}
}
+void LocationsBuilderX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+
+ DCHECK(instruction->IsRounded());
+ DCHECK(instruction->IsUnsigned());
+
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ pavgb(dst, src);
+ return;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ pavgw(dst, src);
+ return;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderX86::VisitVecSub(HVecSub* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -448,6 +477,22 @@
}
}
+void LocationsBuilderX86::VisitVecMin(HVecMin* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
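The DCHECKs above encode that PAVGB/PAVGW implement exactly one of the four
halving-add variants, the rounded unsigned one; the new
kNoSignedHAdd/kNoUnroundedHAdd restrictions in loop_optimization.cc keep the
other variants away from this backend. The per-lane behavior can be checked
exhaustively against the wide-precision definition (an illustrative sketch,
not part of this CL):

  #include <cassert>
  #include <cstdint>

  // PAVGB per-lane model: (x + y + 1) >> 1 with a 9-bit unsigned intermediate.
  uint8_t PavgbLane(uint8_t x, uint8_t y) {
    return static_cast<uint8_t>((uint32_t{x} + y + 1) >> 1);
  }

  int main() {
    for (int x = 0; x < 256; x++) {
      for (int y = 0; y < 256; y++) {
        assert(PavgbLane(x, y) == (x + y + 1) / 2);  // max intermediate is 511
      }
    }
    return 0;
  }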
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index e956088..6967353 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -343,6 +343,36 @@
}
}
+void LocationsBuilderX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+
+ DCHECK(instruction->IsRounded());
+ DCHECK(instruction->IsUnsigned());
+
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ pavgb(dst, src);
+ return;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ pavgw(dst, src);
+ return;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderX86_64::VisitVecSub(HVecSub* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -441,6 +466,22 @@
}
}
+void LocationsBuilderX86_64::VisitVecMin(HVecMin* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index cc3c143..1b2b9f8 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -509,6 +509,11 @@
StartAttributeStream("kind") << deoptimize->GetKind();
}
+ void VisitVecHalvingAdd(HVecHalvingAdd* hadd) OVERRIDE {
+ StartAttributeStream("unsigned") << std::boolalpha << hadd->IsUnsigned() << std::noboolalpha;
+ StartAttributeStream("rounded") << std::boolalpha << hadd->IsRounded() << std::noboolalpha;
+ }
+
#if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64)
void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) OVERRIDE {
StartAttributeStream("kind") << instruction->GetOpKind();
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 1c8674d..7c833cf 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -45,18 +45,6 @@
return c2 != 0 && CanLongValueFitIntoInt(static_cast<int64_t>(c1) / static_cast<int64_t>(c2));
}
-/** Returns true for 32/64-bit constant instruction. */
-static bool IsIntAndGet(HInstruction* instruction, int64_t* value) {
- if (instruction->IsIntConstant()) {
- *value = instruction->AsIntConstant()->GetValue();
- return true;
- } else if (instruction->IsLongConstant()) {
- *value = instruction->AsLongConstant()->GetValue();
- return true;
- }
- return false;
-}
-
/** Computes a * b for a,b > 0 (at least until first overflow happens). */
static int64_t SafeMul(int64_t a, int64_t b, /*out*/ bool* overflow) {
if (a > 0 && b > 0 && a > (std::numeric_limits<int64_t>::max() / b)) {
@@ -106,7 +94,7 @@
}
}
int64_t value = -1;
- return IsIntAndGet(instruction, &value) && value >= 0;
+ return IsInt64AndGet(instruction, &value) && value >= 0;
}
/** Hunts "under the hood" for a suitable instruction at the hint. */
@@ -149,7 +137,7 @@
int64_t value;
if (v.instruction->IsDiv() &&
v.instruction->InputAt(0)->IsArrayLength() &&
- IsIntAndGet(v.instruction->InputAt(1), &value) && v.a_constant == value) {
+ IsInt64AndGet(v.instruction->InputAt(1), &value) && v.a_constant == value) {
return InductionVarRange::Value(v.instruction->InputAt(0), 1, v.b_constant);
}
// If a == 1, the most suitable one suffices as maximum value.
@@ -444,7 +432,7 @@
// any of the three requests (kExact, kAtMost, and kAtLeast).
if (info->induction_class == HInductionVarAnalysis::kInvariant &&
info->operation == HInductionVarAnalysis::kFetch) {
- if (IsIntAndGet(info->fetch, value)) {
+ if (IsInt64AndGet(info->fetch, value)) {
return true;
}
}
@@ -635,7 +623,7 @@
int64_t f = 0;
if (IsConstant(info->op_a, kExact, &a) &&
CanLongValueFitIntoInt(a) &&
- IsIntAndGet(info->fetch, &f) && f >= 1) {
+ IsInt64AndGet(info->fetch, &f) && f >= 1) {
// Conservative bounds on a * f^-i + b with f >= 1 can be computed without
// trip count. Other forms would require a much more elaborate evaluation.
const bool is_min_a = a >= 0 ? is_min : !is_min;
@@ -663,7 +651,7 @@
// Unless at a constant or hint, chase the instruction a bit deeper into the HIR tree, so that
// it becomes more likely range analysis will compare the same instructions as terminal nodes.
int64_t value;
- if (IsIntAndGet(instruction, &value) && CanLongValueFitIntoInt(value)) {
+ if (IsInt64AndGet(instruction, &value) && CanLongValueFitIntoInt(value)) {
// Proper constant reveals best information.
return Value(static_cast<int32_t>(value));
} else if (instruction == chase_hint_) {
@@ -671,10 +659,10 @@
return Value(instruction, 1, 0);
} else if (instruction->IsAdd()) {
// Incorporate suitable constants in the chased value.
- if (IsIntAndGet(instruction->InputAt(0), &value) && CanLongValueFitIntoInt(value)) {
+ if (IsInt64AndGet(instruction->InputAt(0), &value) && CanLongValueFitIntoInt(value)) {
return AddValue(Value(static_cast<int32_t>(value)),
GetFetch(instruction->InputAt(1), trip, in_body, is_min));
- } else if (IsIntAndGet(instruction->InputAt(1), &value) && CanLongValueFitIntoInt(value)) {
+ } else if (IsInt64AndGet(instruction->InputAt(1), &value) && CanLongValueFitIntoInt(value)) {
return AddValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min),
Value(static_cast<int32_t>(value)));
}
@@ -1074,7 +1062,7 @@
// Detect known base and trip count (always taken).
int64_t f = 0;
int64_t m = 0;
- if (IsIntAndGet(info->fetch, &f) && f >= 1 && IsConstant(trip->op_a, kExact, &m) && m >= 1) {
+ if (IsInt64AndGet(info->fetch, &f) && f >= 1 && IsConstant(trip->op_a, kExact, &m) && m >= 1) {
HInstruction* opa = nullptr;
HInstruction* opb = nullptr;
if (GenerateCode(info->op_a, nullptr, graph, block, &opa, false, false) &&
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 8e88c1e..5a95abd 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -63,12 +63,122 @@
return false;
}
+// Detect a sign extension from the given type. Returns the promoted operand on success.
+static bool IsSignExtensionAndGet(HInstruction* instruction,
+ Primitive::Type type,
+ /*out*/ HInstruction** operand) {
+ // Accept any already wider constant that would be handled properly by sign
+ // extension when represented in the *width* of the given narrower data type
+ // (the fact that char normally zero extends does not matter here).
+ int64_t value = 0;
+ if (IsInt64AndGet(instruction, &value)) {
+ switch (type) {
+ case Primitive::kPrimByte:
+ if (std::numeric_limits<int8_t>::min() <= value &&
+ std::numeric_limits<int8_t>::max() >= value) {
+ *operand = instruction;
+ return true;
+ }
+ return false;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ if (std::numeric_limits<int16_t>::min() <= value &&
+ std::numeric_limits<int16_t>::max() >= value) {
+ *operand = instruction;
+ return true;
+ }
+ return false;
+ default:
+ return false;
+ }
+ }
+ // An implicit widening conversion of a signed integer to an integral type sign-extends
+ // the two's-complement representation of the integer value to fill the wider format.
+ if (instruction->GetType() == type && (instruction->IsArrayGet() ||
+ instruction->IsStaticFieldGet() ||
+ instruction->IsInstanceFieldGet())) {
+ switch (type) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ *operand = instruction;
+ return true;
+ default:
+ return false;
+ }
+ }
+ // TODO: perhaps explicit conversions later too?
+ // (this may return something different from instruction)
+ return false;
+}
+
+// Detect a zero extension from the given type. Returns the promoted operand on success.
+static bool IsZeroExtensionAndGet(HInstruction* instruction,
+ Primitive::Type type,
+ /*out*/ HInstruction** operand) {
+ // Accept any already wider constant that would be handled properly by zero
+ // extension when represented in the *width* of the given narrower data type
+ // (the fact that byte/short normally sign extend does not matter here).
+ int64_t value = 0;
+ if (IsInt64AndGet(instruction, &value)) {
+ switch (type) {
+ case Primitive::kPrimByte:
+ if (std::numeric_limits<uint8_t>::min() <= value &&
+ std::numeric_limits<uint8_t>::max() >= value) {
+ *operand = instruction;
+ return true;
+ }
+ return false;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ if (std::numeric_limits<uint16_t>::min() <= value &&
+ std::numeric_limits<uint16_t>::max() >= value) {
+ *operand = instruction;
+ return true;
+ }
+ return false;
+ default:
+ return false;
+ }
+ }
+ // An implicit widening conversion of a char to an integral type zero-extends
+ // the representation of the char value to fill the wider format.
+ if (instruction->GetType() == type && (instruction->IsArrayGet() ||
+ instruction->IsStaticFieldGet() ||
+ instruction->IsInstanceFieldGet())) {
+ if (type == Primitive::kPrimChar) {
+ *operand = instruction;
+ return true;
+ }
+ }
+ // A sign (or zero) extension followed by an explicit removal of just the
+ // higher sign bits is equivalent to a zero extension of the underlying operand.
+ if (instruction->IsAnd()) {
+ int64_t mask = 0;
+ HInstruction* a = instruction->InputAt(0);
+ HInstruction* b = instruction->InputAt(1);
+ // In (a & b) find (mask & b) or (a & mask) with sign or zero extension on the non-mask.
+ if ((IsInt64AndGet(a, /*out*/ &mask) && (IsSignExtensionAndGet(b, type, /*out*/ operand) ||
+ IsZeroExtensionAndGet(b, type, /*out*/ operand))) ||
+ (IsInt64AndGet(b, /*out*/ &mask) && (IsSignExtensionAndGet(a, type, /*out*/ operand) ||
+ IsZeroExtensionAndGet(a, type, /*out*/ operand)))) {
+ switch ((*operand)->GetType()) {
+ case Primitive::kPrimByte: return mask == std::numeric_limits<uint8_t>::max();
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort: return mask == std::numeric_limits<uint16_t>::max();
+ default: return false;
+ }
+ }
+ }
+ // TODO: perhaps explicit conversions later too?
+ return false;
+}
+
// Test vector restrictions.
static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) {
return (restrictions & tested) != 0;
}
-// Inserts an instruction.
+// Insert an instruction.
static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) {
DCHECK(block != nullptr);
DCHECK(instruction != nullptr);
@@ -713,6 +823,10 @@
return true;
}
} else if (instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()) {
+ // Recognize vectorization idioms.
+ if (VectorizeHalvingAddIdiom(node, instruction, generate_code, type, restrictions)) {
+ return true;
+ }
// Deal with vector restrictions.
if ((HasVectorRestrictions(restrictions, kNoShift)) ||
(instruction->IsShr() && HasVectorRestrictions(restrictions, kNoShr))) {
@@ -806,11 +920,11 @@
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoMul | kNoDiv | kNoShift | kNoAbs;
+ *restrictions |= kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoAbs;
+ *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
*restrictions |= kNoDiv;
@@ -1039,6 +1153,90 @@
#undef GENERATE_VEC
//
+// Vectorization idioms.
+//
+
+// Method recognizes the following idioms:
+// rounding halving add (a + b + 1) >> 1 for unsigned/signed operands a, b
+// regular halving add (a + b) >> 1 for unsigned/signed operands a, b
+// Provided that the operands are promoted to a wider form to do the arithmetic and
+// then cast back to narrower form, the idioms can be mapped into efficient SIMD
+// implementation that operates directly in narrower form (plus one extra bit).
+// TODO: current version recognizes implicit byte/short/char widening only;
+// explicit widening from int to long could be added later.
+bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node,
+ HInstruction* instruction,
+ bool generate_code,
+ Primitive::Type type,
+ uint64_t restrictions) {
+ // Test for top level arithmetic shift right x >> 1 or logical shift right x >>> 1
+ // (note whether the sign bit in higher precision is shifted in has no effect
+ // on the narrow precision computed by the idiom).
+ int64_t value = 0;
+ if ((instruction->IsShr() ||
+ instruction->IsUShr()) &&
+ IsInt64AndGet(instruction->InputAt(1), &value) && value == 1) {
+ //
+ // TODO: make following code less sensitive to associativity and commutativity differences.
+ //
+ HInstruction* x = instruction->InputAt(0);
+ // Test for an optional rounding part (x + 1) >> 1.
+ bool is_rounded = false;
+ if (x->IsAdd() && IsInt64AndGet(x->InputAt(1), &value) && value == 1) {
+ x = x->InputAt(0);
+ is_rounded = true;
+ }
+ // Test for a core addition (a + b) >> 1 (possibly rounded), either unsigned or signed.
+ if (x->IsAdd()) {
+ HInstruction* a = x->InputAt(0);
+ HInstruction* b = x->InputAt(1);
+ HInstruction* r = nullptr;
+ HInstruction* s = nullptr;
+ bool is_unsigned = false;
+ if (IsZeroExtensionAndGet(a, type, &r) && IsZeroExtensionAndGet(b, type, &s)) {
+ is_unsigned = true;
+ } else if (IsSignExtensionAndGet(a, type, &r) && IsSignExtensionAndGet(b, type, &s)) {
+ is_unsigned = false;
+ } else {
+ return false;
+ }
+ // Deal with vector restrictions.
+ if ((!is_unsigned && HasVectorRestrictions(restrictions, kNoSignedHAdd)) ||
+ (!is_rounded && HasVectorRestrictions(restrictions, kNoUnroundedHAdd))) {
+ return false;
+ }
+ // Accept recognized halving add for vectorizable operands. Vectorized code uses the
+ // shorthand idiomatic operation. Sequential code uses the original scalar expressions.
+ DCHECK(r != nullptr && s != nullptr);
+ if (VectorizeUse(node, r, generate_code, type, restrictions) &&
+ VectorizeUse(node, s, generate_code, type, restrictions)) {
+ if (generate_code) {
+ if (vector_mode_ == kVector) {
+ vector_map_->Put(instruction, new (global_allocator_) HVecHalvingAdd(
+ global_allocator_,
+ vector_map_->Get(r),
+ vector_map_->Get(s),
+ type,
+ vector_length_,
+ is_unsigned,
+ is_rounded));
+ } else {
+ VectorizeUse(node, instruction->InputAt(0), generate_code, type, restrictions);
+ VectorizeUse(node, instruction->InputAt(1), generate_code, type, restrictions);
+ GenerateVecOp(instruction,
+ vector_map_->Get(instruction->InputAt(0)),
+ vector_map_->Get(instruction->InputAt(1)),
+ type);
+ }
+ }
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+//
// Helpers.
//
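To make the And-mask path in IsZeroExtensionAndGet concrete: a Java byte
load sign-extends to int, and a following "& 0xff" keeps only the low eight
bits, which is exactly a zero extension of the underlying byte; that is why
a masked sign (or zero) extension is accepted as an unsigned operand. A
scalar sketch of the equivalence (illustrative only):

  #include <cassert>
  #include <cstdint>

  int main() {
    for (int v = -128; v <= 127; v++) {
      int sign_extended = static_cast<int8_t>(v);   // what a byte ArrayGet yields
      int masked = sign_extended & 0xff;            // the explicit HAnd in the HIR
      int zero_extended = static_cast<uint8_t>(v);  // the zero extension the idiom needs
      assert(masked == zero_extended);
    }
    return 0;
  }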
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index d8f50aa..4a7da86 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -62,13 +62,15 @@
* Vectorization restrictions (bit mask).
*/
enum VectorRestrictions {
- kNone = 0, // no restrictions
- kNoMul = 1, // no multiplication
- kNoDiv = 2, // no division
- kNoShift = 4, // no shift
- kNoShr = 8, // no arithmetic shift right
- kNoHiBits = 16, // "wider" operations cannot bring in higher order bits
- kNoAbs = 32, // no absolute value
+ kNone = 0, // no restrictions
+ kNoMul = 1, // no multiplication
+ kNoDiv = 2, // no division
+ kNoShift = 4, // no shift
+ kNoShr = 8, // no arithmetic shift right
+ kNoHiBits = 16, // "wider" operations cannot bring in higher order bits
+ kNoSignedHAdd = 32, // no signed halving add
+ kNoUnroundedHAdd = 64, // no unrounded halving add
+ kNoAbs = 128, // no absolute value
};
/*
@@ -136,6 +138,13 @@
Primitive::Type type);
void GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, Primitive::Type type);
+ // Vectorization idioms.
+ bool VectorizeHalvingAddIdiom(LoopNode* node,
+ HInstruction* instruction,
+ bool generate_code,
+ Primitive::Type type,
+ uint64_t restrictions);
+
// Helpers.
bool TrySetPhiInduction(HPhi* phi, bool restrict_uses);
bool TrySetSimpleLoopHeader(HBasicBlock* block);
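The two new bits gate the halving-add variants independently, kNoSignedHAdd
rejecting the signed forms and kNoUnroundedHAdd the non-rounding forms. A
minimal sketch of the gating performed in VectorizeHalvingAddIdiom (the
helper name is hypothetical; the enum values are the ones above):

  bool HAddAllowed(uint64_t restrictions, bool is_unsigned, bool is_rounded) {
    if (!is_unsigned && (restrictions & kNoSignedHAdd) != 0) return false;
    if (!is_rounded && (restrictions & kNoUnroundedHAdd) != 0) return false;
    return true;
  }

On x86/x86-64, where both bits are set for byte and halfword vectors, only
the rounded unsigned combination survives, matching PAVGB/PAVGW; ARM64
leaves both bits clear, so all four variants vectorize there.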
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index c109369..6be237e 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1369,9 +1369,12 @@
M(VecAbs, VecUnaryOperation) \
M(VecNot, VecUnaryOperation) \
M(VecAdd, VecBinaryOperation) \
+ M(VecHalvingAdd, VecBinaryOperation) \
M(VecSub, VecBinaryOperation) \
M(VecMul, VecBinaryOperation) \
M(VecDiv, VecBinaryOperation) \
+ M(VecMin, VecBinaryOperation) \
+ M(VecMax, VecBinaryOperation) \
M(VecAnd, VecBinaryOperation) \
M(VecAndNot, VecBinaryOperation) \
M(VecOr, VecBinaryOperation) \
@@ -6845,6 +6848,7 @@
DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopReversePostOrderIterator);
};
+// Returns the int64_t value of a properly typed constant.
inline int64_t Int64FromConstant(HConstant* constant) {
if (constant->IsIntConstant()) {
return constant->AsIntConstant()->GetValue();
@@ -6856,6 +6860,18 @@
}
}
+// Returns true iff instruction is an integral constant (and sets value on success).
+inline bool IsInt64AndGet(HInstruction* instruction, /*out*/ int64_t* value) {
+ if (instruction->IsIntConstant()) {
+ *value = instruction->AsIntConstant()->GetValue();
+ return true;
+ } else if (instruction->IsLongConstant()) {
+ *value = instruction->AsLongConstant()->GetValue();
+ return true;
+ }
+ return false;
+}
+
#define INSTRUCTION_TYPE_CHECK(type, super) \
inline bool HInstruction::Is##type() const { return GetKind() == k##type; } \
inline const H##type* HInstruction::As##type() const { \
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 0cbbf2a..bff58d0 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -338,6 +338,42 @@
DISALLOW_COPY_AND_ASSIGN(HVecAdd);
};
+// Performs halving add on every component in the two vectors, viz.
+// rounded [ x1, .. , xn ] hradd [ y1, .. , yn ] = [ (x1 + y1 + 1) >> 1, .. , (xn + yn + 1) >> 1 ]
+// or [ x1, .. , xn ] hadd [ y1, .. , yn ] = [ (x1 + y1) >> 1, .. , (xn + yn ) >> 1 ]
+// for signed operands x, y (sign extension) or unsigned operands x, y (zero extension).
+class HVecHalvingAdd FINAL : public HVecBinaryOperation {
+ public:
+ HVecHalvingAdd(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ bool is_unsigned,
+ bool is_rounded,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc),
+ is_unsigned_(is_unsigned),
+ is_rounded_(is_rounded) {
+ DCHECK(left->IsVecOperation() && right->IsVecOperation());
+ DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+ DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+
+ bool IsUnsigned() const { return is_unsigned_; }
+ bool IsRounded() const { return is_rounded_; }
+
+ DECLARE_INSTRUCTION(VecHalvingAdd);
+
+ private:
+ bool is_unsigned_;
+ bool is_rounded_;
+
+ DISALLOW_COPY_AND_ASSIGN(HVecHalvingAdd);
+};
+
// Subtracts every component in the two vectors,
// viz. [ x1, .. , xn ] - [ y1, .. , yn ] = [ x1 - y1, .. , xn - yn ].
class HVecSub FINAL : public HVecBinaryOperation {
@@ -404,6 +440,54 @@
DISALLOW_COPY_AND_ASSIGN(HVecDiv);
};
+// Takes minimum of every component in the two vectors,
+// viz. MIN( [ x1, .. , xn ] , [ y1, .. , yn ]) = [ min(x1, y1), .. , min(xn, yn) ].
+class HVecMin FINAL : public HVecBinaryOperation {
+ public:
+ HVecMin(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(left->IsVecOperation() && right->IsVecOperation());
+ DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+ DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+
+ DECLARE_INSTRUCTION(VecMin);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecMin);
+};
+
+// Takes maximum of every component in the two vectors,
+// viz. MAX( [ x1, .. , xn ] , [ y1, .. , yn ]) = [ max(x1, y1), .. , max(xn, yn) ].
+class HVecMax FINAL : public HVecBinaryOperation {
+ public:
+ HVecMax(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(left->IsVecOperation() && right->IsVecOperation());
+ DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+ DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+
+ DECLARE_INSTRUCTION(VecMax);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecMax);
+};
+
// Bitwise-ands every component in the two vectors,
// viz. [ x1, .. , xn ] & [ y1, .. , yn ] = [ x1 & y1, .. , xn & yn ].
class HVecAnd FINAL : public HVecBinaryOperation {
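The "narrower form (plus one extra bit)" remark in the HVecHalvingAdd
comment can also be seen via the classic carry-save identities, which
compute both halving adds entirely in lane width with no widening at all
(an illustrative check, not how the hardware is specified):

  #include <cassert>
  #include <cstdint>

  int main() {
    for (int x = 0; x < 256; x++) {
      for (int y = 0; y < 256; y++) {
        uint8_t a = static_cast<uint8_t>(x);
        uint8_t b = static_cast<uint8_t>(y);
        uint8_t hadd = static_cast<uint8_t>((a & b) + ((a ^ b) >> 1));   // (a + b) >> 1
        uint8_t rhadd = static_cast<uint8_t>((a | b) - ((a ^ b) >> 1));  // (a + b + 1) >> 1
        assert(hadd == (x + y) / 2);
        assert(rhadd == (x + y + 1) / 2);
      }
    }
    return 0;
  }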