Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/code_generator_vector_arm.cc       24
-rw-r--r--  compiler/optimizing/code_generator_vector_arm64.cc     57
-rw-r--r--  compiler/optimizing/code_generator_vector_arm_vixl.cc  24
-rw-r--r--  compiler/optimizing/code_generator_vector_mips.cc      24
-rw-r--r--  compiler/optimizing/code_generator_vector_mips64.cc    24
-rw-r--r--  compiler/optimizing/code_generator_vector_x86.cc       45
-rw-r--r--  compiler/optimizing/code_generator_vector_x86_64.cc    45
-rw-r--r--  compiler/optimizing/graph_visualizer.cc                  5
-rw-r--r--  compiler/optimizing/induction_var_range.cc              28
-rw-r--r--  compiler/optimizing/intrinsics_mips64.cc               194
-rw-r--r--  compiler/optimizing/loop_optimization.cc               204
-rw-r--r--  compiler/optimizing/loop_optimization.h                 23
-rw-r--r--  compiler/optimizing/nodes.h                             19
-rw-r--r--  compiler/optimizing/nodes_vector.h                      84
14 files changed, 769 insertions(+), 31 deletions(-)
diff --git a/compiler/optimizing/code_generator_vector_arm.cc b/compiler/optimizing/code_generator_vector_arm.cc
index e7f7b3019c..6e82123e56 100644
--- a/compiler/optimizing/code_generator_vector_arm.cc
+++ b/compiler/optimizing/code_generator_vector_arm.cc
@@ -124,6 +124,14 @@ void InstructionCodeGeneratorARM::VisitVecAdd(HVecAdd* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderARM::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderARM::VisitVecSub(HVecSub* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -148,6 +156,22 @@ void InstructionCodeGeneratorARM::VisitVecDiv(HVecDiv* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderARM::VisitVecMin(HVecMin* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecMin(HVecMin* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecMax(HVecMax* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecMax(HVecMax* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderARM::VisitVecAnd(HVecAnd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 0923920366..2dfccfff85 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -318,6 +318,47 @@ void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) {
}
}
+void LocationsBuilderARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ instruction->IsRounded()
+ ? __ Urhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
+ : __ Uhadd(dst.V16B(), lhs.V16B(), rhs.V16B());
+ } else {
+ instruction->IsRounded()
+ ? __ Srhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
+ : __ Shadd(dst.V16B(), lhs.V16B(), rhs.V16B());
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ instruction->IsRounded()
+ ? __ Urhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
+ : __ Uhadd(dst.V8H(), lhs.V8H(), rhs.V8H());
+ } else {
+ instruction->IsRounded()
+ ? __ Srhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
+ : __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H());
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderARM64::VisitVecSub(HVecSub* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -420,6 +461,22 @@ void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) {
}
}
+void LocationsBuilderARM64::VisitVecMin(HVecMin* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) {
+ LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
+}
+
+void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) {
+ LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
+}
+
void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
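For reference, the A64 instructions selected above (UHADD/URHADD for unsigned lanes, SHADD/SRHADD for signed lanes) compute each lane average with one extra bit of internal precision; the R variants add a rounding bit first. A scalar C++ model of the per-lane semantics, illustrative only and not part of the patch:

#include <cstdint>

// One lane of UHADD/URHADD: unsigned halving add, optionally rounded.
static uint8_t UnsignedHalvingAdd(uint8_t x, uint8_t y, bool rounded) {
  int sum = int{x} + int{y} + (rounded ? 1 : 0);  // widen to get the extra bit
  return static_cast<uint8_t>(sum >> 1);
}

// One lane of SHADD/SRHADD: signed halving add; the arithmetic shift on the
// widened sum matches the floor behavior of the hardware instruction.
static int8_t SignedHalvingAdd(int8_t x, int8_t y, bool rounded) {
  int sum = int{x} + int{y} + (rounded ? 1 : 0);
  return static_cast<int8_t>(sum >> 1);
}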
diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc
index 74fa584e09..990178b31b 100644
--- a/compiler/optimizing/code_generator_vector_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc
@@ -124,6 +124,14 @@ void InstructionCodeGeneratorARMVIXL::VisitVecAdd(HVecAdd* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderARMVIXL::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderARMVIXL::VisitVecSub(HVecSub* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -148,6 +156,22 @@ void InstructionCodeGeneratorARMVIXL::VisitVecDiv(HVecDiv* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderARMVIXL::VisitVecMin(HVecMin* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecMin(HVecMin* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecMax(HVecMax* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecMax(HVecMax* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderARMVIXL::VisitVecAnd(HVecAnd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc
index 6969abd422..8ea1ca7d90 100644
--- a/compiler/optimizing/code_generator_vector_mips.cc
+++ b/compiler/optimizing/code_generator_vector_mips.cc
@@ -124,6 +124,14 @@ void InstructionCodeGeneratorMIPS::VisitVecAdd(HVecAdd* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderMIPS::VisitVecSub(HVecSub* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -148,6 +156,22 @@ void InstructionCodeGeneratorMIPS::VisitVecDiv(HVecDiv* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderMIPS::VisitVecMin(HVecMin* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecMin(HVecMin* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecMax(HVecMax* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecMax(HVecMax* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderMIPS::VisitVecAnd(HVecAnd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc
index 87118cefa5..a484bb4774 100644
--- a/compiler/optimizing/code_generator_vector_mips64.cc
+++ b/compiler/optimizing/code_generator_vector_mips64.cc
@@ -124,6 +124,14 @@ void InstructionCodeGeneratorMIPS64::VisitVecAdd(HVecAdd* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderMIPS64::VisitVecSub(HVecSub* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -148,6 +156,22 @@ void InstructionCodeGeneratorMIPS64::VisitVecDiv(HVecDiv* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderMIPS64::VisitVecMin(HVecMin* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecMin(HVecMin* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecMax(HVecMax* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecMax(HVecMax* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderMIPS64::VisitVecAnd(HVecAnd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 8dabb4d08f..a86d060821 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -350,6 +350,35 @@ void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) {
}
}
+void LocationsBuilderX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+
+ DCHECK(instruction->IsRounded());
+ DCHECK(instruction->IsUnsigned());
+
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ pavgb(dst, src);
+ return;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ pavgw(dst, src);
+ return;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderX86::VisitVecSub(HVecSub* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -448,6 +477,22 @@ void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) {
}
}
+void LocationsBuilderX86::VisitVecMin(HVecMin* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
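A note on the x86 mapping: SSE2 offers only PAVGB/PAVGW, which compute the rounded unsigned average per lane. That is why the visitor above DCHECKs both IsRounded() and IsUnsigned(), which lines up with the kNoSignedHAdd/kNoUnroundedHAdd restriction bits added in loop_optimization.cc below. For the unrounded unsigned case that pavg cannot express, a classic overflow-free scalar formulation exists; a sketch, illustrative only:

#include <cstdint>

// pavgb lane semantics: rounded unsigned average.
static uint8_t PavgbLane(uint8_t x, uint8_t y) {
  return static_cast<uint8_t>((int{x} + int{y} + 1) >> 1);
}

// Floor average without needing a 9th bit, since x + y == 2*(x & y) + (x ^ y).
static uint8_t FloorAvg(uint8_t x, uint8_t y) {
  return static_cast<uint8_t>((x & y) + ((x ^ y) >> 1));
}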
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index e95608839b..696735367e 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -343,6 +343,35 @@ void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) {
}
}
+void LocationsBuilderX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+
+ DCHECK(instruction->IsRounded());
+ DCHECK(instruction->IsUnsigned());
+
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ pavgb(dst, src);
+ return;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ pavgw(dst, src);
+ return;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderX86_64::VisitVecSub(HVecSub* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -441,6 +466,22 @@ void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) {
}
}
+void LocationsBuilderX86_64::VisitVecMin(HVecMin* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index cc3c143b15..1b2b9f80ac 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -509,6 +509,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
StartAttributeStream("kind") << deoptimize->GetKind();
}
+ void VisitVecHalvingAdd(HVecHalvingAdd* hadd) OVERRIDE {
+ StartAttributeStream("unsigned") << std::boolalpha << hadd->IsUnsigned() << std::noboolalpha;
+ StartAttributeStream("rounded") << std::boolalpha << hadd->IsRounded() << std::noboolalpha;
+ }
+
#if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64)
void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) OVERRIDE {
StartAttributeStream("kind") << instruction->GetOpKind();
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 1c8674d522..7c833cf70c 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -45,18 +45,6 @@ static bool IsSafeDiv(int32_t c1, int32_t c2) {
return c2 != 0 && CanLongValueFitIntoInt(static_cast<int64_t>(c1) / static_cast<int64_t>(c2));
}
-/** Returns true for 32/64-bit constant instruction. */
-static bool IsIntAndGet(HInstruction* instruction, int64_t* value) {
- if (instruction->IsIntConstant()) {
- *value = instruction->AsIntConstant()->GetValue();
- return true;
- } else if (instruction->IsLongConstant()) {
- *value = instruction->AsLongConstant()->GetValue();
- return true;
- }
- return false;
-}
-
/** Computes a * b for a,b > 0 (at least until first overflow happens). */
static int64_t SafeMul(int64_t a, int64_t b, /*out*/ bool* overflow) {
if (a > 0 && b > 0 && a > (std::numeric_limits<int64_t>::max() / b)) {
@@ -106,7 +94,7 @@ static bool IsGEZero(HInstruction* instruction) {
}
}
int64_t value = -1;
- return IsIntAndGet(instruction, &value) && value >= 0;
+ return IsInt64AndGet(instruction, &value) && value >= 0;
}
/** Hunts "under the hood" for a suitable instruction at the hint. */
@@ -149,7 +137,7 @@ static InductionVarRange::Value SimplifyMax(InductionVarRange::Value v, HInstruc
int64_t value;
if (v.instruction->IsDiv() &&
v.instruction->InputAt(0)->IsArrayLength() &&
- IsIntAndGet(v.instruction->InputAt(1), &value) && v.a_constant == value) {
+ IsInt64AndGet(v.instruction->InputAt(1), &value) && v.a_constant == value) {
return InductionVarRange::Value(v.instruction->InputAt(0), 1, v.b_constant);
}
// If a == 1, the most suitable one suffices as maximum value.
@@ -444,7 +432,7 @@ bool InductionVarRange::IsConstant(HInductionVarAnalysis::InductionInfo* info,
// any of the three requests (kExact, kAtMost, and kAtLeast).
if (info->induction_class == HInductionVarAnalysis::kInvariant &&
info->operation == HInductionVarAnalysis::kFetch) {
- if (IsIntAndGet(info->fetch, value)) {
+ if (IsInt64AndGet(info->fetch, value)) {
return true;
}
}
@@ -635,7 +623,7 @@ InductionVarRange::Value InductionVarRange::GetGeometric(HInductionVarAnalysis::
int64_t f = 0;
if (IsConstant(info->op_a, kExact, &a) &&
CanLongValueFitIntoInt(a) &&
- IsIntAndGet(info->fetch, &f) && f >= 1) {
+ IsInt64AndGet(info->fetch, &f) && f >= 1) {
// Conservative bounds on a * f^-i + b with f >= 1 can be computed without
// trip count. Other forms would require a much more elaborate evaluation.
const bool is_min_a = a >= 0 ? is_min : !is_min;
@@ -663,7 +651,7 @@ InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction,
// Unless at a constant or hint, chase the instruction a bit deeper into the HIR tree, so that
// it becomes more likely range analysis will compare the same instructions as terminal nodes.
int64_t value;
- if (IsIntAndGet(instruction, &value) && CanLongValueFitIntoInt(value)) {
+ if (IsInt64AndGet(instruction, &value) && CanLongValueFitIntoInt(value)) {
// Proper constant reveals best information.
return Value(static_cast<int32_t>(value));
} else if (instruction == chase_hint_) {
@@ -671,10 +659,10 @@ InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction,
return Value(instruction, 1, 0);
} else if (instruction->IsAdd()) {
// Incorporate suitable constants in the chased value.
- if (IsIntAndGet(instruction->InputAt(0), &value) && CanLongValueFitIntoInt(value)) {
+ if (IsInt64AndGet(instruction->InputAt(0), &value) && CanLongValueFitIntoInt(value)) {
return AddValue(Value(static_cast<int32_t>(value)),
GetFetch(instruction->InputAt(1), trip, in_body, is_min));
- } else if (IsIntAndGet(instruction->InputAt(1), &value) && CanLongValueFitIntoInt(value)) {
+ } else if (IsInt64AndGet(instruction->InputAt(1), &value) && CanLongValueFitIntoInt(value)) {
return AddValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min),
Value(static_cast<int32_t>(value)));
}
@@ -1074,7 +1062,7 @@ bool InductionVarRange::GenerateLastValueGeometric(HInductionVarAnalysis::Induct
// Detect known base and trip count (always taken).
int64_t f = 0;
int64_t m = 0;
- if (IsIntAndGet(info->fetch, &f) && f >= 1 && IsConstant(trip->op_a, kExact, &m) && m >= 1) {
+ if (IsInt64AndGet(info->fetch, &f) && f >= 1 && IsConstant(trip->op_a, kExact, &m) && m >= 1) {
HInstruction* opa = nullptr;
HInstruction* opb = nullptr;
if (GenerateCode(info->op_a, nullptr, graph, block, &opa, false, false) &&
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 82d0567ef9..b57b41f686 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -2093,6 +2093,199 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ Bind(&done);
}
+// static void java.lang.System.arraycopy(Object src, int srcPos,
+// Object dest, int destPos,
+// int length)
+void IntrinsicLocationsBuilderMIPS64::VisitSystemArrayCopyChar(HInvoke* invoke) {
+ HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
+ HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
+ HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+
+ // As long as we are checking, we might as well check to see if the src and dest
+ // positions are >= 0.
+ if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
+ (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
+ // We will have to fail anyway.
+ return;
+ }
+
+ // And since we are already checking, check the length too.
+ if (length != nullptr) {
+ int32_t len = length->GetValue();
+ if (len < 0) {
+ // Just call as normal.
+ return;
+ }
+ }
+
+ // Okay, it is safe to generate inline code.
+ LocationSummary* locations =
+ new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
+ // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+ locations->SetInAt(2, Location::RequiresRegister());
+ locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
+ locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
+
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+// Utility routine to verify that "length(input) - pos >= length"
+static void EnoughItems(Mips64Assembler* assembler,
+ GpuRegister length_input_minus_pos,
+ Location length,
+ SlowPathCodeMIPS64* slow_path) {
+ if (length.IsConstant()) {
+ int32_t length_constant = length.GetConstant()->AsIntConstant()->GetValue();
+
+ if (IsInt<16>(length_constant)) {
+ __ Slti(TMP, length_input_minus_pos, length_constant);
+ __ Bnezc(TMP, slow_path->GetEntryLabel());
+ } else {
+ __ LoadConst32(TMP, length_constant);
+ __ Bltc(length_input_minus_pos, TMP, slow_path->GetEntryLabel());
+ }
+ } else {
+ __ Bltc(length_input_minus_pos, length.AsRegister<GpuRegister>(), slow_path->GetEntryLabel());
+ }
+}
+
+static void CheckPosition(Mips64Assembler* assembler,
+ Location pos,
+ GpuRegister input,
+ Location length,
+ SlowPathCodeMIPS64* slow_path,
+ bool length_is_input_length = false) {
+ // Where is the length in the Array?
+ const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
+
+ // Calculate length(input) - pos.
+ if (pos.IsConstant()) {
+ int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
+ if (pos_const == 0) {
+ if (!length_is_input_length) {
+ // Check that length(input) >= length.
+ __ LoadFromOffset(kLoadWord, AT, input, length_offset);
+ EnoughItems(assembler, AT, length, slow_path);
+ }
+ } else {
+ // Check that (length(input) - pos) >= zero.
+ __ LoadFromOffset(kLoadWord, AT, input, length_offset);
+ DCHECK_GT(pos_const, 0);
+ __ Addiu32(AT, AT, -pos_const);
+ __ Bltzc(AT, slow_path->GetEntryLabel());
+
+ // Verify that (length(input) - pos) >= length.
+ EnoughItems(assembler, AT, length, slow_path);
+ }
+ } else if (length_is_input_length) {
+ // The only way the copy can succeed is if pos is zero.
+ GpuRegister pos_reg = pos.AsRegister<GpuRegister>();
+ __ Bnezc(pos_reg, slow_path->GetEntryLabel());
+ } else {
+ // Verify that pos >= 0.
+ GpuRegister pos_reg = pos.AsRegister<GpuRegister>();
+ __ Bltzc(pos_reg, slow_path->GetEntryLabel());
+
+ // Check that (length(input) - pos) >= zero.
+ __ LoadFromOffset(kLoadWord, AT, input, length_offset);
+ __ Subu(AT, AT, pos_reg);
+ __ Bltzc(AT, slow_path->GetEntryLabel());
+
+ // Verify that (length(input) - pos) >= length.
+ EnoughItems(assembler, AT, length, slow_path);
+ }
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitSystemArrayCopyChar(HInvoke* invoke) {
+ Mips64Assembler* assembler = GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+
+ GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>();
+ Location src_pos = locations->InAt(1);
+ GpuRegister dest = locations->InAt(2).AsRegister<GpuRegister>();
+ Location dest_pos = locations->InAt(3);
+ Location length = locations->InAt(4);
+
+ Mips64Label loop;
+
+ GpuRegister dest_base = locations->GetTemp(0).AsRegister<GpuRegister>();
+ GpuRegister src_base = locations->GetTemp(1).AsRegister<GpuRegister>();
+ GpuRegister count = locations->GetTemp(2).AsRegister<GpuRegister>();
+
+ SlowPathCodeMIPS64* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS64(invoke);
+ codegen_->AddSlowPath(slow_path);
+
+ // Bail out if the source and destination are the same (to handle overlap).
+ __ Beqc(src, dest, slow_path->GetEntryLabel());
+
+ // Bail out if the source is null.
+ __ Beqzc(src, slow_path->GetEntryLabel());
+
+ // Bail out if the destination is null.
+ __ Beqzc(dest, slow_path->GetEntryLabel());
+
+ // Load length into register for count.
+ if (length.IsConstant()) {
+ __ LoadConst32(count, length.GetConstant()->AsIntConstant()->GetValue());
+ } else {
+ // If the length is negative, bail out.
+ // We have already checked in the LocationsBuilder for the constant case.
+ __ Bltzc(length.AsRegister<GpuRegister>(), slow_path->GetEntryLabel());
+
+ __ Move(count, length.AsRegister<GpuRegister>());
+ }
+
+ // Validity checks: source.
+ CheckPosition(assembler, src_pos, src, Location::RegisterLocation(count), slow_path);
+
+ // Validity checks: dest.
+ CheckPosition(assembler, dest_pos, dest, Location::RegisterLocation(count), slow_path);
+
+ // If count is zero, we're done.
+ __ Beqzc(count, slow_path->GetExitLabel());
+
+ // Okay, everything checks out. Finally time to do the copy.
+ // Check assumption that sizeof(Char) is 2 (used in scaling below).
+ const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
+ DCHECK_EQ(char_size, 2u);
+
+ const size_t char_shift = Primitive::ComponentSizeShift(Primitive::kPrimChar);
+
+ const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
+
+ // Calculate source and destination addresses.
+ if (src_pos.IsConstant()) {
+ int32_t src_pos_const = src_pos.GetConstant()->AsIntConstant()->GetValue();
+
+ __ Daddiu64(src_base, src, data_offset + char_size * src_pos_const, TMP);
+ } else {
+ __ Daddiu64(src_base, src, data_offset, TMP);
+ __ Dlsa(src_base, src_pos.AsRegister<GpuRegister>(), src_base, char_shift);
+ }
+ if (dest_pos.IsConstant()) {
+ int32_t dest_pos_const = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+
+ __ Daddiu64(dest_base, dest, data_offset + char_size * dest_pos_const, TMP);
+ } else {
+ __ Daddiu64(dest_base, dest, data_offset, TMP);
+ __ Dlsa(dest_base, dest_pos.AsRegister<GpuRegister>(), dest_base, char_shift);
+ }
+
+ __ Bind(&loop);
+ __ Lh(TMP, src_base, 0);
+ __ Daddiu(src_base, src_base, char_size);
+ __ Daddiu(count, count, -1);
+ __ Sh(TMP, dest_base, 0);
+ __ Daddiu(dest_base, dest_base, char_size);
+ __ Bnezc(count, &loop);
+
+ __ Bind(slow_path->GetExitLabel());
+}
+
static void GenHighestOneBit(LocationSummary* locations,
Primitive::Type type,
Mips64Assembler* assembler) {
@@ -2372,7 +2565,6 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathTanh(HInvoke* invoke) {
}
UNIMPLEMENTED_INTRINSIC(MIPS64, ReferenceGetReferent)
-UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopy)
UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOf);
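Taken together, the code emitted above amounts to a bounds-checked halfword copy in which every conditional branch targets the intrinsic slow path (the regular arraycopy call). A rough C++ model of the fast-path logic, illustrative only, with a false return standing in for the slow-path branch:

#include <cstdint>

static bool FastArrayCopyChar(const uint16_t* src, int32_t src_len, int32_t src_pos,
                              uint16_t* dst, int32_t dst_len, int32_t dst_pos,
                              int32_t length) {
  if (src == dst || src == nullptr || dst == nullptr) return false;  // overlap or null
  if (length < 0 || src_pos < 0 || dst_pos < 0) return false;
  if (src_len - src_pos < length || dst_len - dst_pos < length) return false;
  for (int32_t i = 0; i < length; ++i) {  // the Lh/Daddiu/Sh/Bnezc loop
    dst[dst_pos + i] = src[src_pos + i];
  }
  return true;  // length == 0 skips the loop, matching the Beqzc to the exit label
}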
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 8e88c1ec7f..5a95abdb50 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -63,12 +63,122 @@ static bool IsEarlyExit(HLoopInformation* loop_info) {
return false;
}
+// Detect a sign extension from the given type. Returns the promoted operand on success.
+static bool IsSignExtensionAndGet(HInstruction* instruction,
+ Primitive::Type type,
+ /*out*/ HInstruction** operand) {
+ // Accept any already wider constant that would be handled properly by sign
+ // extension when represented in the *width* of the given narrower data type
+ // (the fact that char normally zero extends does not matter here).
+ int64_t value = 0;
+ if (IsInt64AndGet(instruction, &value)) {
+ switch (type) {
+ case Primitive::kPrimByte:
+ if (std::numeric_limits<int8_t>::min() <= value &&
+ std::numeric_limits<int8_t>::max() >= value) {
+ *operand = instruction;
+ return true;
+ }
+ return false;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ if (std::numeric_limits<int16_t>::min() <= value &&
+ std::numeric_limits<int16_t>::max() >= value) {
+ *operand = instruction;
+ return true;
+ }
+ return false;
+ default:
+ return false;
+ }
+ }
+ // An implicit widening conversion of a signed integer to an integral type sign-extends
+ // the two's-complement representation of the integer value to fill the wider format.
+ if (instruction->GetType() == type && (instruction->IsArrayGet() ||
+ instruction->IsStaticFieldGet() ||
+ instruction->IsInstanceFieldGet())) {
+ switch (type) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ *operand = instruction;
+ return true;
+ default:
+ return false;
+ }
+ }
+ // TODO: perhaps explicit conversions later too?
+ // (this may return something different from instruction)
+ return false;
+}
+
+// Detect a zero extension from the given type. Returns the promoted operand on success.
+static bool IsZeroExtensionAndGet(HInstruction* instruction,
+ Primitive::Type type,
+ /*out*/ HInstruction** operand) {
+ // Accept any already wider constant that would be handled properly by zero
+ // extension when represented in the *width* of the given narrower data type
+ // (the fact that byte/short normally sign extend does not matter here).
+ int64_t value = 0;
+ if (IsInt64AndGet(instruction, &value)) {
+ switch (type) {
+ case Primitive::kPrimByte:
+ if (std::numeric_limits<uint8_t>::min() <= value &&
+ std::numeric_limits<uint8_t>::max() >= value) {
+ *operand = instruction;
+ return true;
+ }
+ return false;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ if (std::numeric_limits<uint16_t>::min() <= value &&
+ std::numeric_limits<uint16_t>::max() >= value) {
+ *operand = instruction;
+ return true;
+ }
+ return false;
+ default:
+ return false;
+ }
+ }
+ // An implicit widening conversion of a char to an integral type zero-extends
+ // the representation of the char value to fill the wider format.
+ if (instruction->GetType() == type && (instruction->IsArrayGet() ||
+ instruction->IsStaticFieldGet() ||
+ instruction->IsInstanceFieldGet())) {
+ if (type == Primitive::kPrimChar) {
+ *operand = instruction;
+ return true;
+ }
+ }
+ // A sign (or zero) extension followed by an explicit removal of just the
+ // higher sign bits is equivalent to a zero extension of the underlying operand.
+ if (instruction->IsAnd()) {
+ int64_t mask = 0;
+ HInstruction* a = instruction->InputAt(0);
+ HInstruction* b = instruction->InputAt(1);
+ // In (a & b) find (mask & b) or (a & mask) with sign or zero extension on the non-mask.
+ if ((IsInt64AndGet(a, /*out*/ &mask) && (IsSignExtensionAndGet(b, type, /*out*/ operand) ||
+ IsZeroExtensionAndGet(b, type, /*out*/ operand))) ||
+ (IsInt64AndGet(b, /*out*/ &mask) && (IsSignExtensionAndGet(a, type, /*out*/ operand) ||
+ IsZeroExtensionAndGet(a, type, /*out*/ operand)))) {
+ switch ((*operand)->GetType()) {
+ case Primitive::kPrimByte: return mask == std::numeric_limits<uint8_t>::max();
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort: return mask == std::numeric_limits<uint16_t>::max();
+ default: return false;
+ }
+ }
+ }
+ // TODO: perhaps explicit conversions later too?
+ return false;
+}
+
// Test vector restrictions.
static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) {
return (restrictions & tested) != 0;
}
-// Inserts an instruction.
+// Insert an instruction.
static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) {
DCHECK(block != nullptr);
DCHECK(instruction != nullptr);
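The And case in IsZeroExtensionAndGet above relies on the equivalence that masking the high bits off a sign-extended narrow value yields exactly its zero extension, which is why either kind of extension is accepted under the right mask. A quick exhaustive check of that equivalence for the byte case, illustrative only:

#include <cassert>
#include <cstdint>

int main() {
  for (int v = -128; v < 128; ++v) {
    int8_t b = static_cast<int8_t>(v);
    int sign_extended = b;                        // implicit sign extension to int
    int zero_extended = static_cast<uint8_t>(b);  // zero extension to int
    assert((sign_extended & 0xff) == zero_extended);  // mask strips only sign bits
  }
  return 0;
}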
@@ -713,6 +823,10 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node,
return true;
}
} else if (instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()) {
+ // Recognize vectorization idioms.
+ if (VectorizeHalvingAddIdiom(node, instruction, generate_code, type, restrictions)) {
+ return true;
+ }
// Deal with vector restrictions.
if ((HasVectorRestrictions(restrictions, kNoShift)) ||
(instruction->IsShr() && HasVectorRestrictions(restrictions, kNoShr))) {
@@ -806,11 +920,11 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoMul | kNoDiv | kNoShift | kNoAbs;
+ *restrictions |= kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoAbs;
+ *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
*restrictions |= kNoDiv;
@@ -1039,6 +1153,90 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org,
#undef GENERATE_VEC
//
+// Vectorization idioms.
+//
+
+// Method recognizes the following idioms:
+// rounding halving add (a + b + 1) >> 1 for unsigned/signed operands a, b
+// regular halving add (a + b) >> 1 for unsigned/signed operands a, b
+// Provided that the operands are promoted to a wider form to do the arithmetic and
+// then cast back to narrower form, the idioms can be mapped into efficient SIMD
+// implementation that operates directly in narrower form (plus one extra bit).
+// TODO: current version recognizes implicit byte/short/char widening only;
+// explicit widening from int to long could be added later.
+bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node,
+ HInstruction* instruction,
+ bool generate_code,
+ Primitive::Type type,
+ uint64_t restrictions) {
+ // Test for top level arithmetic shift right x >> 1 or logical shift right x >>> 1
+ // (note whether the sign bit in higher precision is shifted in has no effect
+ // on the narrow precision computed by the idiom).
+ int64_t value = 0;
+ if ((instruction->IsShr() ||
+ instruction->IsUShr()) &&
+ IsInt64AndGet(instruction->InputAt(1), &value) && value == 1) {
+ //
+ // TODO: make following code less sensitive to associativity and commutativity differences.
+ //
+ HInstruction* x = instruction->InputAt(0);
+ // Test for an optional rounding part (x + 1) >> 1.
+ bool is_rounded = false;
+ if (x->IsAdd() && IsInt64AndGet(x->InputAt(1), &value) && value == 1) {
+ x = x->InputAt(0);
+ is_rounded = true;
+ }
+ // Test for a core addition (a + b) >> 1 (possibly rounded), either unsigned or signed.
+ if (x->IsAdd()) {
+ HInstruction* a = x->InputAt(0);
+ HInstruction* b = x->InputAt(1);
+ HInstruction* r = nullptr;
+ HInstruction* s = nullptr;
+ bool is_unsigned = false;
+ if (IsZeroExtensionAndGet(a, type, &r) && IsZeroExtensionAndGet(b, type, &s)) {
+ is_unsigned = true;
+ } else if (IsSignExtensionAndGet(a, type, &r) && IsSignExtensionAndGet(b, type, &s)) {
+ is_unsigned = false;
+ } else {
+ return false;
+ }
+ // Deal with vector restrictions.
+ if ((!is_unsigned && HasVectorRestrictions(restrictions, kNoSignedHAdd)) ||
+ (!is_rounded && HasVectorRestrictions(restrictions, kNoUnroundedHAdd))) {
+ return false;
+ }
+ // Accept recognized halving add for vectorizable operands. Vectorized code uses the
+ // shorthand idiomatic operation. Sequential code uses the original scalar expressions.
+ DCHECK(r != nullptr && s != nullptr);
+ if (VectorizeUse(node, r, generate_code, type, restrictions) &&
+ VectorizeUse(node, s, generate_code, type, restrictions)) {
+ if (generate_code) {
+ if (vector_mode_ == kVector) {
+ vector_map_->Put(instruction, new (global_allocator_) HVecHalvingAdd(
+ global_allocator_,
+ vector_map_->Get(r),
+ vector_map_->Get(s),
+ type,
+ vector_length_,
+ is_unsigned,
+ is_rounded));
+ } else {
+ VectorizeUse(node, instruction->InputAt(0), generate_code, type, restrictions);
+ VectorizeUse(node, instruction->InputAt(1), generate_code, type, restrictions);
+ GenerateVecOp(instruction,
+ vector_map_->Get(instruction->InputAt(0)),
+ vector_map_->Get(instruction->InputAt(1)),
+ type);
+ }
+ }
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+//
// Helpers.
//
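At the source level, the pattern VectorizeHalvingAddIdiom recognizes is an average of two narrow values computed in promoted int arithmetic and narrowed back, e.g. Java's (a[i] + b[i] + 1) >>> 1 over byte or char arrays. The same shape written out in C++, with hypothetical array names, illustrative only:

#include <cstdint>

// Rounded unsigned halving-add idiom: zero-extend, add, round, halve, narrow.
// After vectorization this becomes one halving add per lane (e.g. URHADD or
// pavgb) instead of a widen/add/shift/narrow sequence.
void RoundedAverage(const uint8_t* a, const uint8_t* b, uint8_t* out, int n) {
  for (int i = 0; i < n; ++i) {
    out[i] = static_cast<uint8_t>((int{a[i]} + int{b[i]} + 1) >> 1);
  }
}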
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index d8f50aab28..4a7da86e32 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -62,13 +62,15 @@ class HLoopOptimization : public HOptimization {
* Vectorization restrictions (bit mask).
*/
enum VectorRestrictions {
- kNone = 0, // no restrictions
- kNoMul = 1, // no multiplication
- kNoDiv = 2, // no division
- kNoShift = 4, // no shift
- kNoShr = 8, // no arithmetic shift right
- kNoHiBits = 16, // "wider" operations cannot bring in higher order bits
- kNoAbs = 32, // no absolute value
+ kNone = 0, // no restrictions
+ kNoMul = 1, // no multiplication
+ kNoDiv = 2, // no division
+ kNoShift = 4, // no shift
+ kNoShr = 8, // no arithmetic shift right
+ kNoHiBits = 16, // "wider" operations cannot bring in higher order bits
+ kNoSignedHAdd = 32, // no signed halving add
+ kNoUnroundedHAdd = 64, // no unrounded halving add
+ kNoAbs = 128, // no absolute value
};
/*
@@ -136,6 +138,13 @@ class HLoopOptimization : public HOptimization {
Primitive::Type type);
void GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, Primitive::Type type);
+ // Vectorization idioms.
+ bool VectorizeHalvingAddIdiom(LoopNode* node,
+ HInstruction* instruction,
+ bool generate_code,
+ Primitive::Type type,
+ uint64_t restrictions);
+
// Helpers.
bool TrySetPhiInduction(HPhi* phi, bool restrict_uses);
bool TrySetSimpleLoopHeader(HBasicBlock* block);
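The restriction bits are plain flags OR-ed together per packed type and target, and HasVectorRestrictions is a simple mask test. A minimal self-contained sketch of that protocol, with the two new values copied from the enum above:

#include <cstdint>

enum : uint64_t {  // values from VectorRestrictions above
  kNoSignedHAdd = 32,
  kNoUnroundedHAdd = 64,
};

static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) {
  return (restrictions & tested) != 0;
}

// E.g. a target with only rounded unsigned halving adds would set both bits:
//   uint64_t r = kNoSignedHAdd | kNoUnroundedHAdd;
//   HasVectorRestrictions(r, kNoSignedHAdd)  // true -> reject the signed variant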
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index c109369106..6be237e612 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1369,9 +1369,12 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(VecAbs, VecUnaryOperation) \
M(VecNot, VecUnaryOperation) \
M(VecAdd, VecBinaryOperation) \
+ M(VecHalvingAdd, VecBinaryOperation) \
M(VecSub, VecBinaryOperation) \
M(VecMul, VecBinaryOperation) \
M(VecDiv, VecBinaryOperation) \
+ M(VecMin, VecBinaryOperation) \
+ M(VecMax, VecBinaryOperation) \
M(VecAnd, VecBinaryOperation) \
M(VecAndNot, VecBinaryOperation) \
M(VecOr, VecBinaryOperation) \
@@ -6845,6 +6848,7 @@ class HBlocksInLoopReversePostOrderIterator : public ValueObject {
DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopReversePostOrderIterator);
};
+// Returns int64_t value of a properly typed constant.
inline int64_t Int64FromConstant(HConstant* constant) {
if (constant->IsIntConstant()) {
return constant->AsIntConstant()->GetValue();
@@ -6856,6 +6860,21 @@ inline int64_t Int64FromConstant(HConstant* constant) {
}
}
+// Returns true iff instruction is an integral constant (and sets value on success).
+inline bool IsInt64AndGet(HInstruction* instruction, /*out*/ int64_t* value) {
+ if (instruction->IsIntConstant()) {
+ *value = instruction->AsIntConstant()->GetValue();
+ return true;
+ } else if (instruction->IsLongConstant()) {
+ *value = instruction->AsLongConstant()->GetValue();
+ return true;
+ } else if (instruction->IsNullConstant()) {
+ *value = 0;
+ return true;
+ }
+ return false;
+}
+
#define INSTRUCTION_TYPE_CHECK(type, super) \
inline bool HInstruction::Is##type() const { return GetKind() == k##type; } \
inline const H##type* HInstruction::As##type() const { \
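IsInt64AndGet generalizes the helper that was previously private to induction_var_range.cc: unlike Int64FromConstant it accepts an arbitrary instruction, returns false for non-constants, and additionally folds NullConstant to 0. The typical call pattern, shown as a fragment that assumes the surrounding compiler types:

int64_t value = 0;
if (IsInt64AndGet(instruction->InputAt(1), /*out*/ &value) && value == 1) {
  // InputAt(1) is a constant 1 (int or long): a candidate for the x >> 1 idiom.
}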
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 0cbbf2a215..bff58d0910 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -338,6 +338,42 @@ class HVecAdd FINAL : public HVecBinaryOperation {
DISALLOW_COPY_AND_ASSIGN(HVecAdd);
};
+// Performs halving add on every component in the two vectors, viz.
+// rounded [ x1, .. , xn ] hradd [ y1, .. , yn ] = [ (x1 + y1 + 1) >> 1, .. , (xn + yn + 1) >> 1 ]
+// or      [ x1, .. , xn ] hadd  [ y1, .. , yn ] = [ (x1 + y1) >> 1, .. , (xn + yn) >> 1 ]
+// for signed operands x, y (sign extension) or unsigned operands x, y (zero extension).
+class HVecHalvingAdd FINAL : public HVecBinaryOperation {
+ public:
+ HVecHalvingAdd(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ bool is_unsigned,
+ bool is_rounded,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc),
+ is_unsigned_(is_unsigned),
+ is_rounded_(is_rounded) {
+ DCHECK(left->IsVecOperation() && right->IsVecOperation());
+ DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+ DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+
+ bool IsUnsigned() const { return is_unsigned_; }
+ bool IsRounded() const { return is_rounded_; }
+
+ DECLARE_INSTRUCTION(VecHalvingAdd);
+
+ private:
+ bool is_unsigned_;
+ bool is_rounded_;
+
+ DISALLOW_COPY_AND_ASSIGN(HVecHalvingAdd);
+};
+
// Subtracts every component in the two vectors,
// viz. [ x1, .. , xn ] - [ y1, .. , yn ] = [ x1 - y1, .. , xn - yn ].
class HVecSub FINAL : public HVecBinaryOperation {
@@ -404,6 +440,54 @@ class HVecDiv FINAL : public HVecBinaryOperation {
DISALLOW_COPY_AND_ASSIGN(HVecDiv);
};
+// Takes minimum of every component in the two vectors,
+// viz. MIN( [ x1, .. , xn ] , [ y1, .. , yn ]) = [ min(x1, y1), .. , min(xn, yn) ].
+class HVecMin FINAL : public HVecBinaryOperation {
+ public:
+ HVecMin(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(left->IsVecOperation() && right->IsVecOperation());
+ DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+ DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+
+ DECLARE_INSTRUCTION(VecMin);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecMin);
+};
+
+// Takes maximum of every component in the two vectors,
+// viz. MAX( [ x1, .. , xn ] , [ y1, .. , yn ]) = [ max(x1, y1), .. , max(xn, yn) ].
+class HVecMax FINAL : public HVecBinaryOperation {
+ public:
+ HVecMax(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(left->IsVecOperation() && right->IsVecOperation());
+ DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+ DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+
+ DECLARE_INSTRUCTION(VecMax);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecMax);
+};
+
// Bitwise-ands every component in the two vectors,
// viz. [ x1, .. , xn ] & [ y1, .. , yn ] = [ x1 & y1, .. , xn & yn ].
class HVecAnd FINAL : public HVecBinaryOperation {