summary refs log tree commit diff
path: root/compiler/optimizing
diff options
context:
space:
mode:
author Aart Bik <ajcbik@google.com> 2017-05-16 15:56:01 +0000
committer Gerrit Code Review <noreply-gerritcodereview@google.com> 2017-05-16 15:56:04 +0000
commit a1633a7077781d9c64a77b27deb1707d1a56906d (patch)
tree 505f2560cfd247b2e1aab86d3ab96e5c399cb05d /compiler/optimizing
parent a774575ae3af3d46955f941ddd08a79caf2aaa94 (diff)
parent c8e93c736c149ce41be073dd24324fb08afb9ae4 (diff)
Merge "Min/max SIMDization support."
Diffstat (limited to 'compiler/optimizing')
-rw-r--r-- compiler/optimizing/code_generator_vector_arm64.cc 90
-rw-r--r-- compiler/optimizing/code_generator_vector_x86.cc 92
-rw-r--r-- compiler/optimizing/code_generator_vector_x86_64.cc 96
-rw-r--r-- compiler/optimizing/graph_visualizer.cc 8
-rw-r--r-- compiler/optimizing/loop_optimization.cc 50
-rw-r--r-- compiler/optimizing/loop_optimization.h 1
-rw-r--r-- compiler/optimizing/nodes_vector.h 22
7 files changed, 351 insertions, 8 deletions
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 478bd24388..a41adca02c 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -468,7 +468,50 @@ void LocationsBuilderARM64::VisitVecMin(HVecMin* instruction) {
}
void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) {
- LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ Umin(dst.V16B(), lhs.V16B(), rhs.V16B());
+ } else {
+ __ Smin(dst.V16B(), lhs.V16B(), rhs.V16B());
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ Umin(dst.V8H(), lhs.V8H(), rhs.V8H());
+ } else {
+ __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H());
+ }
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S());
+ } else {
+ __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S());
+ }
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ Fmin(dst.V4S(), lhs.V4S(), rhs.V4S());
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) {
@@ -476,7 +519,50 @@ void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) {
}
void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) {
- LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ Umax(dst.V16B(), lhs.V16B(), rhs.V16B());
+ } else {
+ __ Smax(dst.V16B(), lhs.V16B(), rhs.V16B());
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ Umax(dst.V8H(), lhs.V8H(), rhs.V8H());
+ } else {
+ __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H());
+ }
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S());
+ } else {
+ __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S());
+ }
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ Fmax(dst.V4S(), lhs.V4S(), rhs.V4S());
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) {
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 5bb19c193c..14782d70a1 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -483,7 +483,51 @@ void LocationsBuilderX86::VisitVecMin(HVecMin* instruction) {
}
void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pminub(dst, src);
+ } else {
+ __ pminsb(dst, src);
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pminuw(dst, src);
+ } else {
+ __ pminsw(dst, src);
+ }
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pminud(dst, src);
+ } else {
+ __ pminsd(dst, src);
+ }
+ break;
+ // Next cases are sloppy wrt 0.0 vs -0.0.
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ minps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ minpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) {
@@ -491,7 +535,51 @@ void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) {
}
void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pmaxub(dst, src);
+ } else {
+ __ pmaxsb(dst, src);
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pmaxuw(dst, src);
+ } else {
+ __ pmaxsw(dst, src);
+ }
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pmaxud(dst, src);
+ } else {
+ __ pmaxsd(dst, src);
+ }
+ break;
+ // Next cases are sloppy wrt 0.0 vs -0.0.
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ maxps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ maxpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) {
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index 6d4aae86e6..246044ebb8 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -353,6 +353,10 @@ void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruct
DCHECK(locations->InAt(0).Equals(locations->Out()));
XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+
+ DCHECK(instruction->IsRounded());
+ DCHECK(instruction->IsUnsigned());
+
switch (instruction->GetPackedType()) {
case Primitive::kPrimByte:
DCHECK_EQ(16u, instruction->GetVectorLength());
@@ -472,7 +476,51 @@ void LocationsBuilderX86_64::VisitVecMin(HVecMin* instruction) {
}
void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pminub(dst, src);
+ } else {
+ __ pminsb(dst, src);
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pminuw(dst, src);
+ } else {
+ __ pminsw(dst, src);
+ }
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pminud(dst, src);
+ } else {
+ __ pminsd(dst, src);
+ }
+ break;
+ // Next cases are sloppy wrt 0.0 vs -0.0.
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ minps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ minpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) {
@@ -480,7 +528,51 @@ void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) {
}
void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pmaxub(dst, src);
+ } else {
+ __ pmaxsb(dst, src);
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pmaxuw(dst, src);
+ } else {
+ __ pmaxsw(dst, src);
+ }
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ __ pmaxud(dst, src);
+ } else {
+ __ pmaxsd(dst, src);
+ }
+ break;
+ // Next cases are sloppy wrt 0.0 vs -0.0.
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ maxps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ DCHECK(!instruction->IsUnsigned());
+ __ maxpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) {
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index e5d94c3504..02816cf7ce 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -514,6 +514,14 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
StartAttributeStream("rounded") << std::boolalpha << hadd->IsRounded() << std::noboolalpha;
}
+ void VisitVecMin(HVecMin* min) OVERRIDE {
+ StartAttributeStream("unsigned") << std::boolalpha << min->IsUnsigned() << std::noboolalpha;
+ }
+
+ void VisitVecMax(HVecMax* max) OVERRIDE {
+ StartAttributeStream("unsigned") << std::boolalpha << max->IsUnsigned() << std::noboolalpha;
+ }
+
void VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) OVERRIDE {
StartAttributeStream("kind") << instruction->GetOpKind();
}
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 881802d714..4067aa3468 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -869,6 +869,32 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node,
}
return false;
}
+ case Intrinsics::kMathMinIntInt:
+ case Intrinsics::kMathMinLongLong:
+ case Intrinsics::kMathMinFloatFloat:
+ case Intrinsics::kMathMinDoubleDouble:
+ case Intrinsics::kMathMaxIntInt:
+ case Intrinsics::kMathMaxLongLong:
+ case Intrinsics::kMathMaxFloatFloat:
+ case Intrinsics::kMathMaxDoubleDouble: {
+ // Deal with vector restrictions.
+ if (HasVectorRestrictions(restrictions, kNoMinMax) ||
+ HasVectorRestrictions(restrictions, kNoHiBits)) {
+ // TODO: we can do better for some hibits cases.
+ return false;
+ }
+ // Accept MIN/MAX(x, y) for vectorizable operands.
+ HInstruction* opa = instruction->InputAt(0);
+ HInstruction* opb = instruction->InputAt(1);
+ if (VectorizeUse(node, opa, generate_code, type, restrictions) &&
+ VectorizeUse(node, opb, generate_code, type, restrictions)) {
+ if (generate_code) {
+ GenerateVecOp(instruction, vector_map_->Get(opa), vector_map_->Get(opb), type);
+ }
+ return true;
+ }
+ return false;
+ }
default:
return false;
} // switch
@@ -898,7 +924,7 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric
*restrictions |= kNoDiv;
return TrySetVectorLength(4);
case Primitive::kPrimLong:
- *restrictions |= kNoDiv | kNoMul;
+ *restrictions |= kNoDiv | kNoMul | kNoMinMax;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
return TrySetVectorLength(4);
@@ -924,11 +950,13 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric
*restrictions |= kNoDiv;
return TrySetVectorLength(4);
case Primitive::kPrimLong:
- *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs;
+ *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoMinMax;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
+ *restrictions |= kNoMinMax; // -0.0 vs +0.0
return TrySetVectorLength(4);
case Primitive::kPrimDouble:
+ *restrictions |= kNoMinMax; // -0.0 vs +0.0
return TrySetVectorLength(2);
default:
break;
@@ -1108,6 +1136,24 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org,
DCHECK(opb == nullptr);
vector = new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_);
break;
+ case Intrinsics::kMathMinIntInt:
+ case Intrinsics::kMathMinLongLong:
+ case Intrinsics::kMathMinFloatFloat:
+ case Intrinsics::kMathMinDoubleDouble: {
+ bool is_unsigned = false; // TODO: detect unsigned versions
+ vector = new (global_allocator_)
+ HVecMin(global_allocator_, opa, opb, type, vector_length_, is_unsigned);
+ break;
+ }
+ case Intrinsics::kMathMaxIntInt:
+ case Intrinsics::kMathMaxLongLong:
+ case Intrinsics::kMathMaxFloatFloat:
+ case Intrinsics::kMathMaxDoubleDouble: {
+ bool is_unsigned = false; // TODO: detect unsigned versions
+ vector = new (global_allocator_)
+ HVecMax(global_allocator_, opa, opb, type, vector_length_, is_unsigned);
+ break;
+ }
default:
LOG(FATAL) << "Unsupported SIMD intrinsic";
UNREACHABLE();
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 4a7da86e32..6d5978d337 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -71,6 +71,7 @@ class HLoopOptimization : public HOptimization {
kNoSignedHAdd = 32, // no signed halving add
kNoUnroundedHAdd = 64, // no unrounded halving add
kNoAbs = 128, // no absolute value
+ kNoMinMax = 256, // no min/max
};
/*
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 92fe9bfa7d..5dbe29b4fa 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -456,13 +456,24 @@ class HVecMin FINAL : public HVecBinaryOperation {
HInstruction* right,
Primitive::Type packed_type,
size_t vector_length,
+ bool is_unsigned,
uint32_t dex_pc = kNoDexPc)
: HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
DCHECK(HasConsistentPackedTypes(left, packed_type));
DCHECK(HasConsistentPackedTypes(right, packed_type));
+ SetPackedFlag<kFieldMinOpIsUnsigned>(is_unsigned);
}
+
+ bool IsUnsigned() const { return GetPackedFlag<kFieldMinOpIsUnsigned>(); }
+
DECLARE_INSTRUCTION(VecMin);
+
private:
+ // Additional packed bits.
+ static constexpr size_t kFieldMinOpIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits;
+ static constexpr size_t kNumberOfMinOpPackedBits = kFieldMinOpIsUnsigned + 1;
+ static_assert(kNumberOfMinOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+
DISALLOW_COPY_AND_ASSIGN(HVecMin);
};
@@ -475,13 +486,24 @@ class HVecMax FINAL : public HVecBinaryOperation {
HInstruction* right,
Primitive::Type packed_type,
size_t vector_length,
+ bool is_unsigned,
uint32_t dex_pc = kNoDexPc)
: HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) {
DCHECK(HasConsistentPackedTypes(left, packed_type));
DCHECK(HasConsistentPackedTypes(right, packed_type));
+ SetPackedFlag<kFieldMaxOpIsUnsigned>(is_unsigned);
}
+
+ bool IsUnsigned() const { return GetPackedFlag<kFieldMaxOpIsUnsigned>(); }
+
DECLARE_INSTRUCTION(VecMax);
+
private:
+ // Additional packed bits.
+ static constexpr size_t kFieldMaxOpIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits;
+ static constexpr size_t kNumberOfMaxOpPackedBits = kFieldMaxOpIsUnsigned + 1;
+ static_assert(kNumberOfMaxOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+
DISALLOW_COPY_AND_ASSIGN(HVecMax);
};