diff options
author | 2017-05-10 10:49:22 -0700 | |
---|---|---|
committer | 2017-05-15 11:44:58 -0700 | |
commit | c8e93c736c149ce41be073dd24324fb08afb9ae4 (patch) | |
tree | 8e7154cf1bbcee8f5837ee9cb930174e2516ac03 /compiler | |
parent | 92f4672f811a4eccdc596f7c2235804abd196fde (diff) |
Min/max SIMDization support.
Rationale:
The more vectorized, the better!
Test: test-art-target, test-art-host
Change-Id: I758becca5beaa5b97fab2ab70f2e00cb53458703
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/optimizing/code_generator_vector_arm64.cc | 90 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_vector_x86.cc | 92 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_vector_x86_64.cc | 96 | ||||
-rw-r--r-- | compiler/optimizing/graph_visualizer.cc | 8 | ||||
-rw-r--r-- | compiler/optimizing/loop_optimization.cc | 50 | ||||
-rw-r--r-- | compiler/optimizing/loop_optimization.h | 1 | ||||
-rw-r--r-- | compiler/optimizing/nodes_vector.h | 22 | ||||
-rw-r--r-- | compiler/utils/x86/assembler_x86.cc | 133 | ||||
-rw-r--r-- | compiler/utils/x86/assembler_x86.h | 19 | ||||
-rw-r--r-- | compiler/utils/x86/assembler_x86_test.cc | 64 | ||||
-rw-r--r-- | compiler/utils/x86_64/assembler_x86_64.cc | 150 | ||||
-rw-r--r-- | compiler/utils/x86_64/assembler_x86_64.h | 19 | ||||
-rw-r--r-- | compiler/utils/x86_64/assembler_x86_64_test.cc | 64 |
13 files changed, 800 insertions, 8 deletions
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc index 57f7e6b25c..0739c6e9a1 100644 --- a/compiler/optimizing/code_generator_vector_arm64.cc +++ b/compiler/optimizing/code_generator_vector_arm64.cc @@ -468,7 +468,50 @@ void LocationsBuilderARM64::VisitVecMin(HVecMin* instruction) { } void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) { - LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VRegister lhs = VRegisterFrom(locations->InAt(0)); + VRegister rhs = VRegisterFrom(locations->InAt(1)); + VRegister dst = VRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Umin(dst.V16B(), lhs.V16B(), rhs.V16B()); + } else { + __ Smin(dst.V16B(), lhs.V16B(), rhs.V16B()); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Umin(dst.V8H(), lhs.V8H(), rhs.V8H()); + } else { + __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H()); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S()); + } else { + __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S()); + } + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ Fmin(dst.V4S(), lhs.V4S(), rhs.V4S()); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) { @@ -476,7 +519,50 @@ void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) { } void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) { - LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VRegister lhs = VRegisterFrom(locations->InAt(0)); + VRegister rhs = VRegisterFrom(locations->InAt(1)); + VRegister dst = VRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Umax(dst.V16B(), lhs.V16B(), rhs.V16B()); + } else { + __ Smax(dst.V16B(), lhs.V16B(), rhs.V16B()); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Umax(dst.V8H(), lhs.V8H(), rhs.V8H()); + } else { + __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H()); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S()); + } else { + __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S()); + } + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ Fmax(dst.V4S(), lhs.V4S(), rhs.V4S()); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) { diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index 5bb19c193c..14782d70a1 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -483,7 +483,51 @@ void LocationsBuilderX86::VisitVecMin(HVecMin* instruction) { } void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pminub(dst, src); + } else { + __ pminsb(dst, src); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pminuw(dst, src); + } else { + __ pminsw(dst, src); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pminud(dst, src); + } else { + __ pminsd(dst, src); + } + break; + // Next cases are sloppy wrt 0.0 vs -0.0. + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ minps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ minpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) { @@ -491,7 +535,51 @@ void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) { } void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pmaxub(dst, src); + } else { + __ pmaxsb(dst, src); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pmaxuw(dst, src); + } else { + __ pmaxsw(dst, src); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pmaxud(dst, src); + } else { + __ pmaxsd(dst, src); + } + break; + // Next cases are sloppy wrt 0.0 vs -0.0. + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ maxps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ maxpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) { diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc index 6d4aae86e6..246044ebb8 100644 --- a/compiler/optimizing/code_generator_vector_x86_64.cc +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -353,6 +353,10 @@ void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruct DCHECK(locations->InAt(0).Equals(locations->Out())); XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + + DCHECK(instruction->IsRounded()); + DCHECK(instruction->IsUnsigned()); + switch (instruction->GetPackedType()) { case Primitive::kPrimByte: DCHECK_EQ(16u, instruction->GetVectorLength()); @@ -472,7 +476,51 @@ void LocationsBuilderX86_64::VisitVecMin(HVecMin* instruction) { } void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pminub(dst, src); + } else { + __ pminsb(dst, src); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pminuw(dst, src); + } else { + __ pminsw(dst, src); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pminud(dst, src); + } else { + __ pminsd(dst, src); + } + break; + // Next cases are sloppy wrt 0.0 vs -0.0. + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ minps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ minpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) { @@ -480,7 +528,51 @@ void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) { } void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pmaxub(dst, src); + } else { + __ pmaxsb(dst, src); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pmaxuw(dst, src); + } else { + __ pmaxsw(dst, src); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pmaxud(dst, src); + } else { + __ pmaxsd(dst, src); + } + break; + // Next cases are sloppy wrt 0.0 vs -0.0. + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ maxps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ maxpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) { diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index e5d94c3504..02816cf7ce 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -514,6 +514,14 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("rounded") << std::boolalpha << hadd->IsRounded() << std::noboolalpha; } + void VisitVecMin(HVecMin* min) OVERRIDE { + StartAttributeStream("unsigned") << std::boolalpha << min->IsUnsigned() << std::noboolalpha; + } + + void VisitVecMax(HVecMax* max) OVERRIDE { + StartAttributeStream("unsigned") << std::boolalpha << max->IsUnsigned() << std::noboolalpha; + } + void VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) OVERRIDE { StartAttributeStream("kind") << instruction->GetOpKind(); } diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 881802d714..4067aa3468 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -869,6 +869,32 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, } return false; } + case Intrinsics::kMathMinIntInt: + case Intrinsics::kMathMinLongLong: + case Intrinsics::kMathMinFloatFloat: + case Intrinsics::kMathMinDoubleDouble: + case Intrinsics::kMathMaxIntInt: + case Intrinsics::kMathMaxLongLong: + case Intrinsics::kMathMaxFloatFloat: + case Intrinsics::kMathMaxDoubleDouble: { + // Deal with vector restrictions. + if (HasVectorRestrictions(restrictions, kNoMinMax) || + HasVectorRestrictions(restrictions, kNoHiBits)) { + // TODO: we can do better for some hibits cases. + return false; + } + // Accept MIN/MAX(x, y) for vectorizable operands. + HInstruction* opa = instruction->InputAt(0); + HInstruction* opb = instruction->InputAt(1); + if (VectorizeUse(node, opa, generate_code, type, restrictions) && + VectorizeUse(node, opb, generate_code, type, restrictions)) { + if (generate_code) { + GenerateVecOp(instruction, vector_map_->Get(opa), vector_map_->Get(opb), type); + } + return true; + } + return false; + } default: return false; } // switch @@ -898,7 +924,7 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric *restrictions |= kNoDiv; return TrySetVectorLength(4); case Primitive::kPrimLong: - *restrictions |= kNoDiv | kNoMul; + *restrictions |= kNoDiv | kNoMul | kNoMinMax; return TrySetVectorLength(2); case Primitive::kPrimFloat: return TrySetVectorLength(4); @@ -924,11 +950,13 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric *restrictions |= kNoDiv; return TrySetVectorLength(4); case Primitive::kPrimLong: - *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs; + *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoMinMax; return TrySetVectorLength(2); case Primitive::kPrimFloat: + *restrictions |= kNoMinMax; // -0.0 vs +0.0 return TrySetVectorLength(4); case Primitive::kPrimDouble: + *restrictions |= kNoMinMax; // -0.0 vs +0.0 return TrySetVectorLength(2); default: break; @@ -1108,6 +1136,24 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, DCHECK(opb == nullptr); vector = new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_); break; + case Intrinsics::kMathMinIntInt: + case Intrinsics::kMathMinLongLong: + case Intrinsics::kMathMinFloatFloat: + case Intrinsics::kMathMinDoubleDouble: { + bool is_unsigned = false; // TODO: detect unsigned versions + vector = new (global_allocator_) + HVecMin(global_allocator_, opa, opb, type, vector_length_, is_unsigned); + break; + } + case Intrinsics::kMathMaxIntInt: + case Intrinsics::kMathMaxLongLong: + case Intrinsics::kMathMaxFloatFloat: + case Intrinsics::kMathMaxDoubleDouble: { + bool is_unsigned = false; // TODO: detect unsigned versions + vector = new (global_allocator_) + HVecMax(global_allocator_, opa, opb, type, vector_length_, is_unsigned); + break; + } default: LOG(FATAL) << "Unsupported SIMD intrinsic"; UNREACHABLE(); diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index 4a7da86e32..6d5978d337 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -71,6 +71,7 @@ class HLoopOptimization : public HOptimization { kNoSignedHAdd = 32, // no signed halving add kNoUnroundedHAdd = 64, // no unrounded halving add kNoAbs = 128, // no absolute value + kNoMinMax = 256, // no min/max }; /* diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index 52c247b52f..c2bb6e79c0 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -451,13 +451,24 @@ class HVecMin FINAL : public HVecBinaryOperation { HInstruction* right, Primitive::Type packed_type, size_t vector_length, + bool is_unsigned, uint32_t dex_pc = kNoDexPc) : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); + SetPackedFlag<kFieldMinOpIsUnsigned>(is_unsigned); } + + bool IsUnsigned() const { return GetPackedFlag<kFieldMinOpIsUnsigned>(); } + DECLARE_INSTRUCTION(VecMin); + private: + // Additional packed bits. + static constexpr size_t kFieldMinOpIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits; + static constexpr size_t kNumberOfMinOpPackedBits = kFieldMinOpIsUnsigned + 1; + static_assert(kNumberOfMinOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); + DISALLOW_COPY_AND_ASSIGN(HVecMin); }; @@ -470,13 +481,24 @@ class HVecMax FINAL : public HVecBinaryOperation { HInstruction* right, Primitive::Type packed_type, size_t vector_length, + bool is_unsigned, uint32_t dex_pc = kNoDexPc) : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); + SetPackedFlag<kFieldMaxOpIsUnsigned>(is_unsigned); } + + bool IsUnsigned() const { return GetPackedFlag<kFieldMaxOpIsUnsigned>(); } + DECLARE_INSTRUCTION(VecMax); + private: + // Additional packed bits. + static constexpr size_t kFieldMaxOpIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits; + static constexpr size_t kNumberOfMaxOpPackedBits = kFieldMaxOpIsUnsigned + 1; + static_assert(kNumberOfMaxOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); + DISALLOW_COPY_AND_ASSIGN(HVecMax); }; diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 1736618363..bef32f8254 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -1238,6 +1238,139 @@ void X86Assembler::pavgw(XmmRegister dst, XmmRegister src) { EmitXmmRegisterOperand(dst, src); } +void X86Assembler::pminsb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x38); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pmaxsb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3C); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pminsw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xEA); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pmaxsw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xEE); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pminsd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x39); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pmaxsd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3D); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pminub(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xDA); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pmaxub(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xDE); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pminuw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3A); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pmaxuw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3E); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pminud(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3B); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pmaxud(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3F); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::minps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x5D); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::maxps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x5F); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::minpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x5D); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::maxpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x5F); + EmitXmmRegisterOperand(dst, src); +} void X86Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index a747cda7bd..c4bb9ee18a 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -498,6 +498,25 @@ class X86Assembler FINAL : public Assembler { void pavgb(XmmRegister dst, XmmRegister src); // no addr variant (for now) void pavgw(XmmRegister dst, XmmRegister src); + void pminsb(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void pmaxsb(XmmRegister dst, XmmRegister src); + void pminsw(XmmRegister dst, XmmRegister src); + void pmaxsw(XmmRegister dst, XmmRegister src); + void pminsd(XmmRegister dst, XmmRegister src); + void pmaxsd(XmmRegister dst, XmmRegister src); + + void pminub(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void pmaxub(XmmRegister dst, XmmRegister src); + void pminuw(XmmRegister dst, XmmRegister src); + void pmaxuw(XmmRegister dst, XmmRegister src); + void pminud(XmmRegister dst, XmmRegister src); + void pmaxud(XmmRegister dst, XmmRegister src); + + void minps(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void maxps(XmmRegister dst, XmmRegister src); + void minpd(XmmRegister dst, XmmRegister src); + void maxpd(XmmRegister dst, XmmRegister src); + void pcmpeqb(XmmRegister dst, XmmRegister src); void pcmpeqw(XmmRegister dst, XmmRegister src); void pcmpeqd(XmmRegister dst, XmmRegister src); diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index f75f972265..34f2a47c27 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -613,6 +613,70 @@ TEST_F(AssemblerX86Test, PAvgW) { DriverStr(RepeatFF(&x86::X86Assembler::pavgw, "pavgw %{reg2}, %{reg1}"), "pavgw"); } +TEST_F(AssemblerX86Test, PMinSB) { + DriverStr(RepeatFF(&x86::X86Assembler::pminsb, "pminsb %{reg2}, %{reg1}"), "pminsb"); +} + +TEST_F(AssemblerX86Test, PMaxSB) { + DriverStr(RepeatFF(&x86::X86Assembler::pmaxsb, "pmaxsb %{reg2}, %{reg1}"), "pmaxsb"); +} + +TEST_F(AssemblerX86Test, PMinSW) { + DriverStr(RepeatFF(&x86::X86Assembler::pminsw, "pminsw %{reg2}, %{reg1}"), "pminsw"); +} + +TEST_F(AssemblerX86Test, PMaxSW) { + DriverStr(RepeatFF(&x86::X86Assembler::pmaxsw, "pmaxsw %{reg2}, %{reg1}"), "pmaxsw"); +} + +TEST_F(AssemblerX86Test, PMinSD) { + DriverStr(RepeatFF(&x86::X86Assembler::pminsd, "pminsd %{reg2}, %{reg1}"), "pminsd"); +} + +TEST_F(AssemblerX86Test, PMaxSD) { + DriverStr(RepeatFF(&x86::X86Assembler::pmaxsd, "pmaxsd %{reg2}, %{reg1}"), "pmaxsd"); +} + +TEST_F(AssemblerX86Test, PMinUB) { + DriverStr(RepeatFF(&x86::X86Assembler::pminub, "pminub %{reg2}, %{reg1}"), "pminub"); +} + +TEST_F(AssemblerX86Test, PMaxUB) { + DriverStr(RepeatFF(&x86::X86Assembler::pmaxub, "pmaxub %{reg2}, %{reg1}"), "pmaxub"); +} + +TEST_F(AssemblerX86Test, PMinUW) { + DriverStr(RepeatFF(&x86::X86Assembler::pminuw, "pminuw %{reg2}, %{reg1}"), "pminuw"); +} + +TEST_F(AssemblerX86Test, PMaxUW) { + DriverStr(RepeatFF(&x86::X86Assembler::pmaxuw, "pmaxuw %{reg2}, %{reg1}"), "pmaxuw"); +} + +TEST_F(AssemblerX86Test, PMinUD) { + DriverStr(RepeatFF(&x86::X86Assembler::pminud, "pminud %{reg2}, %{reg1}"), "pminud"); +} + +TEST_F(AssemblerX86Test, PMaxUD) { + DriverStr(RepeatFF(&x86::X86Assembler::pmaxud, "pmaxud %{reg2}, %{reg1}"), "pmaxud"); +} + +TEST_F(AssemblerX86Test, MinPS) { + DriverStr(RepeatFF(&x86::X86Assembler::minps, "minps %{reg2}, %{reg1}"), "minps"); +} + +TEST_F(AssemblerX86Test, MaxPS) { + DriverStr(RepeatFF(&x86::X86Assembler::maxps, "maxps %{reg2}, %{reg1}"), "maxps"); +} + +TEST_F(AssemblerX86Test, MinPD) { + DriverStr(RepeatFF(&x86::X86Assembler::minpd, "minpd %{reg2}, %{reg1}"), "minpd"); +} + +TEST_F(AssemblerX86Test, MaxPD) { + DriverStr(RepeatFF(&x86::X86Assembler::maxpd, "maxpd %{reg2}, %{reg1}"), "maxpd"); +} + TEST_F(AssemblerX86Test, PCmpeqB) { DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "cmpeqb"); } diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 1b7a4850db..82d1174a25 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -1445,6 +1445,156 @@ void X86_64Assembler::pavgw(XmmRegister dst, XmmRegister src) { EmitXmmRegisterOperand(dst.LowBits(), src); } +void X86_64Assembler::pminsb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x38); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pmaxsb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3C); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pminsw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xEA); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pmaxsw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xEE); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pminsd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x39); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pmaxsd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3D); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pminub(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xDA); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pmaxub(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xDE); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pminuw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3A); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pmaxuw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3E); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pminud(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3B); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pmaxud(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3F); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::minps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x5D); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::maxps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x5F); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::minpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x5D); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::maxpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x5F); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 0ddc46ca44..6e584fece1 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -526,6 +526,25 @@ class X86_64Assembler FINAL : public Assembler { void pavgb(XmmRegister dst, XmmRegister src); // no addr variant (for now) void pavgw(XmmRegister dst, XmmRegister src); + void pminsb(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void pmaxsb(XmmRegister dst, XmmRegister src); + void pminsw(XmmRegister dst, XmmRegister src); + void pmaxsw(XmmRegister dst, XmmRegister src); + void pminsd(XmmRegister dst, XmmRegister src); + void pmaxsd(XmmRegister dst, XmmRegister src); + + void pminub(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void pmaxub(XmmRegister dst, XmmRegister src); + void pminuw(XmmRegister dst, XmmRegister src); + void pmaxuw(XmmRegister dst, XmmRegister src); + void pminud(XmmRegister dst, XmmRegister src); + void pmaxud(XmmRegister dst, XmmRegister src); + + void minps(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void maxps(XmmRegister dst, XmmRegister src); + void minpd(XmmRegister dst, XmmRegister src); + void maxpd(XmmRegister dst, XmmRegister src); + void pcmpeqb(XmmRegister dst, XmmRegister src); void pcmpeqw(XmmRegister dst, XmmRegister src); void pcmpeqd(XmmRegister dst, XmmRegister src); diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index e7d8401e29..b57400334c 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -1301,6 +1301,70 @@ TEST_F(AssemblerX86_64Test, Pavgw) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::pavgw, "pavgw %{reg2}, %{reg1}"), "pavgw"); } +TEST_F(AssemblerX86_64Test, Pminsb) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminsb, "pminsb %{reg2}, %{reg1}"), "pminsb"); +} + +TEST_F(AssemblerX86_64Test, Pmaxsb) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxsb, "pmaxsb %{reg2}, %{reg1}"), "pmaxsb"); +} + +TEST_F(AssemblerX86_64Test, Pminsw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminsw, "pminsw %{reg2}, %{reg1}"), "pminsw"); +} + +TEST_F(AssemblerX86_64Test, Pmaxsw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxsw, "pmaxsw %{reg2}, %{reg1}"), "pmaxsw"); +} + +TEST_F(AssemblerX86_64Test, Pminsd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminsd, "pminsd %{reg2}, %{reg1}"), "pminsd"); +} + +TEST_F(AssemblerX86_64Test, Pmaxsd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxsd, "pmaxsd %{reg2}, %{reg1}"), "pmaxsd"); +} + +TEST_F(AssemblerX86_64Test, Pminub) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminub, "pminub %{reg2}, %{reg1}"), "pminub"); +} + +TEST_F(AssemblerX86_64Test, Pmaxub) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxub, "pmaxub %{reg2}, %{reg1}"), "pmaxub"); +} + +TEST_F(AssemblerX86_64Test, Pminuw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminuw, "pminuw %{reg2}, %{reg1}"), "pminuw"); +} + +TEST_F(AssemblerX86_64Test, Pmaxuw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxuw, "pmaxuw %{reg2}, %{reg1}"), "pmaxuw"); +} + +TEST_F(AssemblerX86_64Test, Pminud) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminud, "pminud %{reg2}, %{reg1}"), "pminud"); +} + +TEST_F(AssemblerX86_64Test, Pmaxud) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxud, "pmaxud %{reg2}, %{reg1}"), "pmaxud"); +} + +TEST_F(AssemblerX86_64Test, Minps) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::minps, "minps %{reg2}, %{reg1}"), "minps"); +} + +TEST_F(AssemblerX86_64Test, Maxps) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::maxps, "maxps %{reg2}, %{reg1}"), "maxps"); +} + +TEST_F(AssemblerX86_64Test, Minpd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::minpd, "minpd %{reg2}, %{reg1}"), "minpd"); +} + +TEST_F(AssemblerX86_64Test, Maxpd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::maxpd, "maxpd %{reg2}, %{reg1}"), "maxpd"); +} + TEST_F(AssemblerX86_64Test, PCmpeqb) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "pcmpeqb"); } |