diff options
author | 2017-05-10 10:49:22 -0700 | |
---|---|---|
committer | 2017-05-15 11:44:58 -0700 | |
commit | c8e93c736c149ce41be073dd24324fb08afb9ae4 (patch) | |
tree | 8e7154cf1bbcee8f5837ee9cb930174e2516ac03 | |
parent | 92f4672f811a4eccdc596f7c2235804abd196fde (diff) |
Min/max SIMDization support.
Rationale:
The more vectorized, the better!
Test: test-art-target, test-art-host
Change-Id: I758becca5beaa5b97fab2ab70f2e00cb53458703
35 files changed, 1649 insertions, 9 deletions
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc index 57f7e6b25c..0739c6e9a1 100644 --- a/compiler/optimizing/code_generator_vector_arm64.cc +++ b/compiler/optimizing/code_generator_vector_arm64.cc @@ -468,7 +468,50 @@ void LocationsBuilderARM64::VisitVecMin(HVecMin* instruction) { } void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) { - LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VRegister lhs = VRegisterFrom(locations->InAt(0)); + VRegister rhs = VRegisterFrom(locations->InAt(1)); + VRegister dst = VRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Umin(dst.V16B(), lhs.V16B(), rhs.V16B()); + } else { + __ Smin(dst.V16B(), lhs.V16B(), rhs.V16B()); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Umin(dst.V8H(), lhs.V8H(), rhs.V8H()); + } else { + __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H()); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S()); + } else { + __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S()); + } + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ Fmin(dst.V4S(), lhs.V4S(), rhs.V4S()); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) { @@ -476,7 +519,50 @@ void 
LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) { } void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) { - LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VRegister lhs = VRegisterFrom(locations->InAt(0)); + VRegister rhs = VRegisterFrom(locations->InAt(1)); + VRegister dst = VRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Umax(dst.V16B(), lhs.V16B(), rhs.V16B()); + } else { + __ Smax(dst.V16B(), lhs.V16B(), rhs.V16B()); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Umax(dst.V8H(), lhs.V8H(), rhs.V8H()); + } else { + __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H()); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S()); + } else { + __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S()); + } + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ Fmax(dst.V4S(), lhs.V4S(), rhs.V4S()); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) { diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index 5bb19c193c..14782d70a1 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -483,7 +483,51 @@ void LocationsBuilderX86::VisitVecMin(HVecMin* instruction) { } 
void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pminub(dst, src); + } else { + __ pminsb(dst, src); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pminuw(dst, src); + } else { + __ pminsw(dst, src); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pminud(dst, src); + } else { + __ pminsd(dst, src); + } + break; + // Next cases are sloppy wrt 0.0 vs -0.0. 
+ case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ minps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ minpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) { @@ -491,7 +535,51 @@ void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) { } void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pmaxub(dst, src); + } else { + __ pmaxsb(dst, src); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pmaxuw(dst, src); + } else { + __ pmaxsw(dst, src); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pmaxud(dst, src); + } else { + __ pmaxsd(dst, src); + } + break; + // Next cases are sloppy wrt 0.0 vs -0.0. 
+ case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ maxps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ maxpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) { diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc index 6d4aae86e6..246044ebb8 100644 --- a/compiler/optimizing/code_generator_vector_x86_64.cc +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -353,6 +353,10 @@ void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruct DCHECK(locations->InAt(0).Equals(locations->Out())); XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + + DCHECK(instruction->IsRounded()); + DCHECK(instruction->IsUnsigned()); + switch (instruction->GetPackedType()) { case Primitive::kPrimByte: DCHECK_EQ(16u, instruction->GetVectorLength()); @@ -472,7 +476,51 @@ void LocationsBuilderX86_64::VisitVecMin(HVecMin* instruction) { } void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pminub(dst, src); + } else { + __ pminsb(dst, src); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if 
(instruction->IsUnsigned()) { + __ pminuw(dst, src); + } else { + __ pminsw(dst, src); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pminud(dst, src); + } else { + __ pminsd(dst, src); + } + break; + // Next cases are sloppy wrt 0.0 vs -0.0. + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ minps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ minpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) { @@ -480,7 +528,51 @@ void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) { } void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pmaxub(dst, src); + } else { + __ pmaxsb(dst, src); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pmaxuw(dst, src); + } else { + __ pmaxsw(dst, src); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ pmaxud(dst, src); + } else { + __ pmaxsd(dst, src); + } + break; + // Next cases are sloppy wrt 0.0 vs -0.0. 
+ case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ maxps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ maxpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) { diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index e5d94c3504..02816cf7ce 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -514,6 +514,14 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("rounded") << std::boolalpha << hadd->IsRounded() << std::noboolalpha; } + void VisitVecMin(HVecMin* min) OVERRIDE { + StartAttributeStream("unsigned") << std::boolalpha << min->IsUnsigned() << std::noboolalpha; + } + + void VisitVecMax(HVecMax* max) OVERRIDE { + StartAttributeStream("unsigned") << std::boolalpha << max->IsUnsigned() << std::noboolalpha; + } + void VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) OVERRIDE { StartAttributeStream("kind") << instruction->GetOpKind(); } diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 881802d714..4067aa3468 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -869,6 +869,32 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, } return false; } + case Intrinsics::kMathMinIntInt: + case Intrinsics::kMathMinLongLong: + case Intrinsics::kMathMinFloatFloat: + case Intrinsics::kMathMinDoubleDouble: + case Intrinsics::kMathMaxIntInt: + case Intrinsics::kMathMaxLongLong: + case Intrinsics::kMathMaxFloatFloat: + case Intrinsics::kMathMaxDoubleDouble: { + // Deal with vector restrictions. 
+ if (HasVectorRestrictions(restrictions, kNoMinMax) || + HasVectorRestrictions(restrictions, kNoHiBits)) { + // TODO: we can do better for some hibits cases. + return false; + } + // Accept MIN/MAX(x, y) for vectorizable operands. + HInstruction* opa = instruction->InputAt(0); + HInstruction* opb = instruction->InputAt(1); + if (VectorizeUse(node, opa, generate_code, type, restrictions) && + VectorizeUse(node, opb, generate_code, type, restrictions)) { + if (generate_code) { + GenerateVecOp(instruction, vector_map_->Get(opa), vector_map_->Get(opb), type); + } + return true; + } + return false; + } default: return false; } // switch @@ -898,7 +924,7 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric *restrictions |= kNoDiv; return TrySetVectorLength(4); case Primitive::kPrimLong: - *restrictions |= kNoDiv | kNoMul; + *restrictions |= kNoDiv | kNoMul | kNoMinMax; return TrySetVectorLength(2); case Primitive::kPrimFloat: return TrySetVectorLength(4); @@ -924,11 +950,13 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric *restrictions |= kNoDiv; return TrySetVectorLength(4); case Primitive::kPrimLong: - *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs; + *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoMinMax; return TrySetVectorLength(2); case Primitive::kPrimFloat: + *restrictions |= kNoMinMax; // -0.0 vs +0.0 return TrySetVectorLength(4); case Primitive::kPrimDouble: + *restrictions |= kNoMinMax; // -0.0 vs +0.0 return TrySetVectorLength(2); default: break; @@ -1108,6 +1136,24 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, DCHECK(opb == nullptr); vector = new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_); break; + case Intrinsics::kMathMinIntInt: + case Intrinsics::kMathMinLongLong: + case Intrinsics::kMathMinFloatFloat: + case Intrinsics::kMathMinDoubleDouble: { + bool is_unsigned = false; // TODO: detect unsigned versions + vector = new 
(global_allocator_) + HVecMin(global_allocator_, opa, opb, type, vector_length_, is_unsigned); + break; + } + case Intrinsics::kMathMaxIntInt: + case Intrinsics::kMathMaxLongLong: + case Intrinsics::kMathMaxFloatFloat: + case Intrinsics::kMathMaxDoubleDouble: { + bool is_unsigned = false; // TODO: detect unsigned versions + vector = new (global_allocator_) + HVecMax(global_allocator_, opa, opb, type, vector_length_, is_unsigned); + break; + } default: LOG(FATAL) << "Unsupported SIMD intrinsic"; UNREACHABLE(); diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index 4a7da86e32..6d5978d337 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -71,6 +71,7 @@ class HLoopOptimization : public HOptimization { kNoSignedHAdd = 32, // no signed halving add kNoUnroundedHAdd = 64, // no unrounded halving add kNoAbs = 128, // no absolute value + kNoMinMax = 256, // no min/max }; /* diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index 52c247b52f..c2bb6e79c0 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -451,13 +451,24 @@ class HVecMin FINAL : public HVecBinaryOperation { HInstruction* right, Primitive::Type packed_type, size_t vector_length, + bool is_unsigned, uint32_t dex_pc = kNoDexPc) : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); + SetPackedFlag<kFieldMinOpIsUnsigned>(is_unsigned); } + + bool IsUnsigned() const { return GetPackedFlag<kFieldMinOpIsUnsigned>(); } + DECLARE_INSTRUCTION(VecMin); + private: + // Additional packed bits. 
+ static constexpr size_t kFieldMinOpIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits; + static constexpr size_t kNumberOfMinOpPackedBits = kFieldMinOpIsUnsigned + 1; + static_assert(kNumberOfMinOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); + DISALLOW_COPY_AND_ASSIGN(HVecMin); }; @@ -470,13 +481,24 @@ class HVecMax FINAL : public HVecBinaryOperation { HInstruction* right, Primitive::Type packed_type, size_t vector_length, + bool is_unsigned, uint32_t dex_pc = kNoDexPc) : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); + SetPackedFlag<kFieldMaxOpIsUnsigned>(is_unsigned); } + + bool IsUnsigned() const { return GetPackedFlag<kFieldMaxOpIsUnsigned>(); } + DECLARE_INSTRUCTION(VecMax); + private: + // Additional packed bits. + static constexpr size_t kFieldMaxOpIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits; + static constexpr size_t kNumberOfMaxOpPackedBits = kFieldMaxOpIsUnsigned + 1; + static_assert(kNumberOfMaxOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); + DISALLOW_COPY_AND_ASSIGN(HVecMax); }; diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 1736618363..bef32f8254 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -1238,6 +1238,139 @@ void X86Assembler::pavgw(XmmRegister dst, XmmRegister src) { EmitXmmRegisterOperand(dst, src); } +void X86Assembler::pminsb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x38); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pmaxsb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3C); + EmitXmmRegisterOperand(dst, src); +} + 
+void X86Assembler::pminsw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xEA); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pmaxsw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xEE); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pminsd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x39); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pmaxsd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3D); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pminub(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xDA); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pmaxub(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xDE); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pminuw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3A); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pmaxuw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3E); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pminud(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3B); + 
EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::pmaxud(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3F); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::minps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x5D); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::maxps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x5F); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::minpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x5D); + EmitXmmRegisterOperand(dst, src); +} + +void X86Assembler::maxpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x5F); + EmitXmmRegisterOperand(dst, src); +} void X86Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index a747cda7bd..c4bb9ee18a 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -498,6 +498,25 @@ class X86Assembler FINAL : public Assembler { void pavgb(XmmRegister dst, XmmRegister src); // no addr variant (for now) void pavgw(XmmRegister dst, XmmRegister src); + void pminsb(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void pmaxsb(XmmRegister dst, XmmRegister src); + void pminsw(XmmRegister dst, XmmRegister src); + void pmaxsw(XmmRegister dst, XmmRegister src); + void pminsd(XmmRegister dst, XmmRegister src); + void pmaxsd(XmmRegister dst, XmmRegister src); + + void pminub(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void 
pmaxub(XmmRegister dst, XmmRegister src); + void pminuw(XmmRegister dst, XmmRegister src); + void pmaxuw(XmmRegister dst, XmmRegister src); + void pminud(XmmRegister dst, XmmRegister src); + void pmaxud(XmmRegister dst, XmmRegister src); + + void minps(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void maxps(XmmRegister dst, XmmRegister src); + void minpd(XmmRegister dst, XmmRegister src); + void maxpd(XmmRegister dst, XmmRegister src); + void pcmpeqb(XmmRegister dst, XmmRegister src); void pcmpeqw(XmmRegister dst, XmmRegister src); void pcmpeqd(XmmRegister dst, XmmRegister src); diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index f75f972265..34f2a47c27 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -613,6 +613,70 @@ TEST_F(AssemblerX86Test, PAvgW) { DriverStr(RepeatFF(&x86::X86Assembler::pavgw, "pavgw %{reg2}, %{reg1}"), "pavgw"); } +TEST_F(AssemblerX86Test, PMinSB) { + DriverStr(RepeatFF(&x86::X86Assembler::pminsb, "pminsb %{reg2}, %{reg1}"), "pminsb"); +} + +TEST_F(AssemblerX86Test, PMaxSB) { + DriverStr(RepeatFF(&x86::X86Assembler::pmaxsb, "pmaxsb %{reg2}, %{reg1}"), "pmaxsb"); +} + +TEST_F(AssemblerX86Test, PMinSW) { + DriverStr(RepeatFF(&x86::X86Assembler::pminsw, "pminsw %{reg2}, %{reg1}"), "pminsw"); +} + +TEST_F(AssemblerX86Test, PMaxSW) { + DriverStr(RepeatFF(&x86::X86Assembler::pmaxsw, "pmaxsw %{reg2}, %{reg1}"), "pmaxsw"); +} + +TEST_F(AssemblerX86Test, PMinSD) { + DriverStr(RepeatFF(&x86::X86Assembler::pminsd, "pminsd %{reg2}, %{reg1}"), "pminsd"); +} + +TEST_F(AssemblerX86Test, PMaxSD) { + DriverStr(RepeatFF(&x86::X86Assembler::pmaxsd, "pmaxsd %{reg2}, %{reg1}"), "pmaxsd"); +} + +TEST_F(AssemblerX86Test, PMinUB) { + DriverStr(RepeatFF(&x86::X86Assembler::pminub, "pminub %{reg2}, %{reg1}"), "pminub"); +} + +TEST_F(AssemblerX86Test, PMaxUB) { + DriverStr(RepeatFF(&x86::X86Assembler::pmaxub, "pmaxub %{reg2}, %{reg1}"), 
"pmaxub"); +} + +TEST_F(AssemblerX86Test, PMinUW) { + DriverStr(RepeatFF(&x86::X86Assembler::pminuw, "pminuw %{reg2}, %{reg1}"), "pminuw"); +} + +TEST_F(AssemblerX86Test, PMaxUW) { + DriverStr(RepeatFF(&x86::X86Assembler::pmaxuw, "pmaxuw %{reg2}, %{reg1}"), "pmaxuw"); +} + +TEST_F(AssemblerX86Test, PMinUD) { + DriverStr(RepeatFF(&x86::X86Assembler::pminud, "pminud %{reg2}, %{reg1}"), "pminud"); +} + +TEST_F(AssemblerX86Test, PMaxUD) { + DriverStr(RepeatFF(&x86::X86Assembler::pmaxud, "pmaxud %{reg2}, %{reg1}"), "pmaxud"); +} + +TEST_F(AssemblerX86Test, MinPS) { + DriverStr(RepeatFF(&x86::X86Assembler::minps, "minps %{reg2}, %{reg1}"), "minps"); +} + +TEST_F(AssemblerX86Test, MaxPS) { + DriverStr(RepeatFF(&x86::X86Assembler::maxps, "maxps %{reg2}, %{reg1}"), "maxps"); +} + +TEST_F(AssemblerX86Test, MinPD) { + DriverStr(RepeatFF(&x86::X86Assembler::minpd, "minpd %{reg2}, %{reg1}"), "minpd"); +} + +TEST_F(AssemblerX86Test, MaxPD) { + DriverStr(RepeatFF(&x86::X86Assembler::maxpd, "maxpd %{reg2}, %{reg1}"), "maxpd"); +} + TEST_F(AssemblerX86Test, PCmpeqB) { DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "cmpeqb"); } diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 1b7a4850db..82d1174a25 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -1445,6 +1445,156 @@ void X86_64Assembler::pavgw(XmmRegister dst, XmmRegister src) { EmitXmmRegisterOperand(dst.LowBits(), src); } +void X86_64Assembler::pminsb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x38); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pmaxsb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + 
EmitUint8(0x38); + EmitUint8(0x3C); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pminsw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xEA); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pmaxsw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xEE); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pminsd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x39); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pmaxsd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3D); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pminub(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xDA); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pmaxub(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xDE); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pminuw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3A); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pmaxuw(XmmRegister dst, 
XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3E); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pminud(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3B); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pmaxud(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x3F); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::minps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x5D); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::maxps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x5F); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::minpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x5D); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::maxpd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x5F); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); diff --git a/compiler/utils/x86_64/assembler_x86_64.h 
b/compiler/utils/x86_64/assembler_x86_64.h index 0ddc46ca44..6e584fece1 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -526,6 +526,25 @@ class X86_64Assembler FINAL : public Assembler { void pavgb(XmmRegister dst, XmmRegister src); // no addr variant (for now) void pavgw(XmmRegister dst, XmmRegister src); + void pminsb(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void pmaxsb(XmmRegister dst, XmmRegister src); + void pminsw(XmmRegister dst, XmmRegister src); + void pmaxsw(XmmRegister dst, XmmRegister src); + void pminsd(XmmRegister dst, XmmRegister src); + void pmaxsd(XmmRegister dst, XmmRegister src); + + void pminub(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void pmaxub(XmmRegister dst, XmmRegister src); + void pminuw(XmmRegister dst, XmmRegister src); + void pmaxuw(XmmRegister dst, XmmRegister src); + void pminud(XmmRegister dst, XmmRegister src); + void pmaxud(XmmRegister dst, XmmRegister src); + + void minps(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void maxps(XmmRegister dst, XmmRegister src); + void minpd(XmmRegister dst, XmmRegister src); + void maxpd(XmmRegister dst, XmmRegister src); + void pcmpeqb(XmmRegister dst, XmmRegister src); void pcmpeqw(XmmRegister dst, XmmRegister src); void pcmpeqd(XmmRegister dst, XmmRegister src); diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index e7d8401e29..b57400334c 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -1301,6 +1301,70 @@ TEST_F(AssemblerX86_64Test, Pavgw) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::pavgw, "pavgw %{reg2}, %{reg1}"), "pavgw"); } +TEST_F(AssemblerX86_64Test, Pminsb) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminsb, "pminsb %{reg2}, %{reg1}"), "pminsb"); +} + +TEST_F(AssemblerX86_64Test, Pmaxsb) { + 
DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxsb, "pmaxsb %{reg2}, %{reg1}"), "pmaxsb"); +} + +TEST_F(AssemblerX86_64Test, Pminsw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminsw, "pminsw %{reg2}, %{reg1}"), "pminsw"); +} + +TEST_F(AssemblerX86_64Test, Pmaxsw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxsw, "pmaxsw %{reg2}, %{reg1}"), "pmaxsw"); +} + +TEST_F(AssemblerX86_64Test, Pminsd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminsd, "pminsd %{reg2}, %{reg1}"), "pminsd"); +} + +TEST_F(AssemblerX86_64Test, Pmaxsd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxsd, "pmaxsd %{reg2}, %{reg1}"), "pmaxsd"); +} + +TEST_F(AssemblerX86_64Test, Pminub) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminub, "pminub %{reg2}, %{reg1}"), "pminub"); +} + +TEST_F(AssemblerX86_64Test, Pmaxub) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxub, "pmaxub %{reg2}, %{reg1}"), "pmaxub"); +} + +TEST_F(AssemblerX86_64Test, Pminuw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminuw, "pminuw %{reg2}, %{reg1}"), "pminuw"); +} + +TEST_F(AssemblerX86_64Test, Pmaxuw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxuw, "pmaxuw %{reg2}, %{reg1}"), "pmaxuw"); +} + +TEST_F(AssemblerX86_64Test, Pminud) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminud, "pminud %{reg2}, %{reg1}"), "pminud"); +} + +TEST_F(AssemblerX86_64Test, Pmaxud) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaxud, "pmaxud %{reg2}, %{reg1}"), "pmaxud"); +} + +TEST_F(AssemblerX86_64Test, Minps) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::minps, "minps %{reg2}, %{reg1}"), "minps"); +} + +TEST_F(AssemblerX86_64Test, Maxps) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::maxps, "maxps %{reg2}, %{reg1}"), "maxps"); +} + +TEST_F(AssemblerX86_64Test, Minpd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::minpd, "minpd %{reg2}, %{reg1}"), "minpd"); +} + +TEST_F(AssemblerX86_64Test, Maxpd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::maxpd, "maxpd %{reg2}, %{reg1}"), 
"maxpd"); +} + TEST_F(AssemblerX86_64Test, PCmpeqb) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "pcmpeqb"); } diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc index e12bcec776..4824f70a28 100644 --- a/disassembler/disassembler_x86.cc +++ b/disassembler/disassembler_x86.cc @@ -581,13 +581,69 @@ DISASSEMBLER_ENTRY(cmp, load = true; src_reg_file = dst_reg_file = SSE; break; - case 0x39: + case 0x37: opcode1 = "pcmpgtq"; prefix[2] = 0; has_modrm = true; load = true; src_reg_file = dst_reg_file = SSE; break; + case 0x38: + opcode1 = "pminsb"; + prefix[2] = 0; + has_modrm = true; + load = true; + src_reg_file = dst_reg_file = SSE; + break; + case 0x39: + opcode1 = "pminsd"; + prefix[2] = 0; + has_modrm = true; + load = true; + src_reg_file = dst_reg_file = SSE; + break; + case 0x3A: + opcode1 = "pminuw"; + prefix[2] = 0; + has_modrm = true; + load = true; + src_reg_file = dst_reg_file = SSE; + break; + case 0x3B: + opcode1 = "pminud"; + prefix[2] = 0; + has_modrm = true; + load = true; + src_reg_file = dst_reg_file = SSE; + break; + case 0x3C: + opcode1 = "pmaxsb"; + prefix[2] = 0; + has_modrm = true; + load = true; + src_reg_file = dst_reg_file = SSE; + break; + case 0x3D: + opcode1 = "pmaxsd"; + prefix[2] = 0; + has_modrm = true; + load = true; + src_reg_file = dst_reg_file = SSE; + break; + case 0x3E: + opcode1 = "pmaxuw"; + prefix[2] = 0; + has_modrm = true; + load = true; + src_reg_file = dst_reg_file = SSE; + break; + case 0x3F: + opcode1 = "pmaxud"; + prefix[2] = 0; + has_modrm = true; + load = true; + src_reg_file = dst_reg_file = SSE; + break; case 0x40: opcode1 = "pmulld"; prefix[2] = 0; @@ -1133,8 +1189,12 @@ DISASSEMBLER_ENTRY(cmp, opcode1 = opcode_tmp.c_str(); } break; + case 0xDA: + case 0xDE: case 0xE0: case 0xE3: + case 0xEA: + case 0xEE: if (prefix[2] == 0x66) { src_reg_file = dst_reg_file = SSE; prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode @@ 
-1142,8 +1202,12 @@ DISASSEMBLER_ENTRY(cmp, src_reg_file = dst_reg_file = MMX; } switch (*instr) { + case 0xDA: opcode1 = "pminub"; break; + case 0xDE: opcode1 = "pmaxub"; break; case 0xE0: opcode1 = "pavgb"; break; case 0xE3: opcode1 = "pavgw"; break; + case 0xEA: opcode1 = "pminsw"; break; + case 0xEE: opcode1 = "pmaxsw"; break; } prefix[2] = 0; has_modrm = true; diff --git a/test/651-checker-byte-simd-minmax/expected.txt b/test/651-checker-byte-simd-minmax/expected.txt new file mode 100644 index 0000000000..b0aad4deb5 --- /dev/null +++ b/test/651-checker-byte-simd-minmax/expected.txt @@ -0,0 +1 @@ +passed diff --git a/test/651-checker-byte-simd-minmax/info.txt b/test/651-checker-byte-simd-minmax/info.txt new file mode 100644 index 0000000000..73af1242c0 --- /dev/null +++ b/test/651-checker-byte-simd-minmax/info.txt @@ -0,0 +1 @@ +Functional tests on min/max SIMD vectorization. diff --git a/test/651-checker-byte-simd-minmax/src/Main.java b/test/651-checker-byte-simd-minmax/src/Main.java new file mode 100644 index 0000000000..8211ace741 --- /dev/null +++ b/test/651-checker-byte-simd-minmax/src/Main.java @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Tests for MIN/MAX vectorization. 
+ */ +public class Main { + + /// CHECK-START: void Main.doitMin(byte[], byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv:b\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none + // + // TODO: narrow type vectorization. + /// CHECK-START: void Main.doitMin(byte[], byte[], byte[]) loop_optimization (after) + /// CHECK-NOT: VecMin + private static void doitMin(byte[] x, byte[] y, byte[] z) { + int min = Math.min(x.length, Math.min(y.length, z.length)); + for (int i = 0; i < min; i++) { + x[i] = (byte) Math.min(y[i], z[i]); + } + } + + /// CHECK-START: void Main.doitMax(byte[], byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Max:i\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv:b\d+>> TypeConversion [<<Max>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none + // + // TODO: narrow type vectorization. 
+ /// CHECK-START: void Main.doitMax(byte[], byte[], byte[]) loop_optimization (after) + /// CHECK-NOT: VecMax + private static void doitMax(byte[] x, byte[] y, byte[] z) { + int min = Math.min(x.length, Math.min(y.length, z.length)); + for (int i = 0; i < min; i++) { + x[i] = (byte) Math.max(y[i], z[i]); + } + } + + public static void main(String[] args) { + // Initialize cross-values for all possible values. + int total = 256 * 256; + byte[] x = new byte[total]; + byte[] y = new byte[total]; + byte[] z = new byte[total]; + int k = 0; + for (int i = 0; i < 256; i++) { + for (int j = 0; j < 256; j++) { + x[k] = 0; + y[k] = (byte) i; + z[k] = (byte) j; + k++; + } + } + + // And test. + doitMin(x, y, z); + for (int i = 0; i < total; i++) { + byte expected = (byte) Math.min(y[i], z[i]); + expectEquals(expected, x[i]); + } + doitMax(x, y, z); + for (int i = 0; i < total; i++) { + byte expected = (byte) Math.max(y[i], z[i]); + expectEquals(expected, x[i]); + } + + System.out.println("passed"); + } + + private static void expectEquals(byte expected, byte result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } +} diff --git a/test/651-checker-char-simd-minmax/expected.txt b/test/651-checker-char-simd-minmax/expected.txt new file mode 100644 index 0000000000..b0aad4deb5 --- /dev/null +++ b/test/651-checker-char-simd-minmax/expected.txt @@ -0,0 +1 @@ +passed diff --git a/test/651-checker-char-simd-minmax/info.txt b/test/651-checker-char-simd-minmax/info.txt new file mode 100644 index 0000000000..73af1242c0 --- /dev/null +++ b/test/651-checker-char-simd-minmax/info.txt @@ -0,0 +1 @@ +Functional tests on min/max SIMD vectorization. 
diff --git a/test/651-checker-char-simd-minmax/src/Main.java b/test/651-checker-char-simd-minmax/src/Main.java new file mode 100644 index 0000000000..5ce7b94bf4 --- /dev/null +++ b/test/651-checker-char-simd-minmax/src/Main.java @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Tests for MIN/MAX vectorization. + */ +public class Main { + + /// CHECK-START: void Main.doitMin(char[], char[], char[]) loop_optimization (before) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:c\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:c\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv:c\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none + // + // TODO: narrow type vectorization. 
+ /// CHECK-START: void Main.doitMin(char[], char[], char[]) loop_optimization (after) + /// CHECK-NOT: VecMin + private static void doitMin(char[] x, char[] y, char[] z) { + int min = Math.min(x.length, Math.min(y.length, z.length)); + for (int i = 0; i < min; i++) { + x[i] = (char) Math.min(y[i], z[i]); + } + } + + /// CHECK-START: void Main.doitMax(char[], char[], char[]) loop_optimization (before) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:c\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:c\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Max:i\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv:c\d+>> TypeConversion [<<Max>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none + // + // TODO: narrow type vectorization. + /// CHECK-START: void Main.doitMax(char[], char[], char[]) loop_optimization (after) + /// CHECK-NOT: VecMax + private static void doitMax(char[] x, char[] y, char[] z) { + int min = Math.min(x.length, Math.min(y.length, z.length)); + for (int i = 0; i < min; i++) { + x[i] = (char) Math.max(y[i], z[i]); + } + } + + public static void main(String[] args) { + char[] interesting = { + 0x0000, 0x0001, 0x007f, 0x0080, 0x0081, 0x00ff, + 0x0100, 0x0101, 0x017f, 0x0180, 0x0181, 0x01ff, + 0x7f00, 0x7f01, 0x7f7f, 0x7f80, 0x7f81, 0x7fff, + 0x8000, 0x8001, 0x807f, 0x8080, 0x8081, 0x80ff, + 0x8100, 0x8101, 0x817f, 0x8180, 0x8181, 0x81ff, + 0xff00, 0xff01, 0xff7f, 0xff80, 0xff81, 0xffff + }; + // Initialize cross-values for the interesting values. 
+ int total = interesting.length * interesting.length; + char[] x = new char[total]; + char[] y = new char[total]; + char[] z = new char[total]; + int k = 0; + for (int i = 0; i < interesting.length; i++) { + for (int j = 0; j < interesting.length; j++) { + x[k] = 0; + y[k] = interesting[i]; + z[k] = interesting[j]; + k++; + } + } + + // And test. + doitMin(x, y, z); + for (int i = 0; i < total; i++) { + char expected = (char) Math.min(y[i], z[i]); + expectEquals(expected, x[i]); + } + doitMax(x, y, z); + for (int i = 0; i < total; i++) { + char expected = (char) Math.max(y[i], z[i]); + expectEquals(expected, x[i]); + } + + System.out.println("passed"); + } + + private static void expectEquals(char expected, char result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } +} diff --git a/test/651-checker-double-simd-minmax/expected.txt b/test/651-checker-double-simd-minmax/expected.txt new file mode 100644 index 0000000000..b0aad4deb5 --- /dev/null +++ b/test/651-checker-double-simd-minmax/expected.txt @@ -0,0 +1 @@ +passed diff --git a/test/651-checker-double-simd-minmax/info.txt b/test/651-checker-double-simd-minmax/info.txt new file mode 100644 index 0000000000..73af1242c0 --- /dev/null +++ b/test/651-checker-double-simd-minmax/info.txt @@ -0,0 +1 @@ +Functional tests on min/max SIMD vectorization. diff --git a/test/651-checker-double-simd-minmax/src/Main.java b/test/651-checker-double-simd-minmax/src/Main.java new file mode 100644 index 0000000000..e1711aef60 --- /dev/null +++ b/test/651-checker-double-simd-minmax/src/Main.java @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Tests for MIN/MAX vectorization. + */ +public class Main { + + /// CHECK-START: void Main.doitMin(double[], double[], double[]) loop_optimization (before) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:d\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:d\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:d\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinDoubleDouble loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none + // + // TODO x86: 0.0 vs -0.0? 
+ // + /// CHECK-START-ARM64: void Main.doitMin(double[], double[], double[]) loop_optimization (after) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none + private static void doitMin(double[] x, double[] y, double[] z) { + int min = Math.min(x.length, Math.min(y.length, z.length)); + for (int i = 0; i < min; i++) { + x[i] = Math.min(y[i], z[i]); + } + } + + /// CHECK-START: void Main.doitMax(double[], double[], double[]) loop_optimization (before) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:d\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:d\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Max:d\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxDoubleDouble loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>> outer_loop:none + // + // TODO x86: 0.0 vs -0.0?
+ // + /// CHECK-START-ARM64: void Main.doitMax(double[], double[], double[]) loop_optimization (after) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Max:d\d+>> VecMax [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>> outer_loop:none + private static void doitMax(double[] x, double[] y, double[] z) { + int min = Math.min(x.length, Math.min(y.length, z.length)); + for (int i = 0; i < min; i++) { + x[i] = Math.max(y[i], z[i]); + } + } + + public static void main(String[] args) { + double[] interesting = { + -0.0f, + +0.0f, + -1.0f, + +1.0f, + -3.14f, + +3.14f, + -100.0f, + +100.0f, + -4444.44f, + +4444.44f, + Double.MIN_NORMAL, + Double.MIN_VALUE, + Double.MAX_VALUE, + Double.NEGATIVE_INFINITY, + Double.POSITIVE_INFINITY, + Double.NaN + }; + // Initialize cross-values for the interesting values. + int total = interesting.length * interesting.length; + double[] x = new double[total]; + double[] y = new double[total]; + double[] z = new double[total]; + int k = 0; + for (int i = 0; i < interesting.length; i++) { + for (int j = 0; j < interesting.length; j++) { + x[k] = 0; + y[k] = interesting[i]; + z[k] = interesting[j]; + k++; + } + } + + // And test. + doitMin(x, y, z); + for (int i = 0; i < total; i++) { + double expected = Math.min(y[i], z[i]); + expectEquals(expected, x[i]); + } + doitMax(x, y, z); + for (int i = 0; i < total; i++) { + double expected = Math.max(y[i], z[i]); + expectEquals(expected, x[i]); + } + + System.out.println("passed"); + } + + private static void expectEquals(double expected, double result) { + // Tests the bits directly. This distinguishes correctly between +0.0 + // and -0.0 and returns a canonical representation for all NaN. 
+ long expected_bits = Double.doubleToLongBits(expected); + long result_bits = Double.doubleToLongBits(result); + if (expected_bits != result_bits) { + throw new Error("Expected: " + expected + + "(0x" + Long.toHexString(expected_bits) + "), found: " + result + + "(0x" + Long.toHexString(result_bits) + ")"); + } + } +} diff --git a/test/651-checker-float-simd-minmax/expected.txt b/test/651-checker-float-simd-minmax/expected.txt new file mode 100644 index 0000000000..b0aad4deb5 --- /dev/null +++ b/test/651-checker-float-simd-minmax/expected.txt @@ -0,0 +1 @@ +passed diff --git a/test/651-checker-float-simd-minmax/info.txt b/test/651-checker-float-simd-minmax/info.txt new file mode 100644 index 0000000000..73af1242c0 --- /dev/null +++ b/test/651-checker-float-simd-minmax/info.txt @@ -0,0 +1 @@ +Functional tests on min/max SIMD vectorization. diff --git a/test/651-checker-float-simd-minmax/src/Main.java b/test/651-checker-float-simd-minmax/src/Main.java new file mode 100644 index 0000000000..bd412e02e9 --- /dev/null +++ b/test/651-checker-float-simd-minmax/src/Main.java @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Tests for MIN/MAX vectorization. 
+ */ +public class Main { + + /// CHECK-START: void Main.doitMin(float[], float[], float[]) loop_optimization (before) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:f\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:f\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:f\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinFloatFloat loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none + // + // TODO x86: 0.0 vs -0.0? + // + /// CHECK-START-ARM64: void Main.doitMin(float[], float[], float[]) loop_optimization (after) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none + private static void doitMin(float[] x, float[] y, float[] z) { + int min = Math.min(x.length, Math.min(y.length, z.length)); + for (int i = 0; i < min; i++) { + x[i] = Math.min(y[i], z[i]); + } + } + + /// CHECK-START: void Main.doitMax(float[], float[], float[]) loop_optimization (before) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:f\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:f\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Max:f\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxFloatFloat loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>> outer_loop:none + // + // TODO x86: 0.0 vs -0.0? 
+ // + /// CHECK-START-ARM64: void Main.doitMax(float[], float[], float[]) loop_optimization (after) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Max:d\d+>> VecMax [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>> outer_loop:none + private static void doitMax(float[] x, float[] y, float[] z) { + int min = Math.min(x.length, Math.min(y.length, z.length)); + for (int i = 0; i < min; i++) { + x[i] = Math.max(y[i], z[i]); + } + } + + public static void main(String[] args) { + float[] interesting = { + -0.0f, + +0.0f, + -1.0f, + +1.0f, + -3.14f, + +3.14f, + -100.0f, + +100.0f, + -4444.44f, + +4444.44f, + Float.MIN_NORMAL, + Float.MIN_VALUE, + Float.MAX_VALUE, + Float.NEGATIVE_INFINITY, + Float.POSITIVE_INFINITY, + Float.NaN + }; + // Initialize cross-values for the interesting values. + int total = interesting.length * interesting.length; + float[] x = new float[total]; + float[] y = new float[total]; + float[] z = new float[total]; + int k = 0; + for (int i = 0; i < interesting.length; i++) { + for (int j = 0; j < interesting.length; j++) { + x[k] = 0; + y[k] = interesting[i]; + z[k] = interesting[j]; + k++; + } + } + + // And test. + doitMin(x, y, z); + for (int i = 0; i < total; i++) { + float expected = Math.min(y[i], z[i]); + expectEquals(expected, x[i]); + } + doitMax(x, y, z); + for (int i = 0; i < total; i++) { + float expected = Math.max(y[i], z[i]); + expectEquals(expected, x[i]); + } + + System.out.println("passed"); + } + + private static void expectEquals(float expected, float result) { + // Tests the bits directly. This distinguishes correctly between +0.0 + // and -0.0 and returns a canonical representation for all NaN. 
+ int expected_bits = Float.floatToIntBits(expected); + int result_bits = Float.floatToIntBits(result); + if (expected_bits != result_bits) { + throw new Error("Expected: " + expected + + "(0x" + Integer.toHexString(expected_bits) + "), found: " + result + + "(0x" + Integer.toHexString(result_bits) + ")"); + } + } +} diff --git a/test/651-checker-int-simd-minmax/expected.txt b/test/651-checker-int-simd-minmax/expected.txt new file mode 100644 index 0000000000..b0aad4deb5 --- /dev/null +++ b/test/651-checker-int-simd-minmax/expected.txt @@ -0,0 +1 @@ +passed diff --git a/test/651-checker-int-simd-minmax/info.txt b/test/651-checker-int-simd-minmax/info.txt new file mode 100644 index 0000000000..73af1242c0 --- /dev/null +++ b/test/651-checker-int-simd-minmax/info.txt @@ -0,0 +1 @@ +Functional tests on min/max SIMD vectorization. diff --git a/test/651-checker-int-simd-minmax/src/Main.java b/test/651-checker-int-simd-minmax/src/Main.java new file mode 100644 index 0000000000..4e05a9ded3 --- /dev/null +++ b/test/651-checker-int-simd-minmax/src/Main.java @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Tests for MIN/MAX vectorization. 
+ */ +public class Main { + + /// CHECK-START: void Main.doitMin(int[], int[], int[]) loop_optimization (before) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:i\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:i\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.doitMin(int[], int[], int[]) loop_optimization (after) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none + private static void doitMin(int[] x, int[] y, int[] z) { + int min = Math.min(x.length, Math.min(y.length, z.length)); + for (int i = 0; i < min; i++) { + x[i] = Math.min(y[i], z[i]); + } + } + + /// CHECK-START: void Main.doitMax(int[], int[], int[]) loop_optimization (before) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:i\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:i\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Max:i\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.doitMax(int[], int[], int[]) loop_optimization (after) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get1:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad loop:<<Loop>> 
outer_loop:none + /// CHECK-DAG: <<Max:d\d+>> VecMax [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>> outer_loop:none + private static void doitMax(int[] x, int[] y, int[] z) { + int min = Math.min(x.length, Math.min(y.length, z.length)); + for (int i = 0; i < min; i++) { + x[i] = Math.max(y[i], z[i]); + } + } + + public static void main(String[] args) { + int[] interesting = { + 0x00000000, 0x00000001, 0x00007fff, 0x00008000, 0x00008001, 0x0000ffff, + 0x00010000, 0x00010001, 0x00017fff, 0x00018000, 0x00018001, 0x0001ffff, + 0x7fff0000, 0x7fff0001, 0x7fff7fff, 0x7fff8000, 0x7fff8001, 0x7fffffff, + 0x80000000, 0x80000001, 0x80007fff, 0x80008000, 0x80008001, 0x8000ffff, + 0x80010000, 0x80010001, 0x80017fff, 0x80018000, 0x80018001, 0x8001ffff, + 0xffff0000, 0xffff0001, 0xffff7fff, 0xffff8000, 0xffff8001, 0xffffffff + }; + // Initialize cross-values for the interesting values. + int total = interesting.length * interesting.length; + int[] x = new int[total]; + int[] y = new int[total]; + int[] z = new int[total]; + int k = 0; + for (int i = 0; i < interesting.length; i++) { + for (int j = 0; j < interesting.length; j++) { + x[k] = 0; + y[k] = interesting[i]; + z[k] = interesting[j]; + k++; + } + } + + // And test. 
+ doitMin(x, y, z); + for (int i = 0; i < total; i++) { + int expected = Math.min(y[i], z[i]); + expectEquals(expected, x[i]); + } + doitMax(x, y, z); + for (int i = 0; i < total; i++) { + int expected = Math.max(y[i], z[i]); + expectEquals(expected, x[i]); + } + + System.out.println("passed"); + } + + private static void expectEquals(int expected, int result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } +} diff --git a/test/651-checker-long-simd-minmax/expected.txt b/test/651-checker-long-simd-minmax/expected.txt new file mode 100644 index 0000000000..b0aad4deb5 --- /dev/null +++ b/test/651-checker-long-simd-minmax/expected.txt @@ -0,0 +1 @@ +passed diff --git a/test/651-checker-long-simd-minmax/info.txt b/test/651-checker-long-simd-minmax/info.txt new file mode 100644 index 0000000000..73af1242c0 --- /dev/null +++ b/test/651-checker-long-simd-minmax/info.txt @@ -0,0 +1 @@ +Functional tests on min/max SIMD vectorization. diff --git a/test/651-checker-long-simd-minmax/src/Main.java b/test/651-checker-long-simd-minmax/src/Main.java new file mode 100644 index 0000000000..51cf67ee00 --- /dev/null +++ b/test/651-checker-long-simd-minmax/src/Main.java @@ -0,0 +1,108 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Tests for MIN/MAX vectorization. 
/**
 * Tests Math.min / Math.max loops over long[] arrays.
 *
 * Two things are exercised: the CHECK directives attached to doitMin/doitMax
 * (which require that no VecMin/VecMax shows up after loop_optimization), and
 * the numeric results, which main() compares against Math.min/Math.max
 * evaluated element by element.
 */
public class Main {

  // NOTE(review): the "/// CHECK" lines below look like Checker directives (the
  // test lives in a "651-checker-*" directory), so they are kept word for word
  // and the loops they describe keep their original statements.

  /// CHECK-START: void Main.doitMin(long[], long[], long[]) loop_optimization (before)
  /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get1:j\d+>> ArrayGet loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Get2:j\d+>> ArrayGet loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Min:j\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinLongLong loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none
  //
  // Not directly supported for longs.
  //
  /// CHECK-START: void Main.doitMin(long[], long[], long[]) loop_optimization (after)
  /// CHECK-NOT: VecMin
  private static void doitMin(long[] x, long[] y, long[] z) {
    // Only the prefix common to all three arrays is processed.
    int bound = Math.min(x.length, Math.min(y.length, z.length));
    for (int i = 0; i < bound; i++) {
      x[i] = Math.min(y[i], z[i]);
    }
  }

  /// CHECK-START: void Main.doitMax(long[], long[], long[]) loop_optimization (before)
  /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get1:j\d+>> ArrayGet loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Get2:j\d+>> ArrayGet loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Max:j\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxLongLong loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>> outer_loop:none
  //
  // Not directly supported for longs.
  //
  /// CHECK-START: void Main.doitMax(long[], long[], long[]) loop_optimization (after)
  /// CHECK-NOT: VecMax
  private static void doitMax(long[] x, long[] y, long[] z) {
    // Only the prefix common to all three arrays is processed.
    int bound = Math.min(x.length, Math.min(y.length, z.length));
    for (int i = 0; i < bound; i++) {
      x[i] = Math.max(y[i], z[i]);
    }
  }

  /**
   * Runs doitMin and doitMax over every ordered pair of "interesting" values
   * and prints "passed" when all results match Math.min / Math.max.
   */
  public static void main(String[] args) {
    // Bit patterns around the 32-bit and 64-bit sign boundaries.
    final long[] interesting = {
      0x0000000000000000L, 0x0000000000000001L, 0x000000007fffffffL,
      0x0000000080000000L, 0x0000000080000001L, 0x00000000ffffffffL,
      0x0000000100000000L, 0x0000000100000001L, 0x000000017fffffffL,
      0x0000000180000000L, 0x0000000180000001L, 0x00000001ffffffffL,
      0x7fffffff00000000L, 0x7fffffff00000001L, 0x7fffffff7fffffffL,
      0x7fffffff80000000L, 0x7fffffff80000001L, 0x7fffffffffffffffL,
      0x8000000000000000L, 0x8000000000000001L, 0x800000007fffffffL,
      0x8000000080000000L, 0x8000000080000001L, 0x80000000ffffffffL,
      0x8000000100000000L, 0x8000000100000001L, 0x800000017fffffffL,
      0x8000000180000000L, 0x8000000180000001L, 0x80000001ffffffffL,
      0xffffffff00000000L, 0xffffffff00000001L, 0xffffffff7fffffffL,
      0xffffffff80000000L, 0xffffffff80000001L, 0xffffffffffffffffL
    };

    // Cross product: slot k pairs interesting[k / count] with interesting[k % count].
    final int count = interesting.length;
    final int total = count * count;
    final long[] x = new long[total];  // Result buffer; a fresh array is already all zeros.
    final long[] y = new long[total];
    final long[] z = new long[total];
    for (int k = 0; k < total; k++) {
      y[k] = interesting[k / count];
      z[k] = interesting[k % count];
    }

    doitMin(x, y, z);
    for (int i = 0; i < total; i++) {
      expectEquals(Math.min(y[i], z[i]), x[i]);
    }

    doitMax(x, y, z);
    for (int i = 0; i < total; i++) {
      expectEquals(Math.max(y[i], z[i]), x[i]);
    }

    System.out.println("passed");
  }

  /** Throws an Error describing the mismatch unless both values are equal. */
  private static void expectEquals(long expected, long result) {
    if (expected == result) {
      return;
    }
    throw new Error("Expected: " + expected + ", found: " + result);
  }
}
/**
 * Tests Math.min / Math.max loops over short[] arrays.
 *
 * Two things are exercised: the CHECK directives attached to doitMin/doitMax
 * (which require that no VecMin/VecMax shows up after loop_optimization), and
 * the numeric results, which main() compares against Math.min/Math.max
 * evaluated element by element and narrowed back to short.
 */
public class Main {

  // NOTE(review): the "/// CHECK" lines below look like Checker directives (the
  // test lives in a "651-checker-*" directory), so they are kept word for word
  // and the loops they describe keep their original statements.

  /// CHECK-START: void Main.doitMin(short[], short[], short[]) loop_optimization (before)
  /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get1:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Get2:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Cnv:s\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none
  //
  // TODO: narrow type vectorization.
  /// CHECK-START: void Main.doitMin(short[], short[], short[]) loop_optimization (after)
  /// CHECK-NOT: VecMin
  private static void doitMin(short[] x, short[] y, short[] z) {
    // Only the prefix common to all three arrays is processed.
    int bound = Math.min(x.length, Math.min(y.length, z.length));
    for (int i = 0; i < bound; i++) {
      x[i] = (short) Math.min(y[i], z[i]);
    }
  }

  /// CHECK-START: void Main.doitMax(short[], short[], short[]) loop_optimization (before)
  /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Get1:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Get2:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Max:i\d+>> InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: <<Cnv:s\d+>> TypeConversion [<<Max>>] loop:<<Loop>> outer_loop:none
  /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none
  //
  // TODO: narrow type vectorization.
  /// CHECK-START: void Main.doitMax(short[], short[], short[]) loop_optimization (after)
  /// CHECK-NOT: VecMax
  private static void doitMax(short[] x, short[] y, short[] z) {
    // Only the prefix common to all three arrays is processed.
    int bound = Math.min(x.length, Math.min(y.length, z.length));
    for (int i = 0; i < bound; i++) {
      x[i] = (short) Math.max(y[i], z[i]);
    }
  }

  /**
   * Runs doitMin and doitMax over every ordered pair of "interesting" values
   * and prints "passed" when all results match Math.min / Math.max.
   */
  public static void main(String[] args) {
    // Bit patterns around the 8-bit and 16-bit sign boundaries.
    final short[] interesting = {
      (short) 0x0000, (short) 0x0001, (short) 0x007f,
      (short) 0x0080, (short) 0x0081, (short) 0x00ff,
      (short) 0x0100, (short) 0x0101, (short) 0x017f,
      (short) 0x0180, (short) 0x0181, (short) 0x01ff,
      (short) 0x7f00, (short) 0x7f01, (short) 0x7f7f,
      (short) 0x7f80, (short) 0x7f81, (short) 0x7fff,
      (short) 0x8000, (short) 0x8001, (short) 0x807f,
      (short) 0x8080, (short) 0x8081, (short) 0x80ff,
      (short) 0x8100, (short) 0x8101, (short) 0x817f,
      (short) 0x8180, (short) 0x8181, (short) 0x81ff,
      (short) 0xff00, (short) 0xff01, (short) 0xff7f,
      (short) 0xff80, (short) 0xff81, (short) 0xffff
    };

    // Cross product: slot k pairs interesting[k / count] with interesting[k % count].
    final int count = interesting.length;
    final int total = count * count;
    final short[] x = new short[total];  // Result buffer; a fresh array is already all zeros.
    final short[] y = new short[total];
    final short[] z = new short[total];
    for (int k = 0; k < total; k++) {
      y[k] = interesting[k / count];
      z[k] = interesting[k % count];
    }

    doitMin(x, y, z);
    for (int i = 0; i < total; i++) {
      expectEquals((short) Math.min(y[i], z[i]), x[i]);
    }

    doitMax(x, y, z);
    for (int i = 0; i < total; i++) {
      expectEquals((short) Math.max(y[i], z[i]), x[i]);
    }

    System.out.println("passed");
  }

  /** Throws an Error describing the mismatch unless both values are equal. */
  private static void expectEquals(short expected, short result) {
    if (expected == result) {
      return;
    }
    throw new Error("Expected: " + expected + ", found: " + result);
  }
}