From d4bccf1ece319a3a99e03ecbcbbf40bb82b9e331 Mon Sep 17 00:00:00 2001 From: Artem Serov Date: Mon, 3 Apr 2017 18:47:32 +0100 Subject: ARM64: Support 128-bit registers for SIMD. Test: test-art-host, test-art-target Change-Id: Ifb931a99d34ea77602a0e0781040ed092de9faaa --- compiler/optimizing/loop_optimization.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'compiler/optimizing/loop_optimization.cc') diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index bf18cc9bbc..ec02127bee 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -770,22 +770,21 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric return false; case kArm64: // Allow vectorization for all ARM devices, because Android assumes that - // ARMv8 AArch64 always supports advanced SIMD. For now, only D registers - // (64-bit vectors) not Q registers (128-bit vectors). + // ARMv8 AArch64 always supports advanced SIMD. switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: *restrictions |= kNoDiv | kNoAbs; - return TrySetVectorLength(8); + return TrySetVectorLength(16); case Primitive::kPrimChar: case Primitive::kPrimShort: *restrictions |= kNoDiv | kNoAbs; - return TrySetVectorLength(4); + return TrySetVectorLength(8); case Primitive::kPrimInt: *restrictions |= kNoDiv; - return TrySetVectorLength(2); + return TrySetVectorLength(4); case Primitive::kPrimFloat: - return TrySetVectorLength(2); + return TrySetVectorLength(4); default: return false; } -- cgit v1.2.3-59-g8ed1b From b31f91fd1811c9047591282dd003cf22b54938a1 Mon Sep 17 00:00:00 2001 From: Artem Serov Date: Wed, 5 Apr 2017 11:31:19 +0100 Subject: ARM64: Support vectorization for double and long. 
Test: test-art-host, test-art-target Change-Id: I1d4db1763b64737766f9756e5d0f85c5736e3522 --- compiler/optimizing/code_generator_vector_arm64.cc | 166 ++++++++++++++++----- compiler/optimizing/common_arm64.h | 5 + compiler/optimizing/loop_optimization.cc | 5 + test/640-checker-double-simd/src/Main.java | 36 +++-- test/640-checker-long-simd/src/Main.java | 34 +++-- 5 files changed, 182 insertions(+), 64 deletions(-) (limited to 'compiler/optimizing/loop_optimization.cc') diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc index 11c5e380fe..0923920366 100644 --- a/compiler/optimizing/code_generator_vector_arm64.cc +++ b/compiler/optimizing/code_generator_vector_arm64.cc @@ -22,7 +22,7 @@ using namespace vixl::aarch64; // NOLINT(build/namespaces) namespace art { namespace arm64 { -using helpers::DRegisterFrom; +using helpers::VRegisterFrom; using helpers::HeapOperand; using helpers::InputRegisterAt; using helpers::Int64ConstantFrom; @@ -38,10 +38,12 @@ void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruc case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: + case Primitive::kPrimLong: locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; case Primitive::kPrimFloat: + case Primitive::kPrimDouble: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; @@ -53,7 +55,7 @@ void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruc void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { LocationSummary* locations = instruction->GetLocations(); - FPRegister dst = DRegisterFrom(locations->Out()); + VRegister dst = VRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: @@ -69,9 
+71,17 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* DCHECK_EQ(4u, instruction->GetVectorLength()); __ Dup(dst.V4S(), InputRegisterAt(instruction, 0)); break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Dup(dst.V2D(), XRegisterFrom(locations->InAt(0))); + break; case Primitive::kPrimFloat: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ Dup(dst.V4S(), DRegisterFrom(locations->InAt(0)).V4S(), 0); + __ Dup(dst.V4S(), VRegisterFrom(locations->InAt(0)).V4S(), 0); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Dup(dst.V2D(), VRegisterFrom(locations->InAt(0)).V2D(), 0); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -109,7 +119,9 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: + case Primitive::kPrimLong: case Primitive::kPrimFloat: + case Primitive::kPrimDouble: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; @@ -125,8 +137,8 @@ void LocationsBuilderARM64::VisitVecCnv(HVecCnv* instruction) { void InstructionCodeGeneratorARM64::VisitVecCnv(HVecCnv* instruction) { LocationSummary* locations = instruction->GetLocations(); - FPRegister src = DRegisterFrom(locations->InAt(0)); - FPRegister dst = DRegisterFrom(locations->Out()); + VRegister src = VRegisterFrom(locations->InAt(0)); + VRegister dst = VRegisterFrom(locations->Out()); Primitive::Type from = instruction->GetInputType(); Primitive::Type to = instruction->GetResultType(); if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) { @@ -143,8 +155,8 @@ void LocationsBuilderARM64::VisitVecNeg(HVecNeg* instruction) { void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) { LocationSummary* locations = instruction->GetLocations(); - FPRegister src = 
DRegisterFrom(locations->InAt(0)); - FPRegister dst = DRegisterFrom(locations->Out()); + VRegister src = VRegisterFrom(locations->InAt(0)); + VRegister dst = VRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { case Primitive::kPrimByte: DCHECK_EQ(16u, instruction->GetVectorLength()); @@ -159,10 +171,18 @@ void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ Neg(dst.V4S(), src.V4S()); break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Neg(dst.V2D(), src.V2D()); + break; case Primitive::kPrimFloat: DCHECK_EQ(4u, instruction->GetVectorLength()); __ Fneg(dst.V4S(), src.V4S()); break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Fneg(dst.V2D(), src.V2D()); + break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); @@ -175,8 +195,8 @@ void LocationsBuilderARM64::VisitVecAbs(HVecAbs* instruction) { void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) { LocationSummary* locations = instruction->GetLocations(); - FPRegister src = DRegisterFrom(locations->InAt(0)); - FPRegister dst = DRegisterFrom(locations->Out()); + VRegister src = VRegisterFrom(locations->InAt(0)); + VRegister dst = VRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { case Primitive::kPrimByte: DCHECK_EQ(16u, instruction->GetVectorLength()); @@ -191,10 +211,18 @@ void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ Abs(dst.V4S(), src.V4S()); break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Abs(dst.V2D(), src.V2D()); + break; case Primitive::kPrimFloat: DCHECK_EQ(4u, instruction->GetVectorLength()); __ Fabs(dst.V4S(), src.V4S()); break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Fabs(dst.V2D(), src.V2D()); + break; default: LOG(FATAL) << 
"Unsupported SIMD type"; } @@ -206,8 +234,8 @@ void LocationsBuilderARM64::VisitVecNot(HVecNot* instruction) { void InstructionCodeGeneratorARM64::VisitVecNot(HVecNot* instruction) { LocationSummary* locations = instruction->GetLocations(); - FPRegister src = DRegisterFrom(locations->InAt(0)); - FPRegister dst = DRegisterFrom(locations->Out()); + VRegister src = VRegisterFrom(locations->InAt(0)); + VRegister dst = VRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: // special case boolean-not DCHECK_EQ(16u, instruction->GetVectorLength()); @@ -218,6 +246,7 @@ void InstructionCodeGeneratorARM64::VisitVecNot(HVecNot* instruction) { case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: + case Primitive::kPrimLong: __ Not(dst.V16B(), src.V16B()); // lanes do not matter break; default: @@ -235,7 +264,9 @@ static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: + case Primitive::kPrimLong: case Primitive::kPrimFloat: + case Primitive::kPrimDouble: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); @@ -252,9 +283,9 @@ void LocationsBuilderARM64::VisitVecAdd(HVecAdd* instruction) { void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) { LocationSummary* locations = instruction->GetLocations(); - FPRegister lhs = DRegisterFrom(locations->InAt(0)); - FPRegister rhs = DRegisterFrom(locations->InAt(1)); - FPRegister dst = DRegisterFrom(locations->Out()); + VRegister lhs = VRegisterFrom(locations->InAt(0)); + VRegister rhs = VRegisterFrom(locations->InAt(1)); + VRegister dst = VRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { case Primitive::kPrimByte: DCHECK_EQ(16u, instruction->GetVectorLength()); @@ -269,10 +300,18 @@ 
void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ Add(dst.V4S(), lhs.V4S(), rhs.V4S()); break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Add(dst.V2D(), lhs.V2D(), rhs.V2D()); + break; case Primitive::kPrimFloat: DCHECK_EQ(4u, instruction->GetVectorLength()); __ Fadd(dst.V4S(), lhs.V4S(), rhs.V4S()); break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Fadd(dst.V2D(), lhs.V2D(), rhs.V2D()); + break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); @@ -285,9 +324,9 @@ void LocationsBuilderARM64::VisitVecSub(HVecSub* instruction) { void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) { LocationSummary* locations = instruction->GetLocations(); - FPRegister lhs = DRegisterFrom(locations->InAt(0)); - FPRegister rhs = DRegisterFrom(locations->InAt(1)); - FPRegister dst = DRegisterFrom(locations->Out()); + VRegister lhs = VRegisterFrom(locations->InAt(0)); + VRegister rhs = VRegisterFrom(locations->InAt(1)); + VRegister dst = VRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { case Primitive::kPrimByte: DCHECK_EQ(16u, instruction->GetVectorLength()); @@ -302,10 +341,18 @@ void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ Sub(dst.V4S(), lhs.V4S(), rhs.V4S()); break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Sub(dst.V2D(), lhs.V2D(), rhs.V2D()); + break; case Primitive::kPrimFloat: DCHECK_EQ(4u, instruction->GetVectorLength()); __ Fsub(dst.V4S(), lhs.V4S(), rhs.V4S()); break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Fsub(dst.V2D(), lhs.V2D(), rhs.V2D()); + break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); @@ -318,9 +365,9 @@ void LocationsBuilderARM64::VisitVecMul(HVecMul* instruction) { void 
InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) { LocationSummary* locations = instruction->GetLocations(); - FPRegister lhs = DRegisterFrom(locations->InAt(0)); - FPRegister rhs = DRegisterFrom(locations->InAt(1)); - FPRegister dst = DRegisterFrom(locations->Out()); + VRegister lhs = VRegisterFrom(locations->InAt(0)); + VRegister rhs = VRegisterFrom(locations->InAt(1)); + VRegister dst = VRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { case Primitive::kPrimByte: DCHECK_EQ(16u, instruction->GetVectorLength()); @@ -339,6 +386,10 @@ void InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ Fmul(dst.V4S(), lhs.V4S(), rhs.V4S()); break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Fmul(dst.V2D(), lhs.V2D(), rhs.V2D()); + break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); @@ -351,14 +402,18 @@ void LocationsBuilderARM64::VisitVecDiv(HVecDiv* instruction) { void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) { LocationSummary* locations = instruction->GetLocations(); - FPRegister lhs = DRegisterFrom(locations->InAt(0)); - FPRegister rhs = DRegisterFrom(locations->InAt(1)); - FPRegister dst = DRegisterFrom(locations->Out()); + VRegister lhs = VRegisterFrom(locations->InAt(0)); + VRegister rhs = VRegisterFrom(locations->InAt(1)); + VRegister dst = VRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { case Primitive::kPrimFloat: DCHECK_EQ(4u, instruction->GetVectorLength()); __ Fdiv(dst.V4S(), lhs.V4S(), rhs.V4S()); break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Fdiv(dst.V2D(), lhs.V2D(), rhs.V2D()); + break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); @@ -371,16 +426,18 @@ void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) { void InstructionCodeGeneratorARM64::VisitVecAnd(HVecAnd* instruction) { 
LocationSummary* locations = instruction->GetLocations(); - FPRegister lhs = DRegisterFrom(locations->InAt(0)); - FPRegister rhs = DRegisterFrom(locations->InAt(1)); - FPRegister dst = DRegisterFrom(locations->Out()); + VRegister lhs = VRegisterFrom(locations->InAt(0)); + VRegister rhs = VRegisterFrom(locations->InAt(1)); + VRegister dst = VRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: + case Primitive::kPrimLong: case Primitive::kPrimFloat: + case Primitive::kPrimDouble: __ And(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter break; default: @@ -403,16 +460,18 @@ void LocationsBuilderARM64::VisitVecOr(HVecOr* instruction) { void InstructionCodeGeneratorARM64::VisitVecOr(HVecOr* instruction) { LocationSummary* locations = instruction->GetLocations(); - FPRegister lhs = DRegisterFrom(locations->InAt(0)); - FPRegister rhs = DRegisterFrom(locations->InAt(1)); - FPRegister dst = DRegisterFrom(locations->Out()); + VRegister lhs = VRegisterFrom(locations->InAt(0)); + VRegister rhs = VRegisterFrom(locations->InAt(1)); + VRegister dst = VRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: + case Primitive::kPrimLong: case Primitive::kPrimFloat: + case Primitive::kPrimDouble: __ Orr(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter break; default: @@ -427,16 +486,18 @@ void LocationsBuilderARM64::VisitVecXor(HVecXor* instruction) { void InstructionCodeGeneratorARM64::VisitVecXor(HVecXor* instruction) { LocationSummary* locations = instruction->GetLocations(); - FPRegister lhs = DRegisterFrom(locations->InAt(0)); - FPRegister rhs = DRegisterFrom(locations->InAt(1)); - FPRegister dst = DRegisterFrom(locations->Out()); + VRegister lhs = 
VRegisterFrom(locations->InAt(0)); + VRegister rhs = VRegisterFrom(locations->InAt(1)); + VRegister dst = VRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: + case Primitive::kPrimLong: case Primitive::kPrimFloat: + case Primitive::kPrimDouble: __ Eor(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter break; default: @@ -453,6 +514,7 @@ static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: + case Primitive::kPrimLong: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); @@ -469,8 +531,8 @@ void LocationsBuilderARM64::VisitVecShl(HVecShl* instruction) { void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) { LocationSummary* locations = instruction->GetLocations(); - FPRegister lhs = DRegisterFrom(locations->InAt(0)); - FPRegister dst = DRegisterFrom(locations->Out()); + VRegister lhs = VRegisterFrom(locations->InAt(0)); + VRegister dst = VRegisterFrom(locations->Out()); int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); switch (instruction->GetPackedType()) { case Primitive::kPrimByte: @@ -486,6 +548,10 @@ void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ Shl(dst.V4S(), lhs.V4S(), value); break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Shl(dst.V2D(), lhs.V2D(), value); + break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); @@ -498,8 +564,8 @@ void LocationsBuilderARM64::VisitVecShr(HVecShr* instruction) { void 
InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) { LocationSummary* locations = instruction->GetLocations(); - FPRegister lhs = DRegisterFrom(locations->InAt(0)); - FPRegister dst = DRegisterFrom(locations->Out()); + VRegister lhs = VRegisterFrom(locations->InAt(0)); + VRegister dst = VRegisterFrom(locations->Out()); int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); switch (instruction->GetPackedType()) { case Primitive::kPrimByte: @@ -515,6 +581,10 @@ void InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ Sshr(dst.V4S(), lhs.V4S(), value); break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Sshr(dst.V2D(), lhs.V2D(), value); + break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); @@ -527,8 +597,8 @@ void LocationsBuilderARM64::VisitVecUShr(HVecUShr* instruction) { void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) { LocationSummary* locations = instruction->GetLocations(); - FPRegister lhs = DRegisterFrom(locations->InAt(0)); - FPRegister dst = DRegisterFrom(locations->Out()); + VRegister lhs = VRegisterFrom(locations->InAt(0)); + VRegister dst = VRegisterFrom(locations->Out()); int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); switch (instruction->GetPackedType()) { case Primitive::kPrimByte: @@ -544,6 +614,10 @@ void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ Ushr(dst.V4S(), lhs.V4S(), value); break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Ushr(dst.V2D(), lhs.V2D(), value); + break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); @@ -561,7 +635,9 @@ static void CreateVecMemLocations(ArenaAllocator* arena, case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: + case Primitive::kPrimLong: case 
Primitive::kPrimFloat: + case Primitive::kPrimDouble: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (is_load) { @@ -613,7 +689,7 @@ void LocationsBuilderARM64::VisitVecLoad(HVecLoad* instruction) { void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) { Location reg_loc = Location::NoLocation(); MemOperand mem = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true); - FPRegister reg = DRegisterFrom(reg_loc); + VRegister reg = VRegisterFrom(reg_loc); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: @@ -630,6 +706,11 @@ void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ Ld1(reg.V4S(), mem); break; + case Primitive::kPrimLong: + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Ld1(reg.V2D(), mem); + break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); @@ -643,7 +724,7 @@ void LocationsBuilderARM64::VisitVecStore(HVecStore* instruction) { void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) { Location reg_loc = Location::NoLocation(); MemOperand mem = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false); - FPRegister reg = DRegisterFrom(reg_loc); + VRegister reg = VRegisterFrom(reg_loc); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: @@ -660,6 +741,11 @@ void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ St1(reg.V4S(), mem); break; + case Primitive::kPrimLong: + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ St1(reg.V2D(), mem); + break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index
5372b97247..721f74eeee 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -97,6 +97,11 @@ inline vixl::aarch64::FPRegister QRegisterFrom(Location location) { return vixl::aarch64::FPRegister::GetQRegFromCode(location.reg()); } +inline vixl::aarch64::FPRegister VRegisterFrom(Location location) { + DCHECK(location.IsFpuRegister()) << location; + return vixl::aarch64::FPRegister::GetVRegFromCode(location.reg()); +} + inline vixl::aarch64::FPRegister SRegisterFrom(Location location) { DCHECK(location.IsFpuRegister()) << location; return vixl::aarch64::FPRegister::GetSRegFromCode(location.reg()); diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index ec02127bee..6337361712 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -783,8 +783,13 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric case Primitive::kPrimInt: *restrictions |= kNoDiv; return TrySetVectorLength(4); + case Primitive::kPrimLong: + *restrictions |= kNoDiv | kNoMul; + return TrySetVectorLength(2); case Primitive::kPrimFloat: return TrySetVectorLength(4); + case Primitive::kPrimDouble: + return TrySetVectorLength(2); default: return false; } diff --git a/test/640-checker-double-simd/src/Main.java b/test/640-checker-double-simd/src/Main.java index 43f65f1792..0d4f87a6cd 100644 --- a/test/640-checker-double-simd/src/Main.java +++ b/test/640-checker-double-simd/src/Main.java @@ -32,8 +32,10 @@ public class Main { /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // /// CHECK-START-ARM64: void Main.add(double) loop_optimization (after) - // - // TODO: fill in when supported + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecAdd loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none static void add(double x) { for (int i = 0; i < 128; i++) a[i] += x; @@ -45,8
+47,10 @@ public class Main { /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // /// CHECK-START-ARM64: void Main.sub(double) loop_optimization (after) - // - // TODO: fill in when supported + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecSub loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none static void sub(double x) { for (int i = 0; i < 128; i++) a[i] -= x; @@ -58,8 +62,10 @@ public class Main { /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // /// CHECK-START-ARM64: void Main.mul(double) loop_optimization (after) - // - // TODO: fill in when supported + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecMul loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none static void mul(double x) { for (int i = 0; i < 128; i++) a[i] *= x; @@ -71,8 +77,10 @@ public class Main { /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // /// CHECK-START-ARM64: void Main.div(double) loop_optimization (after) - // - // TODO: fill in when supported + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDiv loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none static void div(double x) { for (int i = 0; i < 128; i++) a[i] /= x; @@ -84,8 +92,10 @@ public class Main { /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // /// CHECK-START-ARM64: void Main.neg() loop_optimization (after) - // - // TODO: fill in when supported + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecNeg loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none static void neg() { for (int i = 0; i < 128; i++) a[i] = -a[i]; @@ -97,8 +107,10 @@ public class Main { /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // /// CHECK-START-ARM64: void Main.abs() loop_optimization (after) - // - // TODO:
fill in when supported + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecAbs loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none static void abs() { for (int i = 0; i < 128; i++) a[i] = Math.abs(a[i]); diff --git a/test/640-checker-long-simd/src/Main.java b/test/640-checker-long-simd/src/Main.java index 90a2e76538..56411821f1 100644 --- a/test/640-checker-long-simd/src/Main.java +++ b/test/640-checker-long-simd/src/Main.java @@ -31,8 +31,10 @@ public class Main { /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // /// CHECK-START-ARM64: void Main.add(long) loop_optimization (after) - // - // TODO: fill in when supported + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecAdd loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none static void add(long x) { for (int i = 0; i < 128; i++) a[i] += x; @@ -44,8 +46,10 @@ public class Main { /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // /// CHECK-START-ARM64: void Main.sub(long) loop_optimization (after) - // - // TODO: fill in when supported + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecSub loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none static void sub(long x) { for (int i = 0; i < 128; i++) a[i] -= x; @@ -56,9 +60,9 @@ public class Main { /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // + // Not supported for longs.
/// CHECK-START-ARM64: void Main.mul(long) loop_optimization (after) - // - // TODO: fill in when supported + /// CHECK-NOT: VecMul static void mul(long x) { for (int i = 0; i < 128; i++) a[i] *= x; @@ -84,8 +88,10 @@ public class Main { /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // /// CHECK-START-ARM64: void Main.neg() loop_optimization (after) - // - // TODO: fill in when supported + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecNeg loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none static void neg() { for (int i = 0; i < 128; i++) a[i] = -a[i]; @@ -97,8 +103,10 @@ public class Main { /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // /// CHECK-START-ARM64: void Main.not() loop_optimization (after) - // - // TODO: fill in when supported + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecNot loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none static void not() { for (int i = 0; i < 128; i++) a[i] = ~a[i]; @@ -110,8 +118,10 @@ public class Main { /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // /// CHECK-START-ARM64: void Main.shl4() loop_optimization (after) - // - // TODO: fill in when supported + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecShl loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none static void shl4() { for (int i = 0; i < 128; i++) a[i] <<= 4; -- cgit v1.2.3-59-g8ed1b