diff options
Diffstat (limited to 'compiler')
| -rw-r--r-- | compiler/optimizing/code_generator_arm_vixl.h | 11 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_vector_arm_vixl.cc | 605 | ||||
| -rw-r--r-- | compiler/optimizing/loop_optimization.cc | 17 |
3 files changed, 602 insertions, 31 deletions
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 805a3f4366..ad3283ad4f 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -408,6 +408,17 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { void GenerateDivRemConstantIntegral(HBinaryOperation* instruction); void HandleGoto(HInstruction* got, HBasicBlock* successor); + vixl::aarch32::MemOperand VecAddress( + HVecMemoryOperation* instruction, + // This function may acquire a scratch register. + vixl::aarch32::UseScratchRegisterScope* temps_scope, + /*out*/ vixl32::Register* scratch); + vixl::aarch32::AlignedMemOperand VecAddressUnaligned( + HVecMemoryOperation* instruction, + // This function may acquire a scratch register. + vixl::aarch32::UseScratchRegisterScope* temps_scope, + /*out*/ vixl32::Register* scratch); + ArmVIXLAssembler* const assembler_; CodeGeneratorARMVIXL* const codegen_; diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc index 53f314ec40..527691d9d9 100644 --- a/compiler/optimizing/code_generator_vector_arm_vixl.cc +++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc @@ -15,19 +15,62 @@ */ #include "code_generator_arm_vixl.h" +#include "mirror/array-inl.h" + +namespace vixl32 = vixl::aarch32; +using namespace vixl32; // NOLINT(build/namespaces) namespace art { namespace arm { -// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 
-#define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()-> // NOLINT +using helpers::DRegisterFrom; +using helpers::Int64ConstantFrom; +using helpers::InputDRegisterAt; +using helpers::InputRegisterAt; +using helpers::OutputDRegister; +using helpers::RegisterFrom; + +#define __ GetVIXLAssembler()-> void LocationsBuilderARMVIXL::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void InstructionCodeGeneratorARMVIXL::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + vixl32::DRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Vdup(Untyped8, dst, InputRegisterAt(instruction, 0)); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Vdup(Untyped16, dst, InputRegisterAt(instruction, 0)); + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Vdup(Untyped32, dst, InputRegisterAt(instruction, 0)); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) { @@ -51,13 +94,17 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, 
HVecUnaryOperation* in LocationSummary* locations = new (arena) LocationSummary(instruction); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), + instruction->IsVecNot() ? Location::kOutputOverlap + : Location::kNoOutputOverlap); + break; case Primitive::kPrimByte: case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - DCHECK(locations); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -78,7 +125,27 @@ void LocationsBuilderARMVIXL::VisitVecNeg(HVecNeg* instruction) { } void InstructionCodeGeneratorARMVIXL::VisitVecNeg(HVecNeg* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + vixl32::DRegister src = DRegisterFrom(locations->InAt(0)); + vixl32::DRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Vneg(DataTypeValue::S8, dst, src); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Vneg(DataTypeValue::S16, dst, src); + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Vneg(DataTypeValue::S32, dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderARMVIXL::VisitVecAbs(HVecAbs* instruction) { @@ -86,7 +153,27 @@ void LocationsBuilderARMVIXL::VisitVecAbs(HVecAbs* instruction) { } void InstructionCodeGeneratorARMVIXL::VisitVecAbs(HVecAbs* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = 
instruction->GetLocations(); + vixl32::DRegister src = DRegisterFrom(locations->InAt(0)); + vixl32::DRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Vabs(DataTypeValue::S8, dst, src); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Vabs(DataTypeValue::S16, dst, src); + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Vabs(DataTypeValue::S32, dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderARMVIXL::VisitVecNot(HVecNot* instruction) { @@ -94,7 +181,25 @@ void LocationsBuilderARMVIXL::VisitVecNot(HVecNot* instruction) { } void InstructionCodeGeneratorARMVIXL::VisitVecNot(HVecNot* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + vixl32::DRegister src = DRegisterFrom(locations->InAt(0)); + vixl32::DRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: // special case boolean-not + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Vmov(I8, dst, 1); + __ Veor(dst, dst, src); + break; + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + __ Vmvn(I8, dst, src); // lanes do not matter + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } // Helper to set up locations for vector binary operations. 
@@ -106,9 +211,9 @@ static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - DCHECK(locations); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -121,7 +226,28 @@ void LocationsBuilderARMVIXL::VisitVecAdd(HVecAdd* instruction) { } void InstructionCodeGeneratorARMVIXL::VisitVecAdd(HVecAdd* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0)); + vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1)); + vixl32::DRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Vadd(I8, dst, lhs, rhs); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Vadd(I16, dst, lhs, rhs); + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Vadd(I32, dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderARMVIXL::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { @@ -129,7 +255,40 @@ void LocationsBuilderARMVIXL::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { } void InstructionCodeGeneratorARMVIXL::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0)); + vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1)); + 
vixl32::DRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + instruction->IsRounded() + ? __ Vrhadd(DataTypeValue::U8, dst, lhs, rhs) + : __ Vhadd(DataTypeValue::U8, dst, lhs, rhs); + } else { + instruction->IsRounded() + ? __ Vrhadd(DataTypeValue::S8, dst, lhs, rhs) + : __ Vhadd(DataTypeValue::S8, dst, lhs, rhs); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + instruction->IsRounded() + ? __ Vrhadd(DataTypeValue::U16, dst, lhs, rhs) + : __ Vhadd(DataTypeValue::U16, dst, lhs, rhs); + } else { + instruction->IsRounded() + ? __ Vrhadd(DataTypeValue::S16, dst, lhs, rhs) + : __ Vhadd(DataTypeValue::S16, dst, lhs, rhs); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderARMVIXL::VisitVecSub(HVecSub* instruction) { @@ -137,7 +296,28 @@ void LocationsBuilderARMVIXL::VisitVecSub(HVecSub* instruction) { } void InstructionCodeGeneratorARMVIXL::VisitVecSub(HVecSub* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0)); + vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1)); + vixl32::DRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Vsub(I8, dst, lhs, rhs); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Vsub(I16, dst, lhs, rhs); + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Vsub(I32, dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } 
void LocationsBuilderARMVIXL::VisitVecMul(HVecMul* instruction) { @@ -145,7 +325,28 @@ void LocationsBuilderARMVIXL::VisitVecMul(HVecMul* instruction) { } void InstructionCodeGeneratorARMVIXL::VisitVecMul(HVecMul* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0)); + vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1)); + vixl32::DRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Vmul(I8, dst, lhs, rhs); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Vmul(I16, dst, lhs, rhs); + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Vmul(I32, dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderARMVIXL::VisitVecDiv(HVecDiv* instruction) { @@ -161,7 +362,40 @@ void LocationsBuilderARMVIXL::VisitVecMin(HVecMin* instruction) { } void InstructionCodeGeneratorARMVIXL::VisitVecMin(HVecMin* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0)); + vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1)); + vixl32::DRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Vmin(DataTypeValue::U8, dst, lhs, rhs); + } else { + __ Vmin(DataTypeValue::S8, dst, lhs, rhs); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Vmin(DataTypeValue::U16, dst, 
lhs, rhs); + } else { + __ Vmin(DataTypeValue::S16, dst, lhs, rhs); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Vmin(DataTypeValue::U32, dst, lhs, rhs); + } else { + __ Vmin(DataTypeValue::S32, dst, lhs, rhs); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderARMVIXL::VisitVecMax(HVecMax* instruction) { @@ -169,7 +403,40 @@ void LocationsBuilderARMVIXL::VisitVecMax(HVecMax* instruction) { } void InstructionCodeGeneratorARMVIXL::VisitVecMax(HVecMax* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0)); + vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1)); + vixl32::DRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Vmax(DataTypeValue::U8, dst, lhs, rhs); + } else { + __ Vmax(DataTypeValue::S8, dst, lhs, rhs); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Vmax(DataTypeValue::U16, dst, lhs, rhs); + } else { + __ Vmax(DataTypeValue::S16, dst, lhs, rhs); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Vmax(DataTypeValue::U32, dst, lhs, rhs); + } else { + __ Vmax(DataTypeValue::S32, dst, lhs, rhs); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderARMVIXL::VisitVecAnd(HVecAnd* instruction) { @@ -177,7 +444,22 @@ void LocationsBuilderARMVIXL::VisitVecAnd(HVecAnd* instruction) { } void InstructionCodeGeneratorARMVIXL::VisitVecAnd(HVecAnd* instruction) { - LOG(FATAL) << "No SIMD for 
" << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0)); + vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1)); + vixl32::DRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + __ Vand(I8, dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderARMVIXL::VisitVecAndNot(HVecAndNot* instruction) { @@ -193,7 +475,22 @@ void LocationsBuilderARMVIXL::VisitVecOr(HVecOr* instruction) { } void InstructionCodeGeneratorARMVIXL::VisitVecOr(HVecOr* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0)); + vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1)); + vixl32::DRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + __ Vorr(I8, dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderARMVIXL::VisitVecXor(HVecXor* instruction) { @@ -201,7 +498,22 @@ void LocationsBuilderARMVIXL::VisitVecXor(HVecXor* instruction) { } void InstructionCodeGeneratorARMVIXL::VisitVecXor(HVecXor* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0)); + vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1)); + vixl32::DRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case 
Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + __ Veor(I8, dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } // Helper to set up locations for vector shift operations. @@ -212,8 +524,9 @@ static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: - case Primitive::kPrimLong: - DCHECK(locations); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -226,7 +539,28 @@ void LocationsBuilderARMVIXL::VisitVecShl(HVecShl* instruction) { } void InstructionCodeGeneratorARMVIXL::VisitVecShl(HVecShl* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0)); + vixl32::DRegister dst = DRegisterFrom(locations->Out()); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Vshl(I8, dst, lhs, value); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Vshl(I16, dst, lhs, value); + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Vshl(I32, dst, lhs, value); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderARMVIXL::VisitVecShr(HVecShr* instruction) { @@ -234,7 +568,28 @@ void LocationsBuilderARMVIXL::VisitVecShr(HVecShr* instruction) { } void 
InstructionCodeGeneratorARMVIXL::VisitVecShr(HVecShr* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0)); + vixl32::DRegister dst = DRegisterFrom(locations->Out()); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Vshr(DataTypeValue::S8, dst, lhs, value); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Vshr(DataTypeValue::S16, dst, lhs, value); + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Vshr(DataTypeValue::S32, dst, lhs, value); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderARMVIXL::VisitVecUShr(HVecUShr* instruction) { @@ -242,7 +597,28 @@ void LocationsBuilderARMVIXL::VisitVecUShr(HVecUShr* instruction) { } void InstructionCodeGeneratorARMVIXL::VisitVecUShr(HVecUShr* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0)); + vixl32::DRegister dst = DRegisterFrom(locations->Out()); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Vshr(DataTypeValue::U8, dst, lhs, value); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Vshr(DataTypeValue::U16, dst, lhs, value); + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Vshr(DataTypeValue::U32, dst, lhs, value); + break; + default: + LOG(FATAL) << 
"Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { @@ -253,20 +629,187 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAcc LOG(FATAL) << "No SIMD for " << instr->GetId(); } +// Returns whether the vector memory access operation is guaranteed to be word-aligned (ARM word +// size is 4 bytes). +static bool IsWordAligned(HVecMemoryOperation* instruction) { + return instruction->GetAlignment().IsAlignedAt(4u); +} + +// Helper to set up locations for vector memory operations. +static void CreateVecMemLocations(ArenaAllocator* arena, + HVecMemoryOperation* instruction, + bool is_load) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (is_load) { + locations->SetOut(Location::RequiresFpuRegister()); + } else { + locations->SetInAt(2, Location::RequiresFpuRegister()); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to compute the address for vector memory operations. Returns the memory operand and, +// if used, sets the output parameter scratch to a temporary register used in this operand, +// so that the client can release it right after the memory operand use. 
+MemOperand InstructionCodeGeneratorARMVIXL::VecAddress( + HVecMemoryOperation* instruction, + UseScratchRegisterScope* temps_scope, + /*out*/ vixl32::Register* scratch) { + LocationSummary* locations = instruction->GetLocations(); + vixl32::Register base = InputRegisterAt(instruction, 0); + + Location index = locations->InAt(1); + size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + uint32_t offset = mirror::Array::DataOffset(size).Uint32Value(); + size_t shift = ComponentSizeShiftWidth(size); + + // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet. + DCHECK(!instruction->InputAt(0)->IsIntermediateAddress()); + + if (index.IsConstant()) { + offset += Int64ConstantFrom(index) << shift; + return MemOperand(base, offset); + } else { + *scratch = temps_scope->Acquire(); + __ Add(*scratch, base, Operand(RegisterFrom(index), ShiftType::LSL, shift)); + + return MemOperand(*scratch, offset); + } +} + +AlignedMemOperand InstructionCodeGeneratorARMVIXL::VecAddressUnaligned( + HVecMemoryOperation* instruction, + UseScratchRegisterScope* temps_scope, + /*out*/ vixl32::Register* scratch) { + LocationSummary* locations = instruction->GetLocations(); + vixl32::Register base = InputRegisterAt(instruction, 0); + + Location index = locations->InAt(1); + size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + uint32_t offset = mirror::Array::DataOffset(size).Uint32Value(); + size_t shift = ComponentSizeShiftWidth(size); + + // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet. 
+ DCHECK(!instruction->InputAt(0)->IsIntermediateAddress()); + + *scratch = temps_scope->Acquire(); // Both paths below write the address into *scratch. + if (index.IsConstant()) { + offset += Int64ConstantFrom(index) << shift; + __ Add(*scratch, base, offset); + } else { + __ Add(*scratch, base, offset); + __ Add(*scratch, *scratch, Operand(RegisterFrom(index), ShiftType::LSL, shift)); + } + return AlignedMemOperand(*scratch, kNoAlignment); +} + void LocationsBuilderARMVIXL::VisitVecLoad(HVecLoad* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true); } void InstructionCodeGeneratorARMVIXL::VisitVecLoad(HVecLoad* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + vixl32::DRegister reg = OutputDRegister(instruction); + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register scratch; + + DCHECK(instruction->GetPackedType() != Primitive::kPrimChar || !instruction->IsStringCharAt()); + + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (IsWordAligned(instruction)) { + __ Vldr(reg, VecAddress(instruction, &temps, &scratch)); + } else { + __ Vld1(Untyped8, + NeonRegisterList(reg, kMultipleLanes), + VecAddressUnaligned(instruction, &temps, &scratch)); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (IsWordAligned(instruction)) { + __ Vldr(reg, VecAddress(instruction, &temps, &scratch)); + } else { + __ Vld1(Untyped16, + NeonRegisterList(reg, kMultipleLanes), + VecAddressUnaligned(instruction, &temps, &scratch)); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + if (IsWordAligned(instruction)) { + __ Vldr(reg, VecAddress(instruction, &temps, &scratch)); + } else { + __ Vld1(Untyped32, + NeonRegisterList(reg, kMultipleLanes), + 
VecAddressUnaligned(instruction, &temps, 
&scratch)); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderARMVIXL::VisitVecStore(HVecStore* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ false); } void InstructionCodeGeneratorARMVIXL::VisitVecStore(HVecStore* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + vixl32::DRegister reg = InputDRegisterAt(instruction, 2); + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register scratch; + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (IsWordAligned(instruction)) { + __ Vstr(reg, VecAddress(instruction, &temps, &scratch)); + } else { + __ Vst1(Untyped8, + NeonRegisterList(reg, kMultipleLanes), + VecAddressUnaligned(instruction, &temps, &scratch)); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (IsWordAligned(instruction)) { + __ Vstr(reg, VecAddress(instruction, &temps, &scratch)); + } else { + __ Vst1(Untyped16, + NeonRegisterList(reg, kMultipleLanes), + VecAddressUnaligned(instruction, &temps, &scratch)); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + if (IsWordAligned(instruction)) { + __ Vstr(reg, VecAddress(instruction, &temps, &scratch)); + } else { + __ Vst1(Untyped32, + NeonRegisterList(reg, kMultipleLanes), + VecAddressUnaligned(instruction, &temps, &scratch)); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } #undef __ diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index b61d7b80d1..83f31c77d3 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -1097,6 +1097,23 @@ bool 
HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric switch (compiler_driver_->GetInstructionSet()) { case kArm: case kThumb2: + // Allow vectorization for all ARM devices, because Android assumes that + // ARM 32-bit always supports advanced SIMD. + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + *restrictions |= kNoDiv; + return TrySetVectorLength(8); + case Primitive::kPrimChar: + case Primitive::kPrimShort: + *restrictions |= kNoDiv | kNoStringCharAt; + return TrySetVectorLength(4); + case Primitive::kPrimInt: + *restrictions |= kNoDiv; + return TrySetVectorLength(2); + default: + break; + } return false; case kArm64: // Allow vectorization for all ARM devices, because Android assumes that |