ARM: ART Vectorizer (64-bit vectors).
Add basic vectorization support with a 64-bit vector length on 32-bit ARM
platforms (supporting 128-bit vectors would require massive changes in the
register allocator).
Test: test-art-target, test-art-host
Change-Id: I1d740146c3f00170fc033ae5fd69d59321ddcbf4
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 805a3f4..ad3283a 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -408,6 +408,17 @@
void GenerateDivRemConstantIntegral(HBinaryOperation* instruction);
void HandleGoto(HInstruction* got, HBasicBlock* successor);
+  // Computes the memory operand for a vector memory operation.
+  // This function may acquire a scratch register (reported through `scratch`).
+  vixl::aarch32::MemOperand VecAddress(
+      HVecMemoryOperation* instruction,
+      vixl::aarch32::UseScratchRegisterScope* temps_scope,
+      /*out*/ vixl32::Register* scratch);
+  // Variant of the above returning an AlignedMemOperand.
+  // This function may acquire a scratch register (reported through `scratch`).
+  vixl::aarch32::AlignedMemOperand VecAddressUnaligned(
+      HVecMemoryOperation* instruction,
+      vixl::aarch32::UseScratchRegisterScope* temps_scope,
+      /*out*/ vixl32::Register* scratch);
+
ArmVIXLAssembler* const assembler_;
CodeGeneratorARMVIXL* const codegen_;
diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc
index 53f314e..527691d 100644
--- a/compiler/optimizing/code_generator_vector_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc
@@ -15,19 +15,62 @@
*/
#include "code_generator_arm_vixl.h"
+#include "mirror/array-inl.h"
+
+namespace vixl32 = vixl::aarch32;
+using namespace vixl32; // NOLINT(build/namespaces)
namespace art {
namespace arm {
-// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
-#define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()-> // NOLINT
+using helpers::DRegisterFrom;
+using helpers::Int64ConstantFrom;
+using helpers::InputDRegisterAt;
+using helpers::InputRegisterAt;
+using helpers::OutputDRegister;
+using helpers::RegisterFrom;
+
+#define __ GetVIXLAssembler()->
void LocationsBuilderARMVIXL::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* summary = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  // Reject every packed type outside the supported integral set up front.
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+  // The scalar comes from a core register; the replicated vector lives in an FPU register.
+  summary->SetInAt(0, Location::RequiresRegister());
+  summary->SetOut(Location::RequiresFpuRegister());
}
void InstructionCodeGeneratorARMVIXL::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  // Duplicate the scalar held in the core input register into every lane of the output
+  // D register; the lane width is selected by the packed type.
+  const vixl32::DRegister out = DRegisterFrom(instruction->GetLocations()->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Vdup(Untyped8, out, InputRegisterAt(instruction, 0));
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ Vdup(Untyped16, out, InputRegisterAt(instruction, 0));
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Vdup(Untyped32, out, InputRegisterAt(instruction, 0));
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
}
void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
@@ -51,13 +94,17 @@
LocationSummary* locations = new (arena) LocationSummary(instruction);
switch (instruction->GetPackedType()) {
case Primitive::kPrimBoolean:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(),
+ instruction->IsVecNot() ? Location::kOutputOverlap
+ : Location::kNoOutputOverlap);
+ break;
case Primitive::kPrimByte:
case Primitive::kPrimChar:
case Primitive::kPrimShort:
case Primitive::kPrimInt:
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- DCHECK(locations);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -78,7 +125,27 @@
}
void InstructionCodeGeneratorARMVIXL::VisitVecNeg(HVecNeg* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  // Lane-wise signed negation; the lane width follows the packed type.
+  LocationSummary* summary = instruction->GetLocations();
+  const vixl32::DRegister input = DRegisterFrom(summary->InAt(0));
+  const vixl32::DRegister output = DRegisterFrom(summary->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Vneg(DataTypeValue::S8, output, input);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ Vneg(DataTypeValue::S16, output, input);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Vneg(DataTypeValue::S32, output, input);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
}
void LocationsBuilderARMVIXL::VisitVecAbs(HVecAbs* instruction) {
@@ -86,7 +153,27 @@
}
void InstructionCodeGeneratorARMVIXL::VisitVecAbs(HVecAbs* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  // Lane-wise signed absolute value; the lane width follows the packed type.
+  LocationSummary* summary = instruction->GetLocations();
+  const vixl32::DRegister input = DRegisterFrom(summary->InAt(0));
+  const vixl32::DRegister output = DRegisterFrom(summary->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Vabs(DataTypeValue::S8, output, input);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ Vabs(DataTypeValue::S16, output, input);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Vabs(DataTypeValue::S32, output, input);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
}
void LocationsBuilderARMVIXL::VisitVecNot(HVecNot* instruction) {
@@ -94,7 +181,25 @@
}
void InstructionCodeGeneratorARMVIXL::VisitVecNot(HVecNot* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* summary = instruction->GetLocations();
+  const vixl32::DRegister input = DRegisterFrom(summary->InAt(0));
+  const vixl32::DRegister output = DRegisterFrom(summary->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+      // Boolean-not is special: XOR every byte lane with 1 instead of complementing all bits.
+      // The output is written before the input is read, so the two must not alias.
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Vmov(I8, output, 1);
+      __ Veor(output, output, input);
+      break;
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+      // A bitwise complement is independent of the lane width, so I8 serves all types.
+      __ Vmvn(I8, output, input);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
}
// Helper to set up locations for vector binary operations.
@@ -106,9 +211,9 @@
case Primitive::kPrimChar:
case Primitive::kPrimShort:
case Primitive::kPrimInt:
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- DCHECK(locations);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -121,7 +226,28 @@
}
void InstructionCodeGeneratorARMVIXL::VisitVecAdd(HVecAdd* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  // Lane-wise integer addition; the lane width follows the packed type.
+  LocationSummary* summary = instruction->GetLocations();
+  const vixl32::DRegister left = DRegisterFrom(summary->InAt(0));
+  const vixl32::DRegister right = DRegisterFrom(summary->InAt(1));
+  const vixl32::DRegister out = DRegisterFrom(summary->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Vadd(I8, out, left, right);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ Vadd(I16, out, left, right);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Vadd(I32, out, left, right);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
}
void LocationsBuilderARMVIXL::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
@@ -129,7 +255,40 @@
}
void InstructionCodeGeneratorARMVIXL::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  // Lane-wise halving add. The rounded flavor maps to VRHADD and the truncating one to
+  // VHADD, each in a signed or unsigned form depending on the instruction.
+  LocationSummary* summary = instruction->GetLocations();
+  const vixl32::DRegister left = DRegisterFrom(summary->InAt(0));
+  const vixl32::DRegister right = DRegisterFrom(summary->InAt(1));
+  const vixl32::DRegister out = DRegisterFrom(summary->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        if (instruction->IsRounded()) {
+          __ Vrhadd(DataTypeValue::U8, out, left, right);
+        } else {
+          __ Vhadd(DataTypeValue::U8, out, left, right);
+        }
+      } else {
+        if (instruction->IsRounded()) {
+          __ Vrhadd(DataTypeValue::S8, out, left, right);
+        } else {
+          __ Vhadd(DataTypeValue::S8, out, left, right);
+        }
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        if (instruction->IsRounded()) {
+          __ Vrhadd(DataTypeValue::U16, out, left, right);
+        } else {
+          __ Vhadd(DataTypeValue::U16, out, left, right);
+        }
+      } else {
+        if (instruction->IsRounded()) {
+          __ Vrhadd(DataTypeValue::S16, out, left, right);
+        } else {
+          __ Vhadd(DataTypeValue::S16, out, left, right);
+        }
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
}
void LocationsBuilderARMVIXL::VisitVecSub(HVecSub* instruction) {
@@ -137,7 +296,28 @@
}
void InstructionCodeGeneratorARMVIXL::VisitVecSub(HVecSub* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  // Lane-wise integer subtraction; the lane width follows the packed type.
+  LocationSummary* summary = instruction->GetLocations();
+  const vixl32::DRegister left = DRegisterFrom(summary->InAt(0));
+  const vixl32::DRegister right = DRegisterFrom(summary->InAt(1));
+  const vixl32::DRegister out = DRegisterFrom(summary->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Vsub(I8, out, left, right);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ Vsub(I16, out, left, right);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Vsub(I32, out, left, right);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
}
void LocationsBuilderARMVIXL::VisitVecMul(HVecMul* instruction) {
@@ -145,7 +325,28 @@
}
void InstructionCodeGeneratorARMVIXL::VisitVecMul(HVecMul* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  // Lane-wise integer multiplication; the lane width follows the packed type.
+  LocationSummary* summary = instruction->GetLocations();
+  const vixl32::DRegister left = DRegisterFrom(summary->InAt(0));
+  const vixl32::DRegister right = DRegisterFrom(summary->InAt(1));
+  const vixl32::DRegister out = DRegisterFrom(summary->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Vmul(I8, out, left, right);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ Vmul(I16, out, left, right);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Vmul(I32, out, left, right);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
}
void LocationsBuilderARMVIXL::VisitVecDiv(HVecDiv* instruction) {
@@ -161,7 +362,40 @@
}
void InstructionCodeGeneratorARMVIXL::VisitVecMin(HVecMin* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  // Lane-wise minimum. The signedness of the comparison comes from the instruction and
+  // the lane width from the packed type.
+  LocationSummary* summary = instruction->GetLocations();
+  const vixl32::DRegister left = DRegisterFrom(summary->InAt(0));
+  const vixl32::DRegister right = DRegisterFrom(summary->InAt(1));
+  const vixl32::DRegister out = DRegisterFrom(summary->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Vmin(instruction->IsUnsigned() ? DataTypeValue::U8 : DataTypeValue::S8,
+              out,
+              left,
+              right);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ Vmin(instruction->IsUnsigned() ? DataTypeValue::U16 : DataTypeValue::S16,
+              out,
+              left,
+              right);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Vmin(instruction->IsUnsigned() ? DataTypeValue::U32 : DataTypeValue::S32,
+              out,
+              left,
+              right);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
}
void LocationsBuilderARMVIXL::VisitVecMax(HVecMax* instruction) {
@@ -169,7 +403,40 @@
}
void InstructionCodeGeneratorARMVIXL::VisitVecMax(HVecMax* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  // Lane-wise maximum. The signedness of the comparison comes from the instruction and
+  // the lane width from the packed type.
+  LocationSummary* summary = instruction->GetLocations();
+  const vixl32::DRegister left = DRegisterFrom(summary->InAt(0));
+  const vixl32::DRegister right = DRegisterFrom(summary->InAt(1));
+  const vixl32::DRegister out = DRegisterFrom(summary->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Vmax(instruction->IsUnsigned() ? DataTypeValue::U8 : DataTypeValue::S8,
+              out,
+              left,
+              right);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ Vmax(instruction->IsUnsigned() ? DataTypeValue::U16 : DataTypeValue::S16,
+              out,
+              left,
+              right);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Vmax(instruction->IsUnsigned() ? DataTypeValue::U32 : DataTypeValue::S32,
+              out,
+              left,
+              right);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
}
void LocationsBuilderARMVIXL::VisitVecAnd(HVecAnd* instruction) {
@@ -177,7 +444,22 @@
}
void InstructionCodeGeneratorARMVIXL::VisitVecAnd(HVecAnd* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* summary = instruction->GetLocations();
+  const vixl32::DRegister left = DRegisterFrom(summary->InAt(0));
+  const vixl32::DRegister right = DRegisterFrom(summary->InAt(1));
+  const vixl32::DRegister out = DRegisterFrom(summary->Out());
+  // Only validate the packed type here: the same instruction is emitted for all of them.
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+  __ Vand(I8, out, left, right);
}
void LocationsBuilderARMVIXL::VisitVecAndNot(HVecAndNot* instruction) {
@@ -193,7 +475,22 @@
}
void InstructionCodeGeneratorARMVIXL::VisitVecOr(HVecOr* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* summary = instruction->GetLocations();
+  const vixl32::DRegister left = DRegisterFrom(summary->InAt(0));
+  const vixl32::DRegister right = DRegisterFrom(summary->InAt(1));
+  const vixl32::DRegister out = DRegisterFrom(summary->Out());
+  // Only validate the packed type here: the same instruction is emitted for all of them.
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+  __ Vorr(I8, out, left, right);
}
void LocationsBuilderARMVIXL::VisitVecXor(HVecXor* instruction) {
@@ -201,7 +498,22 @@
}
void InstructionCodeGeneratorARMVIXL::VisitVecXor(HVecXor* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* summary = instruction->GetLocations();
+  const vixl32::DRegister left = DRegisterFrom(summary->InAt(0));
+  const vixl32::DRegister right = DRegisterFrom(summary->InAt(1));
+  const vixl32::DRegister out = DRegisterFrom(summary->Out());
+  // Only validate the packed type here: the same instruction is emitted for all of them.
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+  __ Veor(I8, out, left, right);
}
// Helper to set up locations for vector shift operations.
@@ -212,8 +524,9 @@
case Primitive::kPrimChar:
case Primitive::kPrimShort:
case Primitive::kPrimInt:
- case Primitive::kPrimLong:
- DCHECK(locations);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -226,7 +539,28 @@
}
void InstructionCodeGeneratorARMVIXL::VisitVecShl(HVecShl* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  // Lane-wise left shift by an immediate; the shift distance is read from the constant
+  // location of input 1.
+  LocationSummary* summary = instruction->GetLocations();
+  const vixl32::DRegister operand = DRegisterFrom(summary->InAt(0));
+  const vixl32::DRegister out = DRegisterFrom(summary->Out());
+  const int32_t distance = summary->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Vshl(I8, out, operand, distance);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ Vshl(I16, out, operand, distance);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Vshl(I32, out, operand, distance);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
}
void LocationsBuilderARMVIXL::VisitVecShr(HVecShr* instruction) {
@@ -234,7 +568,28 @@
}
void InstructionCodeGeneratorARMVIXL::VisitVecShr(HVecShr* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  // Lane-wise right shift by an immediate using the signed data types; the shift
+  // distance is read from the constant location of input 1.
+  LocationSummary* summary = instruction->GetLocations();
+  const vixl32::DRegister operand = DRegisterFrom(summary->InAt(0));
+  const vixl32::DRegister out = DRegisterFrom(summary->Out());
+  const int32_t distance = summary->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Vshr(DataTypeValue::S8, out, operand, distance);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ Vshr(DataTypeValue::S16, out, operand, distance);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Vshr(DataTypeValue::S32, out, operand, distance);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
}
void LocationsBuilderARMVIXL::VisitVecUShr(HVecUShr* instruction) {
@@ -242,7 +597,28 @@
}
void InstructionCodeGeneratorARMVIXL::VisitVecUShr(HVecUShr* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  // Lane-wise right shift by an immediate using the unsigned data types; the shift
+  // distance is read from the constant location of input 1.
+  LocationSummary* summary = instruction->GetLocations();
+  const vixl32::DRegister operand = DRegisterFrom(summary->InAt(0));
+  const vixl32::DRegister out = DRegisterFrom(summary->Out());
+  const int32_t distance = summary->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Vshr(DataTypeValue::U8, out, operand, distance);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ Vshr(DataTypeValue::U16, out, operand, distance);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Vshr(DataTypeValue::U32, out, operand, distance);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
}
void LocationsBuilderARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
@@ -253,20 +629,187 @@
LOG(FATAL) << "No SIMD for " << instr->GetId();
}
+// Tells whether the alignment recorded on the vector memory access guarantees a word
+// boundary (the ARM word size is 4 bytes).
+static bool IsWordAligned(HVecMemoryOperation* instruction) {
+  constexpr unsigned int kArmWordSizeInBytes = 4u;
+  return instruction->GetAlignment().IsAlignedAt(kArmWordSizeInBytes);
+}
+
+// Assigns locations for a vector load or store: input 0 in a core register, input 1 (the
+// index) in a register or as a constant, and the vector itself in an FPU register, which
+// is the output for a load and input 2 for a store.
+static void CreateVecMemLocations(ArenaAllocator* arena,
+                                  HVecMemoryOperation* instruction,
+                                  bool is_load) {
+  LocationSummary* summary = new (arena) LocationSummary(instruction);
+  // Reject every packed type outside the supported integral set up front.
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+  summary->SetInAt(0, Location::RequiresRegister());
+  summary->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  if (!is_load) {
+    summary->SetInAt(2, Location::RequiresFpuRegister());
+    return;
+  }
+  summary->SetOut(Location::RequiresFpuRegister());
+}
+
+// Helper to compute the address for vector memory operations. Returns the memory operand and,
+// if used, sets the output parameter scratch to a temporary register used in this operand,
+// so that the client can release it right after the memory operand use.
+MemOperand InstructionCodeGeneratorARMVIXL::VecAddress(
+    HVecMemoryOperation* instruction,
+    UseScratchRegisterScope* temps_scope,
+    /*out*/ vixl32::Register* scratch) {
+  const vixl32::Register array = InputRegisterAt(instruction, 0);
+  const Location index_loc = instruction->GetLocations()->InAt(1);
+
+  const size_t element_size = Primitive::ComponentSize(instruction->GetPackedType());
+  const size_t index_shift = ComponentSizeShiftWidth(element_size);
+  uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+  // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet.
+  DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());
+
+  // Constant index: fold the scaled index into the immediate offset; no scratch needed.
+  if (index_loc.IsConstant()) {
+    offset += Int64ConstantFrom(index_loc) << index_shift;
+    return MemOperand(array, offset);
+  }
+
+  // Register index: compute array + (index << shift) into a scratch register and
+  // address the element relative to it with the data offset.
+  *scratch = temps_scope->Acquire();
+  __ Add(*scratch, array, Operand(RegisterFrom(index_loc), ShiftType::LSL, index_shift));
+  return MemOperand(*scratch, offset);
+}
+
+// Computes the address operand for a vector memory access that goes through an
+// AlignedMemOperand (constructed with kNoAlignment). The complete address
+// (base + data offset + scaled index) is always materialized in a scratch register
+// acquired from `temps_scope`; that register is reported through `scratch`.
+AlignedMemOperand InstructionCodeGeneratorARMVIXL::VecAddressUnaligned(
+    HVecMemoryOperation* instruction,
+    UseScratchRegisterScope* temps_scope,
+    /*out*/ vixl32::Register* scratch) {
+  LocationSummary* locations = instruction->GetLocations();
+  vixl32::Register base = InputRegisterAt(instruction, 0);
+
+  Location index = locations->InAt(1);
+  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+  uint32_t offset = mirror::Array::DataOffset(size).Uint32Value();
+  size_t shift = ComponentSizeShiftWidth(size);
+
+  // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet.
+  DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());
+
+  // Both paths below write the address into *scratch, so acquire it before branching.
+  // Acquiring only on the register-index path left the constant-index path writing to
+  // whatever the caller passed in, which in this file is a default-constructed register.
+  *scratch = temps_scope->Acquire();
+  if (index.IsConstant()) {
+    // Fold the scaled constant index into the immediate added to the base.
+    offset += Int64ConstantFrom(index) << shift;
+    __ Add(*scratch, base, offset);
+  } else {
+    __ Add(*scratch, base, offset);
+    __ Add(*scratch, *scratch, Operand(RegisterFrom(index), ShiftType::LSL, shift));
+  }
+  return AlignedMemOperand(*scratch, kNoAlignment);
+}
+
void LocationsBuilderARMVIXL::VisitVecLoad(HVecLoad* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  // A load uses the shared vector-memory location setup with the vector as output.
+  ArenaAllocator* const arena = GetGraph()->GetArena();
+  CreateVecMemLocations(arena, instruction, /*is_load*/ true);
}
void InstructionCodeGeneratorARMVIXL::VisitVecLoad(HVecLoad* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  // String char-at loads are not expected to reach this code generator.
+  DCHECK(!(instruction->GetPackedType() == Primitive::kPrimChar && instruction->IsStringCharAt()));
+
+  const vixl32::DRegister dest = OutputDRegister(instruction);
+  UseScratchRegisterScope temps(GetVIXLAssembler());
+  vixl32::Register scratch;
+
+  // Accesses known to be word-aligned use VLDR with a plain memory operand; all others
+  // use VLD1 with an element size matching the packed type.
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      if (!IsWordAligned(instruction)) {
+        __ Vld1(Untyped8,
+                NeonRegisterList(dest, kMultipleLanes),
+                VecAddressUnaligned(instruction, &temps, &scratch));
+      } else {
+        __ Vldr(dest, VecAddress(instruction, &temps, &scratch));
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      if (!IsWordAligned(instruction)) {
+        __ Vld1(Untyped16,
+                NeonRegisterList(dest, kMultipleLanes),
+                VecAddressUnaligned(instruction, &temps, &scratch));
+      } else {
+        __ Vldr(dest, VecAddress(instruction, &temps, &scratch));
+      }
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      if (!IsWordAligned(instruction)) {
+        __ Vld1(Untyped32,
+                NeonRegisterList(dest, kMultipleLanes),
+                VecAddressUnaligned(instruction, &temps, &scratch));
+      } else {
+        __ Vldr(dest, VecAddress(instruction, &temps, &scratch));
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
}
void LocationsBuilderARMVIXL::VisitVecStore(HVecStore* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  // A store uses the shared vector-memory location setup with the vector as input 2.
+  ArenaAllocator* const arena = GetGraph()->GetArena();
+  CreateVecMemLocations(arena, instruction, /*is_load*/ false);
}
void InstructionCodeGeneratorARMVIXL::VisitVecStore(HVecStore* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  const vixl32::DRegister value = InputDRegisterAt(instruction, 2);
+  UseScratchRegisterScope temps(GetVIXLAssembler());
+  vixl32::Register scratch;
+
+  // Accesses known to be word-aligned use VSTR with a plain memory operand; all others
+  // use VST1 with an element size matching the packed type.
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      if (!IsWordAligned(instruction)) {
+        __ Vst1(Untyped8,
+                NeonRegisterList(value, kMultipleLanes),
+                VecAddressUnaligned(instruction, &temps, &scratch));
+      } else {
+        __ Vstr(value, VecAddress(instruction, &temps, &scratch));
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      if (!IsWordAligned(instruction)) {
+        __ Vst1(Untyped16,
+                NeonRegisterList(value, kMultipleLanes),
+                VecAddressUnaligned(instruction, &temps, &scratch));
+      } else {
+        __ Vstr(value, VecAddress(instruction, &temps, &scratch));
+      }
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      if (!IsWordAligned(instruction)) {
+        __ Vst1(Untyped32,
+                NeonRegisterList(value, kMultipleLanes),
+                VecAddressUnaligned(instruction, &temps, &scratch));
+      } else {
+        __ Vstr(value, VecAddress(instruction, &temps, &scratch));
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
}
#undef __
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 9c8a632..bfb23f1 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -1021,6 +1021,23 @@
switch (compiler_driver_->GetInstructionSet()) {
case kArm:
case kThumb2:
+ // Allow vectorization for all ARM devices, because Android assumes that
+ // ARM 32-bit always supports advanced SIMD.
+ switch (type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ *restrictions |= kNoDiv;
+ return TrySetVectorLength(8);
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ *restrictions |= kNoDiv | kNoStringCharAt;
+ return TrySetVectorLength(4);
+ case Primitive::kPrimInt:
+ *restrictions |= kNoDiv;
+ return TrySetVectorLength(2);
+ default:
+ break;
+ }
return false;
case kArm64:
// Allow vectorization for all ARM devices, because Android assumes that