summary refs log tree commit diff
path: root/compiler/optimizing
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/optimizing')
-rw-r--r-- compiler/optimizing/code_generator_arm64.cc        |  42
-rw-r--r-- compiler/optimizing/code_generator_arm64.h         |   5
-rw-r--r-- compiler/optimizing/code_generator_vector_arm64.cc | 308
-rw-r--r-- compiler/optimizing/codegen_test.cc                |  39
-rw-r--r-- compiler/optimizing/common_arm64.h                 |  10
-rw-r--r-- compiler/optimizing/loop_optimization.cc           |  12
6 files changed, 293 insertions, 123 deletions
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 794e05c670..b39a0e43fa 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -68,6 +68,7 @@ using helpers::OperandFromMemOperand;
using helpers::OutputCPURegister;
using helpers::OutputFPRegister;
using helpers::OutputRegister;
+using helpers::QRegisterFrom;
using helpers::RegisterFrom;
using helpers::StackOperandFrom;
using helpers::VIXLRegCodeFromART;
@@ -1459,9 +1460,12 @@ void ParallelMoveResolverARM64::FinishEmitNativeCode() {
}
Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind kind) {
- DCHECK(kind == Location::kRegister || kind == Location::kFpuRegister ||
- kind == Location::kStackSlot || kind == Location::kDoubleStackSlot);
- kind = (kind == Location::kFpuRegister) ? Location::kFpuRegister : Location::kRegister;
+ DCHECK(kind == Location::kRegister || kind == Location::kFpuRegister
+ || kind == Location::kStackSlot || kind == Location::kDoubleStackSlot
+ || kind == Location::kSIMDStackSlot);
+ kind = (kind == Location::kFpuRegister || kind == Location::kSIMDStackSlot)
+ ? Location::kFpuRegister
+ : Location::kRegister;
Location scratch = GetScratchLocation(kind);
if (!scratch.Equals(Location::NoLocation())) {
return scratch;
@@ -1471,7 +1475,9 @@ Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind ki
scratch = LocationFrom(vixl_temps_.AcquireX());
} else {
DCHECK(kind == Location::kFpuRegister);
- scratch = LocationFrom(vixl_temps_.AcquireD());
+ scratch = LocationFrom(codegen_->GetGraph()->HasSIMD()
+ ? vixl_temps_.AcquireVRegisterOfSize(kQRegSize)
+ : vixl_temps_.AcquireD());
}
AddScratchLocation(scratch);
return scratch;
@@ -1482,7 +1488,7 @@ void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) {
vixl_temps_.Release(XRegisterFrom(loc));
} else {
DCHECK(loc.IsFpuRegister());
- vixl_temps_.Release(DRegisterFrom(loc));
+ vixl_temps_.Release(codegen_->GetGraph()->HasSIMD() ? QRegisterFrom(loc) : DRegisterFrom(loc));
}
RemoveScratchLocation(loc);
}
@@ -1745,6 +1751,8 @@ void CodeGeneratorARM64::MoveLocation(Location destination,
if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
__ Ldr(dst, StackOperandFrom(source));
+ } else if (source.IsSIMDStackSlot()) {
+ __ Ldr(QRegisterFrom(destination), StackOperandFrom(source));
} else if (source.IsConstant()) {
DCHECK(CoherentConstantAndType(source, dst_type));
MoveConstant(dst, source.GetConstant());
@@ -1767,7 +1775,29 @@ void CodeGeneratorARM64::MoveLocation(Location destination,
__ Fmov(RegisterFrom(destination, dst_type), FPRegisterFrom(source, source_type));
} else {
DCHECK(destination.IsFpuRegister());
- __ Fmov(FPRegister(dst), FPRegisterFrom(source, dst_type));
+ if (GetGraph()->HasSIMD()) {
+ __ Mov(QRegisterFrom(destination), QRegisterFrom(source));
+ } else {
+ __ Fmov(FPRegister(dst), FPRegisterFrom(source, dst_type));
+ }
+ }
+ }
+ } else if (destination.IsSIMDStackSlot()) {
+ if (source.IsFpuRegister()) {
+ __ Str(QRegisterFrom(source), StackOperandFrom(destination));
+ } else {
+ DCHECK(source.IsSIMDStackSlot());
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ if (GetVIXLAssembler()->GetScratchFPRegisterList()->IsEmpty()) {
+ Register temp = temps.AcquireX();
+ __ Ldr(temp, MemOperand(sp, source.GetStackIndex()));
+ __ Str(temp, MemOperand(sp, destination.GetStackIndex()));
+ __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize));
+ __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize));
+ } else {
+ FPRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
+ __ Ldr(temp, StackOperandFrom(source));
+ __ Str(temp, StackOperandFrom(destination));
}
}
} else { // The destination is not a register. It must be a stack slot.
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 10d8b841f8..869aad2942 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -412,8 +412,9 @@ class CodeGeneratorARM64 : public CodeGenerator {
}
size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
- // Allocated in D registers, which are word sized.
- return kArm64WordSize;
+ return GetGraph()->HasSIMD()
+ ? 2 * kArm64WordSize // 16 bytes == 2 arm64 words for each spill
+ : 1 * kArm64WordSize; // 8 bytes == 1 arm64 word for each spill
}
uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index f4874fe2bc..0923920366 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -22,7 +22,7 @@ using namespace vixl::aarch64; // NOLINT(build/namespaces)
namespace art {
namespace arm64 {
-using helpers::DRegisterFrom;
+using helpers::VRegisterFrom;
using helpers::HeapOperand;
using helpers::InputRegisterAt;
using helpers::Int64ConstantFrom;
@@ -38,10 +38,12 @@ void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruc
case Primitive::kPrimChar:
case Primitive::kPrimShort:
case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
locations->SetInAt(0, Location::RequiresRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
@@ -53,25 +55,33 @@ void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruc
void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
LocationSummary* locations = instruction->GetLocations();
- FPRegister dst = DRegisterFrom(locations->Out());
+ VRegister dst = VRegisterFrom(locations->Out());
switch (instruction->GetPackedType()) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- DCHECK_EQ(8u, instruction->GetVectorLength());
- __ Dup(dst.V8B(), InputRegisterAt(instruction, 0));
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Dup(dst.V16B(), InputRegisterAt(instruction, 0));
break;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ Dup(dst.V4H(), InputRegisterAt(instruction, 0));
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Dup(dst.V8H(), InputRegisterAt(instruction, 0));
break;
case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Dup(dst.V4S(), InputRegisterAt(instruction, 0));
+ break;
+ case Primitive::kPrimLong:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ Dup(dst.V2S(), InputRegisterAt(instruction, 0));
+ __ Dup(dst.V2D(), XRegisterFrom(locations->InAt(0)));
break;
case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Dup(dst.V4S(), VRegisterFrom(locations->InAt(0)).V4S(), 0);
+ break;
+ case Primitive::kPrimDouble:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ Dup(dst.V2S(), DRegisterFrom(locations->InAt(0)).V2S(), 0);
+ __ Dup(dst.V2D(), VRegisterFrom(locations->InAt(0)).V2D(), 0);
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -109,7 +119,9 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in
case Primitive::kPrimChar:
case Primitive::kPrimShort:
case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
@@ -125,13 +137,13 @@ void LocationsBuilderARM64::VisitVecCnv(HVecCnv* instruction) {
void InstructionCodeGeneratorARM64::VisitVecCnv(HVecCnv* instruction) {
LocationSummary* locations = instruction->GetLocations();
- FPRegister src = DRegisterFrom(locations->InAt(0));
- FPRegister dst = DRegisterFrom(locations->Out());
+ VRegister src = VRegisterFrom(locations->InAt(0));
+ VRegister dst = VRegisterFrom(locations->Out());
Primitive::Type from = instruction->GetInputType();
Primitive::Type to = instruction->GetResultType();
if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) {
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ Scvtf(dst.V2S(), src.V2S());
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Scvtf(dst.V4S(), src.V4S());
} else {
LOG(FATAL) << "Unsupported SIMD type";
}
@@ -143,25 +155,33 @@ void LocationsBuilderARM64::VisitVecNeg(HVecNeg* instruction) {
void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) {
LocationSummary* locations = instruction->GetLocations();
- FPRegister src = DRegisterFrom(locations->InAt(0));
- FPRegister dst = DRegisterFrom(locations->Out());
+ VRegister src = VRegisterFrom(locations->InAt(0));
+ VRegister dst = VRegisterFrom(locations->Out());
switch (instruction->GetPackedType()) {
case Primitive::kPrimByte:
- DCHECK_EQ(8u, instruction->GetVectorLength());
- __ Neg(dst.V8B(), src.V8B());
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Neg(dst.V16B(), src.V16B());
break;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ Neg(dst.V4H(), src.V4H());
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Neg(dst.V8H(), src.V8H());
break;
case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Neg(dst.V4S(), src.V4S());
+ break;
+ case Primitive::kPrimLong:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ Neg(dst.V2S(), src.V2S());
+ __ Neg(dst.V2D(), src.V2D());
break;
case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Fneg(dst.V4S(), src.V4S());
+ break;
+ case Primitive::kPrimDouble:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ Fneg(dst.V2S(), src.V2S());
+ __ Fneg(dst.V2D(), src.V2D());
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -175,25 +195,33 @@ void LocationsBuilderARM64::VisitVecAbs(HVecAbs* instruction) {
void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) {
LocationSummary* locations = instruction->GetLocations();
- FPRegister src = DRegisterFrom(locations->InAt(0));
- FPRegister dst = DRegisterFrom(locations->Out());
+ VRegister src = VRegisterFrom(locations->InAt(0));
+ VRegister dst = VRegisterFrom(locations->Out());
switch (instruction->GetPackedType()) {
case Primitive::kPrimByte:
- DCHECK_EQ(8u, instruction->GetVectorLength());
- __ Abs(dst.V8B(), src.V8B());
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Abs(dst.V16B(), src.V16B());
break;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ Abs(dst.V4H(), src.V4H());
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Abs(dst.V8H(), src.V8H());
break;
case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Abs(dst.V4S(), src.V4S());
+ break;
+ case Primitive::kPrimLong:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ Abs(dst.V2S(), src.V2S());
+ __ Abs(dst.V2D(), src.V2D());
break;
case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Fabs(dst.V4S(), src.V4S());
+ break;
+ case Primitive::kPrimDouble:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ Fabs(dst.V2S(), src.V2S());
+ __ Fabs(dst.V2D(), src.V2D());
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -206,19 +234,20 @@ void LocationsBuilderARM64::VisitVecNot(HVecNot* instruction) {
void InstructionCodeGeneratorARM64::VisitVecNot(HVecNot* instruction) {
LocationSummary* locations = instruction->GetLocations();
- FPRegister src = DRegisterFrom(locations->InAt(0));
- FPRegister dst = DRegisterFrom(locations->Out());
+ VRegister src = VRegisterFrom(locations->InAt(0));
+ VRegister dst = VRegisterFrom(locations->Out());
switch (instruction->GetPackedType()) {
case Primitive::kPrimBoolean: // special case boolean-not
- DCHECK_EQ(8u, instruction->GetVectorLength());
- __ Movi(dst.V8B(), 1);
- __ Eor(dst.V8B(), dst.V8B(), src.V8B());
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Movi(dst.V16B(), 1);
+ __ Eor(dst.V16B(), dst.V16B(), src.V16B());
break;
case Primitive::kPrimByte:
case Primitive::kPrimChar:
case Primitive::kPrimShort:
case Primitive::kPrimInt:
- __ Not(dst.V8B(), src.V8B()); // lanes do not matter
+ case Primitive::kPrimLong:
+ __ Not(dst.V16B(), src.V16B()); // lanes do not matter
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -235,7 +264,9 @@ static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation*
case Primitive::kPrimChar:
case Primitive::kPrimShort:
case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetInAt(1, Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
@@ -252,26 +283,34 @@ void LocationsBuilderARM64::VisitVecAdd(HVecAdd* instruction) {
void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) {
LocationSummary* locations = instruction->GetLocations();
- FPRegister lhs = DRegisterFrom(locations->InAt(0));
- FPRegister rhs = DRegisterFrom(locations->InAt(1));
- FPRegister dst = DRegisterFrom(locations->Out());
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
switch (instruction->GetPackedType()) {
case Primitive::kPrimByte:
- DCHECK_EQ(8u, instruction->GetVectorLength());
- __ Add(dst.V8B(), lhs.V8B(), rhs.V8B());
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Add(dst.V16B(), lhs.V16B(), rhs.V16B());
break;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ Add(dst.V4H(), lhs.V4H(), rhs.V4H());
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Add(dst.V8H(), lhs.V8H(), rhs.V8H());
break;
case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Add(dst.V4S(), lhs.V4S(), rhs.V4S());
+ break;
+ case Primitive::kPrimLong:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ Add(dst.V2S(), lhs.V2S(), rhs.V2S());
+ __ Add(dst.V2D(), lhs.V2D(), rhs.V2D());
break;
case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Fadd(dst.V4S(), lhs.V4S(), rhs.V4S());
+ break;
+ case Primitive::kPrimDouble:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ Fadd(dst.V2S(), lhs.V2S(), rhs.V2S());
+ __ Fadd(dst.V2D(), lhs.V2D(), rhs.V2D());
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -285,26 +324,34 @@ void LocationsBuilderARM64::VisitVecSub(HVecSub* instruction) {
void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) {
LocationSummary* locations = instruction->GetLocations();
- FPRegister lhs = DRegisterFrom(locations->InAt(0));
- FPRegister rhs = DRegisterFrom(locations->InAt(1));
- FPRegister dst = DRegisterFrom(locations->Out());
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
switch (instruction->GetPackedType()) {
case Primitive::kPrimByte:
- DCHECK_EQ(8u, instruction->GetVectorLength());
- __ Sub(dst.V8B(), lhs.V8B(), rhs.V8B());
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Sub(dst.V16B(), lhs.V16B(), rhs.V16B());
break;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ Sub(dst.V4H(), lhs.V4H(), rhs.V4H());
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Sub(dst.V8H(), lhs.V8H(), rhs.V8H());
break;
case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Sub(dst.V4S(), lhs.V4S(), rhs.V4S());
+ break;
+ case Primitive::kPrimLong:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ Sub(dst.V2S(), lhs.V2S(), rhs.V2S());
+ __ Sub(dst.V2D(), lhs.V2D(), rhs.V2D());
break;
case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Fsub(dst.V4S(), lhs.V4S(), rhs.V4S());
+ break;
+ case Primitive::kPrimDouble:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ Fsub(dst.V2S(), lhs.V2S(), rhs.V2S());
+ __ Fsub(dst.V2D(), lhs.V2D(), rhs.V2D());
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -318,26 +365,30 @@ void LocationsBuilderARM64::VisitVecMul(HVecMul* instruction) {
void InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) {
LocationSummary* locations = instruction->GetLocations();
- FPRegister lhs = DRegisterFrom(locations->InAt(0));
- FPRegister rhs = DRegisterFrom(locations->InAt(1));
- FPRegister dst = DRegisterFrom(locations->Out());
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
switch (instruction->GetPackedType()) {
case Primitive::kPrimByte:
- DCHECK_EQ(8u, instruction->GetVectorLength());
- __ Mul(dst.V8B(), lhs.V8B(), rhs.V8B());
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Mul(dst.V16B(), lhs.V16B(), rhs.V16B());
break;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ Mul(dst.V4H(), lhs.V4H(), rhs.V4H());
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Mul(dst.V8H(), lhs.V8H(), rhs.V8H());
break;
case Primitive::kPrimInt:
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ Mul(dst.V2S(), lhs.V2S(), rhs.V2S());
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Mul(dst.V4S(), lhs.V4S(), rhs.V4S());
break;
case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Fmul(dst.V4S(), lhs.V4S(), rhs.V4S());
+ break;
+ case Primitive::kPrimDouble:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ Fmul(dst.V2S(), lhs.V2S(), rhs.V2S());
+ __ Fmul(dst.V2D(), lhs.V2D(), rhs.V2D());
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -351,13 +402,17 @@ void LocationsBuilderARM64::VisitVecDiv(HVecDiv* instruction) {
void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) {
LocationSummary* locations = instruction->GetLocations();
- FPRegister lhs = DRegisterFrom(locations->InAt(0));
- FPRegister rhs = DRegisterFrom(locations->InAt(1));
- FPRegister dst = DRegisterFrom(locations->Out());
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
switch (instruction->GetPackedType()) {
case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Fdiv(dst.V4S(), lhs.V4S(), rhs.V4S());
+ break;
+ case Primitive::kPrimDouble:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ Fdiv(dst.V2S(), lhs.V2S(), rhs.V2S());
+ __ Fdiv(dst.V2D(), lhs.V2D(), rhs.V2D());
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -371,17 +426,19 @@ void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) {
void InstructionCodeGeneratorARM64::VisitVecAnd(HVecAnd* instruction) {
LocationSummary* locations = instruction->GetLocations();
- FPRegister lhs = DRegisterFrom(locations->InAt(0));
- FPRegister rhs = DRegisterFrom(locations->InAt(1));
- FPRegister dst = DRegisterFrom(locations->Out());
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
switch (instruction->GetPackedType()) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
case Primitive::kPrimChar:
case Primitive::kPrimShort:
case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
case Primitive::kPrimFloat:
- __ And(dst.V8B(), lhs.V8B(), rhs.V8B()); // lanes do not matter
+ case Primitive::kPrimDouble:
+ __ And(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -403,17 +460,19 @@ void LocationsBuilderARM64::VisitVecOr(HVecOr* instruction) {
void InstructionCodeGeneratorARM64::VisitVecOr(HVecOr* instruction) {
LocationSummary* locations = instruction->GetLocations();
- FPRegister lhs = DRegisterFrom(locations->InAt(0));
- FPRegister rhs = DRegisterFrom(locations->InAt(1));
- FPRegister dst = DRegisterFrom(locations->Out());
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
switch (instruction->GetPackedType()) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
case Primitive::kPrimChar:
case Primitive::kPrimShort:
case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
case Primitive::kPrimFloat:
- __ Orr(dst.V8B(), lhs.V8B(), rhs.V8B()); // lanes do not matter
+ case Primitive::kPrimDouble:
+ __ Orr(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -427,17 +486,19 @@ void LocationsBuilderARM64::VisitVecXor(HVecXor* instruction) {
void InstructionCodeGeneratorARM64::VisitVecXor(HVecXor* instruction) {
LocationSummary* locations = instruction->GetLocations();
- FPRegister lhs = DRegisterFrom(locations->InAt(0));
- FPRegister rhs = DRegisterFrom(locations->InAt(1));
- FPRegister dst = DRegisterFrom(locations->Out());
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
switch (instruction->GetPackedType()) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
case Primitive::kPrimChar:
case Primitive::kPrimShort:
case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
case Primitive::kPrimFloat:
- __ Eor(dst.V8B(), lhs.V8B(), rhs.V8B()); // lanes do not matter
+ case Primitive::kPrimDouble:
+ __ Eor(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -453,6 +514,7 @@ static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation*
case Primitive::kPrimChar:
case Primitive::kPrimShort:
case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
@@ -469,22 +531,26 @@ void LocationsBuilderARM64::VisitVecShl(HVecShl* instruction) {
void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) {
LocationSummary* locations = instruction->GetLocations();
- FPRegister lhs = DRegisterFrom(locations->InAt(0));
- FPRegister dst = DRegisterFrom(locations->Out());
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister dst = VRegisterFrom(locations->Out());
int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
switch (instruction->GetPackedType()) {
case Primitive::kPrimByte:
- DCHECK_EQ(8u, instruction->GetVectorLength());
- __ Shl(dst.V8B(), lhs.V8B(), value);
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Shl(dst.V16B(), lhs.V16B(), value);
break;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ Shl(dst.V4H(), lhs.V4H(), value);
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Shl(dst.V8H(), lhs.V8H(), value);
break;
case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Shl(dst.V4S(), lhs.V4S(), value);
+ break;
+ case Primitive::kPrimLong:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ Shl(dst.V2S(), lhs.V2S(), value);
+ __ Shl(dst.V2D(), lhs.V2D(), value);
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -498,22 +564,26 @@ void LocationsBuilderARM64::VisitVecShr(HVecShr* instruction) {
void InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) {
LocationSummary* locations = instruction->GetLocations();
- FPRegister lhs = DRegisterFrom(locations->InAt(0));
- FPRegister dst = DRegisterFrom(locations->Out());
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister dst = VRegisterFrom(locations->Out());
int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
switch (instruction->GetPackedType()) {
case Primitive::kPrimByte:
- DCHECK_EQ(8u, instruction->GetVectorLength());
- __ Sshr(dst.V8B(), lhs.V8B(), value);
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Sshr(dst.V16B(), lhs.V16B(), value);
break;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ Sshr(dst.V4H(), lhs.V4H(), value);
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Sshr(dst.V8H(), lhs.V8H(), value);
break;
case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Sshr(dst.V4S(), lhs.V4S(), value);
+ break;
+ case Primitive::kPrimLong:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ Sshr(dst.V2S(), lhs.V2S(), value);
+ __ Sshr(dst.V2D(), lhs.V2D(), value);
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -527,22 +597,26 @@ void LocationsBuilderARM64::VisitVecUShr(HVecUShr* instruction) {
void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) {
LocationSummary* locations = instruction->GetLocations();
- FPRegister lhs = DRegisterFrom(locations->InAt(0));
- FPRegister dst = DRegisterFrom(locations->Out());
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister dst = VRegisterFrom(locations->Out());
int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
switch (instruction->GetPackedType()) {
case Primitive::kPrimByte:
- DCHECK_EQ(8u, instruction->GetVectorLength());
- __ Ushr(dst.V8B(), lhs.V8B(), value);
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Ushr(dst.V16B(), lhs.V16B(), value);
break;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ Ushr(dst.V4H(), lhs.V4H(), value);
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Ushr(dst.V8H(), lhs.V8H(), value);
break;
case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Ushr(dst.V4S(), lhs.V4S(), value);
+ break;
+ case Primitive::kPrimLong:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ Ushr(dst.V2S(), lhs.V2S(), value);
+ __ Ushr(dst.V2D(), lhs.V2D(), value);
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -561,7 +635,9 @@ static void CreateVecMemLocations(ArenaAllocator* arena,
case Primitive::kPrimChar:
case Primitive::kPrimShort:
case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
if (is_load) {
@@ -613,22 +689,27 @@ void LocationsBuilderARM64::VisitVecLoad(HVecLoad* instruction) {
void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
Location reg_loc = Location::NoLocation();
MemOperand mem = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true);
- FPRegister reg = DRegisterFrom(reg_loc);
+ VRegister reg = VRegisterFrom(reg_loc);
switch (instruction->GetPackedType()) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- DCHECK_EQ(8u, instruction->GetVectorLength());
- __ Ld1(reg.V8B(), mem);
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Ld1(reg.V16B(), mem);
break;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ Ld1(reg.V4H(), mem);
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Ld1(reg.V8H(), mem);
break;
case Primitive::kPrimInt:
case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Ld1(reg.V4S(), mem);
+ break;
+ case Primitive::kPrimLong:
+ case Primitive::kPrimDouble:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ Ld1(reg.V2S(), mem);
+ __ Ld1(reg.V2D(), mem);
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -643,22 +724,27 @@ void LocationsBuilderARM64::VisitVecStore(HVecStore* instruction) {
void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) {
Location reg_loc = Location::NoLocation();
MemOperand mem = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false);
- FPRegister reg = DRegisterFrom(reg_loc);
+ VRegister reg = VRegisterFrom(reg_loc);
switch (instruction->GetPackedType()) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- DCHECK_EQ(8u, instruction->GetVectorLength());
- __ St1(reg.V8B(), mem);
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ St1(reg.V16B(), mem);
break;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ St1(reg.V4H(), mem);
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ St1(reg.V8H(), mem);
break;
case Primitive::kPrimInt:
case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ St1(reg.V4S(), mem);
+ break;
+ case Primitive::kPrimLong:
+ case Primitive::kPrimDouble:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ St1(reg.V2S(), mem);
+ __ St1(reg.V2D(), mem);
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index f8bbf68c1c..4ba5c5580f 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -769,6 +769,45 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverB34760542) {
InternalCodeAllocator code_allocator;
codegen.Finalize(&code_allocator);
}
+
+// Check that ParallelMoveResolver works fine for ARM64 for both cases when SIMD is on and off.
+TEST_F(CodegenTest, ARM64ParallelMoveResolverSIMD) {
+ std::unique_ptr<const Arm64InstructionSetFeatures> features(
+ Arm64InstructionSetFeatures::FromCppDefines());
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+ HGraph* graph = CreateGraph(&allocator);
+ arm64::CodeGeneratorARM64 codegen(graph, *features.get(), CompilerOptions());
+
+ codegen.Initialize();
+
+ graph->SetHasSIMD(true);
+ for (int i = 0; i < 2; i++) {
+ HParallelMove* move = new (graph->GetArena()) HParallelMove(graph->GetArena());
+ move->AddMove(Location::SIMDStackSlot(0),
+ Location::SIMDStackSlot(257),
+ Primitive::kPrimDouble,
+ nullptr);
+ move->AddMove(Location::SIMDStackSlot(257),
+ Location::SIMDStackSlot(0),
+ Primitive::kPrimDouble,
+ nullptr);
+ move->AddMove(Location::FpuRegisterLocation(0),
+ Location::FpuRegisterLocation(1),
+ Primitive::kPrimDouble,
+ nullptr);
+ move->AddMove(Location::FpuRegisterLocation(1),
+ Location::FpuRegisterLocation(0),
+ Primitive::kPrimDouble,
+ nullptr);
+ codegen.GetMoveResolver()->EmitNativeCode(move);
+ graph->SetHasSIMD(false);
+ }
+
+ InternalCodeAllocator code_allocator;
+ codegen.Finalize(&code_allocator);
+}
+
#endif
#ifdef ART_ENABLE_CODEGEN_mips
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index d3f431e327..721f74eeee 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -92,6 +92,16 @@ inline vixl::aarch64::FPRegister DRegisterFrom(Location location) {
return vixl::aarch64::FPRegister::GetDRegFromCode(location.reg());
}
+inline vixl::aarch64::FPRegister QRegisterFrom(Location location) {
+ DCHECK(location.IsFpuRegister()) << location;
+ return vixl::aarch64::FPRegister::GetQRegFromCode(location.reg());
+}
+
+inline vixl::aarch64::FPRegister VRegisterFrom(Location location) {
+ DCHECK(location.IsFpuRegister()) << location;
+ return vixl::aarch64::FPRegister::GetVRegFromCode(location.reg());
+}
+
inline vixl::aarch64::FPRegister SRegisterFrom(Location location) {
DCHECK(location.IsFpuRegister()) << location;
return vixl::aarch64::FPRegister::GetSRegFromCode(location.reg());
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index cf7acb36d1..4710b32e9c 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -776,21 +776,25 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric
return false;
case kArm64:
// Allow vectorization for all ARM devices, because Android assumes that
- // ARMv8 AArch64 always supports advanced SIMD. For now, only D registers
- // (64-bit vectors) not Q registers (128-bit vectors).
+ // ARMv8 AArch64 always supports advanced SIMD.
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
*restrictions |= kNoDiv | kNoAbs;
- return TrySetVectorLength(8);
+ return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
*restrictions |= kNoDiv | kNoAbs;
- return TrySetVectorLength(4);
+ return TrySetVectorLength(8);
case Primitive::kPrimInt:
*restrictions |= kNoDiv;
+ return TrySetVectorLength(4);
+ case Primitive::kPrimLong:
+ *restrictions |= kNoDiv | kNoMul;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
+ return TrySetVectorLength(4);
+ case Primitive::kPrimDouble:
return TrySetVectorLength(2);
default:
return false;