Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/code_generator.h                                                                |    3
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc                                                         |   92
-rw-r--r--  compiler/optimizing/code_generator_arm64.h                                                          |  112
-rw-r--r--  compiler/optimizing/code_generator_vector_arm64_neon.cc (renamed from compiler/optimizing/code_generator_vector_arm64.cc) |  215
-rw-r--r--  compiler/optimizing/code_generator_vector_arm64_sve.cc                                              | 1540
-rw-r--r--  compiler/optimizing/nodes.h                                                                         |   10
6 files changed, 1834 insertions, 138 deletions
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 84bf4914d0..ff2be4740d 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -223,6 +223,9 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
virtual const Assembler& GetAssembler() const = 0;
virtual size_t GetWordSize() const = 0;
+ // Returns whether the target supports predicated SIMD instructions.
+ virtual bool SupportsPredicatedSIMD() const { return false; }
+
// Get FP register width in bytes for spilling/restoring in the slow paths.
//
// Note: In SIMD graphs this should return SIMD register width as all FP and SIMD registers
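The hunk above follows the usual CodeGenerator capability-query pattern: the base class returns a conservative default and a backend overrides it when the target actually has the feature. A minimal, self-contained sketch of that pattern (class and member names are illustrative stand-ins, not ART code):

    #include <iostream>

    // Base code generator: conservative default, mirroring SupportsPredicatedSIMD() above.
    class CodeGenBase {
     public:
      virtual ~CodeGenBase() = default;
      virtual bool SupportsPredicatedSIMD() const { return false; }
    };

    // Hypothetical arm64-like backend: reports support only when an SVE-style feature is present.
    class CodeGenArm64Like : public CodeGenBase {
     public:
      explicit CodeGenArm64Like(bool has_sve) : has_sve_(has_sve) {}
      bool SupportsPredicatedSIMD() const override { return has_sve_; }
     private:
      const bool has_sve_;
    };

    int main() {
      CodeGenArm64Like cg(/*has_sve=*/true);
      const CodeGenBase& base = cg;
      // A vectorizer-style client would branch on the capability like this.
      std::cout << (base.SupportsPredicatedSIMD() ? "predicated SIMD" : "fixed-width SIMD") << '\n';
      return 0;
    }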
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 001fcb1b0a..09c801b454 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -75,7 +75,6 @@ using helpers::OperandFromMemOperand;
using helpers::OutputCPURegister;
using helpers::OutputFPRegister;
using helpers::OutputRegister;
-using helpers::QRegisterFrom;
using helpers::RegisterFrom;
using helpers::StackOperandFrom;
using helpers::VIXLRegCodeFromART;
@@ -177,6 +176,7 @@ static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
const unsigned v_reg_size_in_bits = codegen->GetSlowPathFPWidth() * 8;
+ DCHECK_LE(codegen->GetSIMDRegisterWidth(), kQRegSizeInBytes);
CPURegList fp_list = CPURegList(CPURegister::kVRegister, v_reg_size_in_bits, fp_spills);
MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
@@ -426,10 +426,10 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
LocationSummary* locations = instruction_->GetLocations();
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
- SaveLiveRegisters(codegen, locations); // Only saves live 128-bit regs for SIMD.
+ SaveLiveRegisters(codegen, locations); // Only saves live vector regs for SIMD.
arm64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
- RestoreLiveRegisters(codegen, locations); // Only restores live 128-bit regs for SIMD.
+ RestoreLiveRegisters(codegen, locations); // Only restores live vector regs for SIMD.
if (successor_ == nullptr) {
__ B(GetReturnLabel());
} else {
@@ -883,8 +883,10 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
stats),
block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
- location_builder_(graph, this),
- instruction_visitor_(graph, this),
+ location_builder_neon_(graph, this),
+ instruction_visitor_neon_(graph, this),
+ location_builder_sve_(graph, this),
+ instruction_visitor_sve_(graph, this),
move_resolver_(graph->GetAllocator(), this),
assembler_(graph->GetAllocator(),
compiler_options.GetInstructionSetFeatures()->AsArm64InstructionSetFeatures()),
@@ -909,6 +911,19 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
// Save the link register (containing the return address) to mimic Quick.
AddAllocatedRegister(LocationFrom(lr));
+
+ bool use_sve = ShouldUseSVE();
+ if (use_sve) {
+ location_builder_ = &location_builder_sve_;
+ instruction_visitor_ = &instruction_visitor_sve_;
+ } else {
+ location_builder_ = &location_builder_neon_;
+ instruction_visitor_ = &instruction_visitor_neon_;
+ }
+}
+
+bool CodeGeneratorARM64::ShouldUseSVE() const {
+ return kArm64AllowSVE && GetInstructionSetFeatures().HasSVE();
}
#define __ GetVIXLAssembler()->
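The constructor change pairs a build-time kill switch (kArm64AllowSVE) with a runtime ISA query (HasSVE()) and then points location_builder_/instruction_visitor_ at one of two pre-built visitor sets. A compact sketch of that double gate, with invented names standing in for the ART types:

    #include <iostream>

    // Build-time switch: even on SVE-capable hardware, keep the path off until it is ready.
    static constexpr bool kAllowSve = false;

    struct IsaFeatures {
      bool has_sve;
      bool HasSVE() const { return has_sve; }
    };

    struct Visitors {
      const char* name;
    };

    class CodeGen {
     public:
      explicit CodeGen(const IsaFeatures& features)
          : neon_{"neon"},
            sve_{"sve"},
            // Both conditions must hold; otherwise fall back to the NEON visitors.
            active_(kAllowSve && features.HasSVE() ? &sve_ : &neon_) {}
      const char* ActiveBackend() const { return active_->name; }
     private:
      Visitors neon_;
      Visitors sve_;
      Visitors* active_;
    };

    int main() {
      CodeGen cg(IsaFeatures{/*has_sve=*/true});
      std::cout << cg.ActiveBackend() << '\n';  // prints "neon" while kAllowSve is false
    }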
@@ -1038,9 +1053,9 @@ Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind ki
scratch = LocationFrom(vixl_temps_.AcquireX());
} else {
DCHECK_EQ(kind, Location::kFpuRegister);
- scratch = LocationFrom(codegen_->GetGraph()->HasSIMD()
- ? vixl_temps_.AcquireVRegisterOfSize(kQRegSize)
- : vixl_temps_.AcquireD());
+ scratch = codegen_->GetGraph()->HasSIMD()
+ ? codegen_->GetInstructionCodeGeneratorArm64()->AllocateSIMDScratchLocation(&vixl_temps_)
+ : LocationFrom(vixl_temps_.AcquireD());
}
AddScratchLocation(scratch);
return scratch;
@@ -1051,7 +1066,11 @@ void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) {
vixl_temps_.Release(XRegisterFrom(loc));
} else {
DCHECK(loc.IsFpuRegister());
- vixl_temps_.Release(codegen_->GetGraph()->HasSIMD() ? QRegisterFrom(loc) : DRegisterFrom(loc));
+ if (codegen_->GetGraph()->HasSIMD()) {
+ codegen_->GetInstructionCodeGeneratorArm64()->FreeSIMDScratchLocation(loc, &vixl_temps_);
+ } else {
+ vixl_temps_.Release(DRegisterFrom(loc));
+ }
}
RemoveScratchLocation(loc);
}
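The two hunks above route scratch FP allocation for parallel moves through the active instruction visitor, since only it knows how wide a SIMD scratch register must be (a 128-bit Q register for NEON). The invariant is that acquire and release go through the same object so the same-sized register is returned. A rough sketch of that symmetry with a toy scratch pool (names invented for illustration):

    #include <cassert>
    #include <iostream>

    // Toy scratch pool: hands out one scratch "register" of a given width in bits.
    class ScratchPool {
     public:
      int Acquire(int width_bits) {
        assert(!in_use_);
        in_use_ = true;
        width_bits_ = width_bits;
        return width_bits;
      }
      void Release(int width_bits) {
        assert(in_use_ && width_bits == width_bits_);  // must release what was acquired
        in_use_ = false;
      }
     private:
      bool in_use_ = false;
      int width_bits_ = 0;
    };

    // SIMD-aware "visitor": decides the scratch width, so allocate and free both go through it.
    class SimdHelper {
     public:
      explicit SimdHelper(int simd_width_bits) : simd_width_bits_(simd_width_bits) {}
      int AllocateScratch(ScratchPool* pool, bool has_simd) const {
        return pool->Acquire(has_simd ? simd_width_bits_ : 64);
      }
      void FreeScratch(ScratchPool* pool, int width_bits) const { pool->Release(width_bits); }
     private:
      const int simd_width_bits_;
    };

    int main() {
      ScratchPool pool;
      SimdHelper neon(/*simd_width_bits=*/128);
      int scratch = neon.AllocateScratch(&pool, /*has_simd=*/true);
      std::cout << "scratch width: " << scratch << " bits\n";
      neon.FreeScratch(&pool, scratch);
    }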
@@ -1434,7 +1453,7 @@ void CodeGeneratorARM64::MoveLocation(Location destination,
DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
__ Ldr(dst, StackOperandFrom(source));
} else if (source.IsSIMDStackSlot()) {
- __ Ldr(QRegisterFrom(destination), StackOperandFrom(source));
+ GetInstructionCodeGeneratorArm64()->LoadSIMDRegFromStack(destination, source);
} else if (source.IsConstant()) {
DCHECK(CoherentConstantAndType(source, dst_type));
MoveConstant(dst, source.GetConstant());
@@ -1458,30 +1477,14 @@ void CodeGeneratorARM64::MoveLocation(Location destination,
} else {
DCHECK(destination.IsFpuRegister());
if (GetGraph()->HasSIMD()) {
- __ Mov(QRegisterFrom(destination), QRegisterFrom(source));
+ GetInstructionCodeGeneratorArm64()->MoveSIMDRegToSIMDReg(destination, source);
} else {
__ Fmov(VRegister(dst), FPRegisterFrom(source, dst_type));
}
}
}
} else if (destination.IsSIMDStackSlot()) {
- if (source.IsFpuRegister()) {
- __ Str(QRegisterFrom(source), StackOperandFrom(destination));
- } else {
- DCHECK(source.IsSIMDStackSlot());
- UseScratchRegisterScope temps(GetVIXLAssembler());
- if (GetVIXLAssembler()->GetScratchVRegisterList()->IsEmpty()) {
- Register temp = temps.AcquireX();
- __ Ldr(temp, MemOperand(sp, source.GetStackIndex()));
- __ Str(temp, MemOperand(sp, destination.GetStackIndex()));
- __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize));
- __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize));
- } else {
- VRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
- __ Ldr(temp, StackOperandFrom(source));
- __ Str(temp, StackOperandFrom(destination));
- }
- }
+ GetInstructionCodeGeneratorArm64()->MoveToSIMDStackSlot(destination, source);
} else { // The destination is not a register. It must be a stack slot.
DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
if (source.IsRegister() || source.IsFpuRegister()) {
@@ -6372,6 +6375,39 @@ void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_
}
}
+MemOperand InstructionCodeGeneratorARM64::VecNeonAddress(
+ HVecMemoryOperation* instruction,
+ UseScratchRegisterScope* temps_scope,
+ size_t size,
+ bool is_string_char_at,
+ /*out*/ Register* scratch) {
+ LocationSummary* locations = instruction->GetLocations();
+ Register base = InputRegisterAt(instruction, 0);
+
+ if (instruction->InputAt(1)->IsIntermediateAddressIndex()) {
+ DCHECK(!is_string_char_at);
+ return MemOperand(base.X(), InputRegisterAt(instruction, 1).X());
+ }
+
+ Location index = locations->InAt(1);
+ uint32_t offset = is_string_char_at
+ ? mirror::String::ValueOffset().Uint32Value()
+ : mirror::Array::DataOffset(size).Uint32Value();
+ size_t shift = ComponentSizeShiftWidth(size);
+
+ // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet.
+ DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());
+
+ if (index.IsConstant()) {
+ offset += Int64FromLocation(index) << shift;
+ return HeapOperand(base, offset);
+ } else {
+ *scratch = temps_scope->AcquireSameSizeAs(base);
+ __ Add(*scratch, base, Operand(WRegisterFrom(index), LSL, shift));
+ return HeapOperand(*scratch, offset);
+ }
+}
+
#undef __
#undef QUICK_ENTRY_POINT
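The relocated VecNeonAddress helper builds the operand as header_offset + (index << shift) when the index is constant, and otherwise folds base + (index << shift) into a scratch register while keeping the header offset as the immediate. A standalone sketch of the constant-index arithmetic; the header offset below is an illustrative placeholder, not the real mirror::Array/mirror::String layout:

    #include <cstddef>
    #include <cstdint>
    #include <iostream>

    // Returns log2(component size) for power-of-two element sizes, like ComponentSizeShiftWidth.
    static size_t ShiftFor(size_t component_size) {
      size_t shift = 0;
      while ((size_t{1} << shift) < component_size) ++shift;
      return shift;
    }

    // Byte offset of element `index`: data-start offset plus the scaled index.
    static uint32_t ElementOffset(uint32_t data_offset, size_t component_size, int64_t index) {
      return data_offset + static_cast<uint32_t>(index << ShiftFor(component_size));
    }

    int main() {
      // Placeholder data-start offset for an int[] payload (assumption, not the ART value).
      constexpr uint32_t kIntArrayDataOffset = 16;
      std::cout << ElementOffset(kIntArrayDataOffset, sizeof(int32_t), 5) << '\n';  // 16 + 5*4 = 36
    }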
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 47a019424e..627cf72645 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -53,6 +53,9 @@ static constexpr size_t kArm64WordSize = static_cast<size_t>(kArm64PointerSize);
static constexpr int kMaxMacroInstructionSizeInBytes = 15 * vixl::aarch64::kInstructionSize;
static constexpr int kInvokeCodeMarginSizeInBytes = 6 * kMaxMacroInstructionSizeInBytes;
+// SVE is currently not enabled.
+static constexpr bool kArm64AllowSVE = false;
+
static const vixl::aarch64::Register kParameterCoreRegisters[] = {
vixl::aarch64::x1,
vixl::aarch64::x2,
@@ -262,7 +265,7 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
#define DECLARE_VISIT_INSTRUCTION(name, super) \
void Visit##name(H##name* instr) override;
- FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_CONCRETE_INSTRUCTION_SCALAR_COMMON(DECLARE_VISIT_INSTRUCTION)
FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)
@@ -276,7 +279,15 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
Arm64Assembler* GetAssembler() const { return assembler_; }
vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
- private:
+ // SIMD helpers.
+ virtual Location AllocateSIMDScratchLocation(vixl::aarch64::UseScratchRegisterScope* scope) = 0;
+ virtual void FreeSIMDScratchLocation(Location loc,
+ vixl::aarch64::UseScratchRegisterScope* scope) = 0;
+ virtual void LoadSIMDRegFromStack(Location destination, Location source) = 0;
+ virtual void MoveSIMDRegToSIMDReg(Location destination, Location source) = 0;
+ virtual void MoveToSIMDStackSlot(Location destination, Location source) = 0;
+
+ protected:
void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
vixl::aarch64::Register class_reg);
void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
@@ -340,7 +351,11 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
void GenerateIntRemForPower2Denom(HRem *instruction);
void HandleGoto(HInstruction* got, HBasicBlock* successor);
- vixl::aarch64::MemOperand VecAddress(
+ // Helper to set up locations for vector memory operations. Returns the memory operand and,
+ // if used, sets the output parameter scratch to a temporary register used in this operand,
+ // so that the client can release it right after the memory operand use.
+ // Neon version.
+ vixl::aarch64::MemOperand VecNeonAddress(
HVecMemoryOperation* instruction,
// This function may acquire a scratch register.
vixl::aarch64::UseScratchRegisterScope* temps_scope,
@@ -362,7 +377,7 @@ class LocationsBuilderARM64 : public HGraphVisitor {
#define DECLARE_VISIT_INSTRUCTION(name, super) \
void Visit##name(H##name* instr) override;
- FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_CONCRETE_INSTRUCTION_SCALAR_COMMON(DECLARE_VISIT_INSTRUCTION)
FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)
@@ -373,7 +388,7 @@ class LocationsBuilderARM64 : public HGraphVisitor {
<< " (id " << instruction->GetId() << ")";
}
- private:
+ protected:
void HandleBinaryOp(HBinaryOperation* instr);
void HandleFieldSet(HInstruction* instruction);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
@@ -387,6 +402,72 @@ class LocationsBuilderARM64 : public HGraphVisitor {
DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM64);
};
+class InstructionCodeGeneratorARM64Neon : public InstructionCodeGeneratorARM64 {
+ public:
+ InstructionCodeGeneratorARM64Neon(HGraph* graph, CodeGeneratorARM64* codegen) :
+ InstructionCodeGeneratorARM64(graph, codegen) {}
+
+#define DECLARE_VISIT_INSTRUCTION(name, super) \
+ void Visit##name(H##name* instr) override;
+
+ FOR_EACH_CONCRETE_INSTRUCTION_VECTOR_COMMON(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+
+ Location AllocateSIMDScratchLocation(vixl::aarch64::UseScratchRegisterScope* scope) override;
+ void FreeSIMDScratchLocation(Location loc,
+ vixl::aarch64::UseScratchRegisterScope* scope) override;
+ void LoadSIMDRegFromStack(Location destination, Location source) override;
+ void MoveSIMDRegToSIMDReg(Location destination, Location source) override;
+ void MoveToSIMDStackSlot(Location destination, Location source) override;
+};
+
+class LocationsBuilderARM64Neon : public LocationsBuilderARM64 {
+ public:
+ LocationsBuilderARM64Neon(HGraph* graph, CodeGeneratorARM64* codegen) :
+ LocationsBuilderARM64(graph, codegen) {}
+
+#define DECLARE_VISIT_INSTRUCTION(name, super) \
+ void Visit##name(H##name* instr) override;
+
+ FOR_EACH_CONCRETE_INSTRUCTION_VECTOR_COMMON(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+};
+
+class InstructionCodeGeneratorARM64Sve : public InstructionCodeGeneratorARM64 {
+ public:
+ InstructionCodeGeneratorARM64Sve(HGraph* graph, CodeGeneratorARM64* codegen) :
+ InstructionCodeGeneratorARM64(graph, codegen) {}
+
+#define DECLARE_VISIT_INSTRUCTION(name, super) \
+ void Visit##name(H##name* instr) override;
+
+ FOR_EACH_CONCRETE_INSTRUCTION_VECTOR_COMMON(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+
+ Location AllocateSIMDScratchLocation(vixl::aarch64::UseScratchRegisterScope* scope) override;
+ void FreeSIMDScratchLocation(Location loc,
+ vixl::aarch64::UseScratchRegisterScope* scope) override;
+ void LoadSIMDRegFromStack(Location destination, Location source) override;
+ void MoveSIMDRegToSIMDReg(Location destination, Location source) override;
+ void MoveToSIMDStackSlot(Location destination, Location source) override;
+};
+
+class LocationsBuilderARM64Sve : public LocationsBuilderARM64 {
+ public:
+ LocationsBuilderARM64Sve(HGraph* graph, CodeGeneratorARM64* codegen) :
+ LocationsBuilderARM64(graph, codegen) {}
+
+#define DECLARE_VISIT_INSTRUCTION(name, super) \
+ void Visit##name(H##name* instr) override;
+
+ FOR_EACH_CONCRETE_INSTRUCTION_VECTOR_COMMON(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+};
+
class ParallelMoveResolverARM64 : public ParallelMoveResolverNoSwap {
public:
ParallelMoveResolverARM64(ArenaAllocator* allocator, CodeGeneratorARM64* codegen)
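The four new classes split the vector visitors into NEON and SVE flavours while keeping the scalar visitors in the shared bases; the pure virtual SIMD helpers declared earlier let backend-agnostic code such as MoveLocation stay unaware of which flavour is active. A minimal sketch of that shape, with invented names rather than the ART classes:

    #include <iostream>

    // Shared base: scalar handling lives here; SIMD details are deferred to subclasses.
    class InstrCodeGen {
     public:
      virtual ~InstrCodeGen() = default;
      virtual void LoadSIMDRegFromStack() = 0;   // flavour-specific
      void MoveLocation() {                      // shared, flavour-agnostic
        LoadSIMDRegFromStack();
      }
    };

    class InstrCodeGenNeon : public InstrCodeGen {
     public:
      void LoadSIMDRegFromStack() override { std::cout << "ldr q-register (128-bit)\n"; }
    };

    class InstrCodeGenSve : public InstrCodeGen {
     public:
      void LoadSIMDRegFromStack() override { std::cout << "ldr z-register (scalable)\n"; }
    };

    int main() {
      InstrCodeGenNeon neon;
      InstrCodeGenSve sve;
      InstrCodeGen* active = &neon;  // chosen once, as in the CodeGeneratorARM64 constructor
      active->MoveLocation();        // prints the NEON variant
      active = &sve;
      active->MoveLocation();        // prints the SVE variant
    }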
@@ -435,6 +516,8 @@ class CodeGeneratorARM64 : public CodeGenerator {
return kArm64WordSize;
}
+ bool SupportsPredicatedSIMD() const override { return ShouldUseSVE(); }
+
size_t GetSlowPathFPWidth() const override {
return GetGraph()->HasSIMD()
? GetSIMDRegisterWidth()
@@ -455,8 +538,11 @@ class CodeGeneratorARM64 : public CodeGenerator {
return block_entry_label->GetLocation();
}
- HGraphVisitor* GetLocationBuilder() override { return &location_builder_; }
- HGraphVisitor* GetInstructionVisitor() override { return &instruction_visitor_; }
+ HGraphVisitor* GetLocationBuilder() override { return location_builder_; }
+ InstructionCodeGeneratorARM64* GetInstructionCodeGeneratorArm64() {
+ return instruction_visitor_;
+ }
+ HGraphVisitor* GetInstructionVisitor() override { return GetInstructionCodeGeneratorArm64(); }
Arm64Assembler* GetAssembler() override { return &assembler_; }
const Arm64Assembler& GetAssembler() const override { return assembler_; }
vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
@@ -899,14 +985,22 @@ class CodeGeneratorARM64 : public CodeGenerator {
static void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos,
ArenaVector<linker::LinkerPatch>* linker_patches);
+ // Returns whether SVE features are supported and should be used.
+ bool ShouldUseSVE() const;
+
// Labels for each block that will be compiled.
// We use a deque so that the `vixl::aarch64::Label` objects do not move in memory.
ArenaDeque<vixl::aarch64::Label> block_labels_; // Indexed by block id.
vixl::aarch64::Label frame_entry_label_;
ArenaVector<std::unique_ptr<JumpTableARM64>> jump_tables_;
- LocationsBuilderARM64 location_builder_;
- InstructionCodeGeneratorARM64 instruction_visitor_;
+ LocationsBuilderARM64Neon location_builder_neon_;
+ InstructionCodeGeneratorARM64Neon instruction_visitor_neon_;
+ LocationsBuilderARM64Sve location_builder_sve_;
+ InstructionCodeGeneratorARM64Sve instruction_visitor_sve_;
+
+ LocationsBuilderARM64* location_builder_;
+ InstructionCodeGeneratorARM64* instruction_visitor_;
ParallelMoveResolverARM64 move_resolver_;
Arm64Assembler assembler_;
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64_neon.cc
index df95c88c07..78720c3635 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64_neon.cc
@@ -31,9 +31,11 @@ using helpers::DRegisterFrom;
using helpers::HeapOperand;
using helpers::InputRegisterAt;
using helpers::Int64FromLocation;
+using helpers::LocationFrom;
using helpers::OutputRegister;
+using helpers::QRegisterFrom;
+using helpers::StackOperandFrom;
using helpers::VRegisterFrom;
-using helpers::WRegisterFrom;
using helpers::XRegisterFrom;
#define __ GetVIXLAssembler()->
@@ -47,7 +49,7 @@ static bool ShouldEmitDotProductInstructions(const CodeGeneratorARM64* codegen_)
return kArm64EmitDotProdInstructions && codegen_->GetInstructionSetFeatures().HasDotProd();
}
-void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+void LocationsBuilderARM64Neon::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
HInstruction* input = instruction->InputAt(0);
switch (instruction->GetPackedType()) {
@@ -78,7 +80,7 @@ void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruc
}
}
-void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location src_loc = locations->InAt(0);
VRegister dst = VRegisterFrom(locations->Out());
@@ -140,7 +142,7 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar*
}
}
-void LocationsBuilderARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+void LocationsBuilderARM64Neon::VisitVecExtractScalar(HVecExtractScalar* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool:
@@ -164,7 +166,7 @@ void LocationsBuilderARM64::VisitVecExtractScalar(HVecExtractScalar* instruction
}
}
-void InstructionCodeGeneratorARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecExtractScalar(HVecExtractScalar* instruction) {
LocationSummary* locations = instruction->GetLocations();
VRegister src = VRegisterFrom(locations->InAt(0));
switch (instruction->GetPackedType()) {
@@ -215,11 +217,11 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation
}
}
-void LocationsBuilderARM64::VisitVecReduce(HVecReduce* instruction) {
+void LocationsBuilderARM64Neon::VisitVecReduce(HVecReduce* instruction) {
CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}
-void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecReduce(HVecReduce* instruction) {
LocationSummary* locations = instruction->GetLocations();
VRegister src = VRegisterFrom(locations->InAt(0));
VRegister dst = DRegisterFrom(locations->Out());
@@ -255,11 +257,11 @@ void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) {
}
}
-void LocationsBuilderARM64::VisitVecCnv(HVecCnv* instruction) {
+void LocationsBuilderARM64Neon::VisitVecCnv(HVecCnv* instruction) {
CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}
-void InstructionCodeGeneratorARM64::VisitVecCnv(HVecCnv* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecCnv(HVecCnv* instruction) {
LocationSummary* locations = instruction->GetLocations();
VRegister src = VRegisterFrom(locations->InAt(0));
VRegister dst = VRegisterFrom(locations->Out());
@@ -273,11 +275,11 @@ void InstructionCodeGeneratorARM64::VisitVecCnv(HVecCnv* instruction) {
}
}
-void LocationsBuilderARM64::VisitVecNeg(HVecNeg* instruction) {
+void LocationsBuilderARM64Neon::VisitVecNeg(HVecNeg* instruction) {
CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}
-void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecNeg(HVecNeg* instruction) {
LocationSummary* locations = instruction->GetLocations();
VRegister src = VRegisterFrom(locations->InAt(0));
VRegister dst = VRegisterFrom(locations->Out());
@@ -314,11 +316,11 @@ void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) {
}
}
-void LocationsBuilderARM64::VisitVecAbs(HVecAbs* instruction) {
+void LocationsBuilderARM64Neon::VisitVecAbs(HVecAbs* instruction) {
CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}
-void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecAbs(HVecAbs* instruction) {
LocationSummary* locations = instruction->GetLocations();
VRegister src = VRegisterFrom(locations->InAt(0));
VRegister dst = VRegisterFrom(locations->Out());
@@ -353,11 +355,11 @@ void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) {
}
}
-void LocationsBuilderARM64::VisitVecNot(HVecNot* instruction) {
+void LocationsBuilderARM64Neon::VisitVecNot(HVecNot* instruction) {
CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}
-void InstructionCodeGeneratorARM64::VisitVecNot(HVecNot* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecNot(HVecNot* instruction) {
LocationSummary* locations = instruction->GetLocations();
VRegister src = VRegisterFrom(locations->InAt(0));
VRegister dst = VRegisterFrom(locations->Out());
@@ -404,11 +406,11 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati
}
}
-void LocationsBuilderARM64::VisitVecAdd(HVecAdd* instruction) {
+void LocationsBuilderARM64Neon::VisitVecAdd(HVecAdd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
-void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecAdd(HVecAdd* instruction) {
LocationSummary* locations = instruction->GetLocations();
VRegister lhs = VRegisterFrom(locations->InAt(0));
VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -446,11 +448,11 @@ void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) {
}
}
-void LocationsBuilderARM64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+void LocationsBuilderARM64Neon::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
-void InstructionCodeGeneratorARM64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
LocationSummary* locations = instruction->GetLocations();
VRegister lhs = VRegisterFrom(locations->InAt(0));
VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -478,11 +480,11 @@ void InstructionCodeGeneratorARM64::VisitVecSaturationAdd(HVecSaturationAdd* ins
}
}
-void LocationsBuilderARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+void LocationsBuilderARM64Neon::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
-void InstructionCodeGeneratorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
LocationSummary* locations = instruction->GetLocations();
VRegister lhs = VRegisterFrom(locations->InAt(0));
VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -518,11 +520,11 @@ void InstructionCodeGeneratorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instructi
}
}
-void LocationsBuilderARM64::VisitVecSub(HVecSub* instruction) {
+void LocationsBuilderARM64Neon::VisitVecSub(HVecSub* instruction) {
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
-void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecSub(HVecSub* instruction) {
LocationSummary* locations = instruction->GetLocations();
VRegister lhs = VRegisterFrom(locations->InAt(0));
VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -560,11 +562,11 @@ void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) {
}
}
-void LocationsBuilderARM64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+void LocationsBuilderARM64Neon::VisitVecSaturationSub(HVecSaturationSub* instruction) {
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
-void InstructionCodeGeneratorARM64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecSaturationSub(HVecSaturationSub* instruction) {
LocationSummary* locations = instruction->GetLocations();
VRegister lhs = VRegisterFrom(locations->InAt(0));
VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -592,11 +594,11 @@ void InstructionCodeGeneratorARM64::VisitVecSaturationSub(HVecSaturationSub* ins
}
}
-void LocationsBuilderARM64::VisitVecMul(HVecMul* instruction) {
+void LocationsBuilderARM64Neon::VisitVecMul(HVecMul* instruction) {
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
-void InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecMul(HVecMul* instruction) {
LocationSummary* locations = instruction->GetLocations();
VRegister lhs = VRegisterFrom(locations->InAt(0));
VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -630,11 +632,11 @@ void InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) {
}
}
-void LocationsBuilderARM64::VisitVecDiv(HVecDiv* instruction) {
+void LocationsBuilderARM64Neon::VisitVecDiv(HVecDiv* instruction) {
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
-void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecDiv(HVecDiv* instruction) {
LocationSummary* locations = instruction->GetLocations();
VRegister lhs = VRegisterFrom(locations->InAt(0));
VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -654,11 +656,11 @@ void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) {
}
}
-void LocationsBuilderARM64::VisitVecMin(HVecMin* instruction) {
+void LocationsBuilderARM64Neon::VisitVecMin(HVecMin* instruction) {
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
-void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecMin(HVecMin* instruction) {
LocationSummary* locations = instruction->GetLocations();
VRegister lhs = VRegisterFrom(locations->InAt(0));
VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -702,11 +704,11 @@ void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) {
}
}
-void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) {
+void LocationsBuilderARM64Neon::VisitVecMax(HVecMax* instruction) {
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
-void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecMax(HVecMax* instruction) {
LocationSummary* locations = instruction->GetLocations();
VRegister lhs = VRegisterFrom(locations->InAt(0));
VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -750,12 +752,12 @@ void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) {
}
}
-void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) {
+void LocationsBuilderARM64Neon::VisitVecAnd(HVecAnd* instruction) {
// TODO: Allow constants supported by BIC (vector, immediate).
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
-void InstructionCodeGeneratorARM64::VisitVecAnd(HVecAnd* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecAnd(HVecAnd* instruction) {
LocationSummary* locations = instruction->GetLocations();
VRegister lhs = VRegisterFrom(locations->InAt(0));
VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -778,20 +780,20 @@ void InstructionCodeGeneratorARM64::VisitVecAnd(HVecAnd* instruction) {
}
}
-void LocationsBuilderARM64::VisitVecAndNot(HVecAndNot* instruction) {
+void LocationsBuilderARM64Neon::VisitVecAndNot(HVecAndNot* instruction) {
LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}
-void InstructionCodeGeneratorARM64::VisitVecAndNot(HVecAndNot* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecAndNot(HVecAndNot* instruction) {
// TODO: Use BIC (vector, register).
LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}
-void LocationsBuilderARM64::VisitVecOr(HVecOr* instruction) {
+void LocationsBuilderARM64Neon::VisitVecOr(HVecOr* instruction) {
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
-void InstructionCodeGeneratorARM64::VisitVecOr(HVecOr* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecOr(HVecOr* instruction) {
LocationSummary* locations = instruction->GetLocations();
VRegister lhs = VRegisterFrom(locations->InAt(0));
VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -814,11 +816,11 @@ void InstructionCodeGeneratorARM64::VisitVecOr(HVecOr* instruction) {
}
}
-void LocationsBuilderARM64::VisitVecXor(HVecXor* instruction) {
+void LocationsBuilderARM64Neon::VisitVecXor(HVecXor* instruction) {
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
-void InstructionCodeGeneratorARM64::VisitVecXor(HVecXor* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecXor(HVecXor* instruction) {
LocationSummary* locations = instruction->GetLocations();
VRegister lhs = VRegisterFrom(locations->InAt(0));
VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -861,11 +863,11 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati
}
}
-void LocationsBuilderARM64::VisitVecShl(HVecShl* instruction) {
+void LocationsBuilderARM64Neon::VisitVecShl(HVecShl* instruction) {
CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}
-void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecShl(HVecShl* instruction) {
LocationSummary* locations = instruction->GetLocations();
VRegister lhs = VRegisterFrom(locations->InAt(0));
VRegister dst = VRegisterFrom(locations->Out());
@@ -895,11 +897,11 @@ void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) {
}
}
-void LocationsBuilderARM64::VisitVecShr(HVecShr* instruction) {
+void LocationsBuilderARM64Neon::VisitVecShr(HVecShr* instruction) {
CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}
-void InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecShr(HVecShr* instruction) {
LocationSummary* locations = instruction->GetLocations();
VRegister lhs = VRegisterFrom(locations->InAt(0));
VRegister dst = VRegisterFrom(locations->Out());
@@ -929,11 +931,11 @@ void InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) {
}
}
-void LocationsBuilderARM64::VisitVecUShr(HVecUShr* instruction) {
+void LocationsBuilderARM64Neon::VisitVecUShr(HVecUShr* instruction) {
CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}
-void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecUShr(HVecUShr* instruction) {
LocationSummary* locations = instruction->GetLocations();
VRegister lhs = VRegisterFrom(locations->InAt(0));
VRegister dst = VRegisterFrom(locations->Out());
@@ -963,7 +965,7 @@ void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) {
}
}
-void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
+void LocationsBuilderARM64Neon::VisitVecSetScalars(HVecSetScalars* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
@@ -995,7 +997,7 @@ void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
}
}
-void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecSetScalars(HVecSetScalars* instruction) {
LocationSummary* locations = instruction->GetLocations();
VRegister dst = VRegisterFrom(locations->Out());
@@ -1057,14 +1059,14 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in
}
}
-void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+void LocationsBuilderARM64Neon::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
}
// Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a
// 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result.
// However vector MultiplyAccumulate instruction is not affected.
-void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
LocationSummary* locations = instruction->GetLocations();
VRegister acc = VRegisterFrom(locations->InAt(0));
VRegister left = VRegisterFrom(locations->InAt(1));
@@ -1105,7 +1107,7 @@ void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccum
}
}
-void LocationsBuilderARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+void LocationsBuilderARM64Neon::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
// Some conversions require temporary registers.
LocationSummary* locations = instruction->GetLocations();
@@ -1147,7 +1149,7 @@ void LocationsBuilderARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction
}
}
-void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
LocationSummary* locations = instruction->GetLocations();
VRegister acc = VRegisterFrom(locations->InAt(0));
VRegister left = VRegisterFrom(locations->InAt(1));
@@ -1287,7 +1289,7 @@ void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* ins
}
}
-void LocationsBuilderARM64::VisitVecDotProd(HVecDotProd* instruction) {
+void LocationsBuilderARM64Neon::VisitVecDotProd(HVecDotProd* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
DCHECK(instruction->GetPackedType() == DataType::Type::kInt32);
locations->SetInAt(0, Location::RequiresFpuRegister());
@@ -1302,7 +1304,7 @@ void LocationsBuilderARM64::VisitVecDotProd(HVecDotProd* instruction) {
}
}
-void InstructionCodeGeneratorARM64::VisitVecDotProd(HVecDotProd* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecDotProd(HVecDotProd* instruction) {
LocationSummary* locations = instruction->GetLocations();
DCHECK(locations->InAt(0).Equals(locations->Out()));
VRegister acc = VRegisterFrom(locations->InAt(0));
@@ -1392,47 +1394,11 @@ static void CreateVecMemLocations(ArenaAllocator* allocator,
}
}
-// Helper to set up locations for vector memory operations. Returns the memory operand and,
-// if used, sets the output parameter scratch to a temporary register used in this operand,
-// so that the client can release it right after the memory operand use.
-MemOperand InstructionCodeGeneratorARM64::VecAddress(
- HVecMemoryOperation* instruction,
- UseScratchRegisterScope* temps_scope,
- size_t size,
- bool is_string_char_at,
- /*out*/ Register* scratch) {
- LocationSummary* locations = instruction->GetLocations();
- Register base = InputRegisterAt(instruction, 0);
-
- if (instruction->InputAt(1)->IsIntermediateAddressIndex()) {
- DCHECK(!is_string_char_at);
- return MemOperand(base.X(), InputRegisterAt(instruction, 1).X());
- }
-
- Location index = locations->InAt(1);
- uint32_t offset = is_string_char_at
- ? mirror::String::ValueOffset().Uint32Value()
- : mirror::Array::DataOffset(size).Uint32Value();
- size_t shift = ComponentSizeShiftWidth(size);
-
- // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet.
- DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());
-
- if (index.IsConstant()) {
- offset += Int64FromLocation(index) << shift;
- return HeapOperand(base, offset);
- } else {
- *scratch = temps_scope->AcquireSameSizeAs(base);
- __ Add(*scratch, base, Operand(WRegisterFrom(index), LSL, shift));
- return HeapOperand(*scratch, offset);
- }
-}
-
-void LocationsBuilderARM64::VisitVecLoad(HVecLoad* instruction) {
+void LocationsBuilderARM64Neon::VisitVecLoad(HVecLoad* instruction) {
CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
}
-void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecLoad(HVecLoad* instruction) {
LocationSummary* locations = instruction->GetLocations();
size_t size = DataType::Size(instruction->GetPackedType());
VRegister reg = VRegisterFrom(locations->Out());
@@ -1456,7 +1422,7 @@ void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
temps.Release(length); // no longer needed
// Zero extend 8 compressed bytes into 8 chars.
__ Ldr(DRegisterFrom(locations->Out()).V8B(),
- VecAddress(instruction, &temps, 1, /*is_string_char_at*/ true, &scratch));
+ VecNeonAddress(instruction, &temps, 1, /*is_string_char_at*/ true, &scratch));
__ Uxtl(reg.V8H(), reg.V8B());
__ B(&done);
if (scratch.IsValid()) {
@@ -1464,7 +1430,8 @@ void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
}
// Load 8 direct uncompressed chars.
__ Bind(&uncompressed_load);
- __ Ldr(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ true, &scratch));
+ __ Ldr(reg,
+ VecNeonAddress(instruction, &temps, size, /*is_string_char_at*/ true, &scratch));
__ Bind(&done);
return;
}
@@ -1478,7 +1445,8 @@ void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
case DataType::Type::kFloat64:
DCHECK_LE(2u, instruction->GetVectorLength());
DCHECK_LE(instruction->GetVectorLength(), 16u);
- __ Ldr(reg, VecAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch));
+ __ Ldr(reg,
+ VecNeonAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch));
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
@@ -1486,11 +1454,11 @@ void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
}
}
-void LocationsBuilderARM64::VisitVecStore(HVecStore* instruction) {
+void LocationsBuilderARM64Neon::VisitVecStore(HVecStore* instruction) {
CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
}
-void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecStore(HVecStore* instruction) {
LocationSummary* locations = instruction->GetLocations();
size_t size = DataType::Size(instruction->GetPackedType());
VRegister reg = VRegisterFrom(locations->InAt(2));
@@ -1509,7 +1477,8 @@ void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) {
case DataType::Type::kFloat64:
DCHECK_LE(2u, instruction->GetVectorLength());
DCHECK_LE(instruction->GetVectorLength(), 16u);
- __ Str(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
+ __ Str(reg,
+ VecNeonAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
@@ -1517,6 +1486,54 @@ void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) {
}
}
+Location InstructionCodeGeneratorARM64Neon::AllocateSIMDScratchLocation(
+ vixl::aarch64::UseScratchRegisterScope* scope) {
+ DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
+ return LocationFrom(scope->AcquireVRegisterOfSize(kQRegSize));
+}
+
+void InstructionCodeGeneratorARM64Neon::FreeSIMDScratchLocation(Location loc,
+ vixl::aarch64::UseScratchRegisterScope* scope) {
+ DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
+ scope->Release(QRegisterFrom(loc));
+}
+
+void InstructionCodeGeneratorARM64Neon::LoadSIMDRegFromStack(Location destination,
+ Location source) {
+ DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
+ __ Ldr(QRegisterFrom(destination), StackOperandFrom(source));
+}
+
+void InstructionCodeGeneratorARM64Neon::MoveSIMDRegToSIMDReg(Location destination,
+ Location source) {
+ DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
+ __ Mov(QRegisterFrom(destination), QRegisterFrom(source));
+}
+
+void InstructionCodeGeneratorARM64Neon::MoveToSIMDStackSlot(Location destination,
+ Location source) {
+ DCHECK(destination.IsSIMDStackSlot());
+ DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
+
+ if (source.IsFpuRegister()) {
+ __ Str(QRegisterFrom(source), StackOperandFrom(destination));
+ } else {
+ DCHECK(source.IsSIMDStackSlot());
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ if (GetVIXLAssembler()->GetScratchVRegisterList()->IsEmpty()) {
+ Register temp = temps.AcquireX();
+ __ Ldr(temp, MemOperand(sp, source.GetStackIndex()));
+ __ Str(temp, MemOperand(sp, destination.GetStackIndex()));
+ __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize));
+ __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize));
+ } else {
+ VRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
+ __ Ldr(temp, StackOperandFrom(source));
+ __ Str(temp, StackOperandFrom(destination));
+ }
+ }
+}
+
#undef __
} // namespace arm64
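The NEON MoveToSIMDStackSlot above copies a 128-bit stack slot either through a spare Q register or, when no vector scratch is free, as two 64-bit core-register load/store pairs; the fallback works because a SIMD stack slot is exactly two 64-bit words. A tiny host-side sketch of that two-word copy (plain C++, only to illustrate the layout assumption):

    #include <cstdint>
    #include <cstring>
    #include <iostream>

    // Copy a 16-byte "stack slot" as two 64-bit words, mirroring the X-register fallback path.
    static void CopySimdSlot(const uint8_t* src, uint8_t* dst) {
      uint64_t word;
      std::memcpy(&word, src, sizeof(word));                  // Ldr temp, [sp, src]
      std::memcpy(dst, &word, sizeof(word));                  // Str temp, [sp, dst]
      std::memcpy(&word, src + sizeof(word), sizeof(word));   // Ldr temp, [sp, src + 8]
      std::memcpy(dst + sizeof(word), &word, sizeof(word));   // Str temp, [sp, dst + 8]
    }

    int main() {
      uint8_t src[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
      uint8_t dst[16] = {};
      CopySimdSlot(src, dst);
      std::cout << static_cast<int>(dst[0]) << ' ' << static_cast<int>(dst[15]) << '\n';  // 1 16
    }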
diff --git a/compiler/optimizing/code_generator_vector_arm64_sve.cc b/compiler/optimizing/code_generator_vector_arm64_sve.cc
new file mode 100644
index 0000000000..5460ff28dd
--- /dev/null
+++ b/compiler/optimizing/code_generator_vector_arm64_sve.cc
@@ -0,0 +1,1540 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_arm64.h"
+
+#include "arch/arm64/instruction_set_features_arm64.h"
+#include "mirror/array-inl.h"
+#include "mirror/string.h"
+
+using namespace vixl::aarch64; // NOLINT(build/namespaces)
+
+namespace art {
+namespace arm64 {
+
+using helpers::ARM64EncodableConstantOrRegister;
+using helpers::Arm64CanEncodeConstantAsImmediate;
+using helpers::DRegisterFrom;
+using helpers::HeapOperand;
+using helpers::InputRegisterAt;
+using helpers::Int64FromLocation;
+using helpers::LocationFrom;
+using helpers::OutputRegister;
+using helpers::QRegisterFrom;
+using helpers::StackOperandFrom;
+using helpers::VRegisterFrom;
+using helpers::XRegisterFrom;
+
+#define __ GetVIXLAssembler()->
+
+// Build-time switch for Armv8.4-a dot product instructions.
+// TODO: Enable dot product when there is a device to test it on.
+static constexpr bool kArm64EmitDotProdInstructions = false;
+
+// Returns whether dot product instructions should be emitted.
+static bool ShouldEmitDotProductInstructions(const CodeGeneratorARM64* codegen_) {
+ return kArm64EmitDotProdInstructions && codegen_->GetInstructionSetFeatures().HasDotProd();
+}
+
+void LocationsBuilderARM64Sve::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ HInstruction* input = instruction->InputAt(0);
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, ARM64EncodableConstantOrRegister(input, instruction));
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ if (input->IsConstant() &&
+ Arm64CanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
+ locations->SetInAt(0, Location::ConstantLocation(input->AsConstant()));
+ locations->SetOut(Location::RequiresFpuRegister());
+ } else {
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ Location src_loc = locations->InAt(0);
+ VRegister dst = VRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (src_loc.IsConstant()) {
+ __ Movi(dst.V16B(), Int64FromLocation(src_loc));
+ } else {
+ __ Dup(dst.V16B(), InputRegisterAt(instruction, 0));
+ }
+ break;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (src_loc.IsConstant()) {
+ __ Movi(dst.V8H(), Int64FromLocation(src_loc));
+ } else {
+ __ Dup(dst.V8H(), InputRegisterAt(instruction, 0));
+ }
+ break;
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ if (src_loc.IsConstant()) {
+ __ Movi(dst.V4S(), Int64FromLocation(src_loc));
+ } else {
+ __ Dup(dst.V4S(), InputRegisterAt(instruction, 0));
+ }
+ break;
+ case DataType::Type::kInt64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ if (src_loc.IsConstant()) {
+ __ Movi(dst.V2D(), Int64FromLocation(src_loc));
+ } else {
+ __ Dup(dst.V2D(), XRegisterFrom(src_loc));
+ }
+ break;
+ case DataType::Type::kFloat32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ if (src_loc.IsConstant()) {
+ __ Fmov(dst.V4S(), src_loc.GetConstant()->AsFloatConstant()->GetValue());
+ } else {
+ __ Dup(dst.V4S(), VRegisterFrom(src_loc).V4S(), 0);
+ }
+ break;
+ case DataType::Type::kFloat64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ if (src_loc.IsConstant()) {
+ __ Fmov(dst.V2D(), src_loc.GetConstant()->AsDoubleConstant()->GetValue());
+ } else {
+ __ Dup(dst.V2D(), VRegisterFrom(src_loc).V2D(), 0);
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64Sve::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister src = VRegisterFrom(locations->InAt(0));
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Umov(OutputRegister(instruction), src.V4S(), 0);
+ break;
+ case DataType::Type::kInt64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Umov(OutputRegister(instruction), src.V2D(), 0);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 4u);
+ DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+// Helper to set up locations for vector unary operations.
+static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
+ LocationSummary* locations = new (allocator) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kBool:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(),
+ instruction->IsVecNot() ? Location::kOutputOverlap
+ : Location::kNoOutputOverlap);
+ break;
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64Sve::VisitVecReduce(HVecReduce* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecReduce(HVecReduce* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister src = VRegisterFrom(locations->InAt(0));
+ VRegister dst = DRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ switch (instruction->GetReductionKind()) {
+ case HVecReduce::kSum:
+ __ Addv(dst.S(), src.V4S());
+ break;
+ case HVecReduce::kMin:
+ __ Sminv(dst.S(), src.V4S());
+ break;
+ case HVecReduce::kMax:
+ __ Smaxv(dst.S(), src.V4S());
+ break;
+ }
+ break;
+ case DataType::Type::kInt64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ switch (instruction->GetReductionKind()) {
+ case HVecReduce::kSum:
+ __ Addp(dst.D(), src.V2D());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD min/max";
+ UNREACHABLE();
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64Sve::VisitVecCnv(HVecCnv* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecCnv(HVecCnv* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister src = VRegisterFrom(locations->InAt(0));
+ VRegister dst = VRegisterFrom(locations->Out());
+ DataType::Type from = instruction->GetInputType();
+ DataType::Type to = instruction->GetResultType();
+ if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Scvtf(dst.V4S(), src.V4S());
+ } else {
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ }
+}
+
+void LocationsBuilderARM64Sve::VisitVecNeg(HVecNeg* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecNeg(HVecNeg* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister src = VRegisterFrom(locations->InAt(0));
+ VRegister dst = VRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Neg(dst.V16B(), src.V16B());
+ break;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Neg(dst.V8H(), src.V8H());
+ break;
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Neg(dst.V4S(), src.V4S());
+ break;
+ case DataType::Type::kInt64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Neg(dst.V2D(), src.V2D());
+ break;
+ case DataType::Type::kFloat32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Fneg(dst.V4S(), src.V4S());
+ break;
+ case DataType::Type::kFloat64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Fneg(dst.V2D(), src.V2D());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64Sve::VisitVecAbs(HVecAbs* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecAbs(HVecAbs* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister src = VRegisterFrom(locations->InAt(0));
+ VRegister dst = VRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Abs(dst.V16B(), src.V16B());
+ break;
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Abs(dst.V8H(), src.V8H());
+ break;
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Abs(dst.V4S(), src.V4S());
+ break;
+ case DataType::Type::kInt64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Abs(dst.V2D(), src.V2D());
+ break;
+ case DataType::Type::kFloat32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Fabs(dst.V4S(), src.V4S());
+ break;
+ case DataType::Type::kFloat64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Fabs(dst.V2D(), src.V2D());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64Sve::VisitVecNot(HVecNot* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecNot(HVecNot* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister src = VRegisterFrom(locations->InAt(0));
+ VRegister dst = VRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kBool: // special case boolean-not
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Movi(dst.V16B(), 1);
+ __ Eor(dst.V16B(), dst.V16B(), src.V16B());
+ break;
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ __ Not(dst.V16B(), src.V16B()); // lanes do not matter
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+// Helper to set up locations for vector binary operations.
+static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (allocator) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64Sve::VisitVecAdd(HVecAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecAdd(HVecAdd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Add(dst.V16B(), lhs.V16B(), rhs.V16B());
+ break;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Add(dst.V8H(), lhs.V8H(), rhs.V8H());
+ break;
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Add(dst.V4S(), lhs.V4S(), rhs.V4S());
+ break;
+ case DataType::Type::kInt64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Add(dst.V2D(), lhs.V2D(), rhs.V2D());
+ break;
+ case DataType::Type::kFloat32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Fadd(dst.V4S(), lhs.V4S(), rhs.V4S());
+ break;
+ case DataType::Type::kFloat64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Fadd(dst.V2D(), lhs.V2D(), rhs.V2D());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64Sve::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Uqadd(dst.V16B(), lhs.V16B(), rhs.V16B());
+ break;
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Sqadd(dst.V16B(), lhs.V16B(), rhs.V16B());
+ break;
+ case DataType::Type::kUint16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Uqadd(dst.V8H(), lhs.V8H(), rhs.V8H());
+ break;
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Sqadd(dst.V8H(), lhs.V8H(), rhs.V8H());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64Sve::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
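+ // Rounded halving adds (URHADD/SRHADD) round the average up; the plain forms (UHADD/SHADD) truncate.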
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ instruction->IsRounded()
+ ? __ Urhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
+ : __ Uhadd(dst.V16B(), lhs.V16B(), rhs.V16B());
+ break;
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ instruction->IsRounded()
+ ? __ Srhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
+ : __ Shadd(dst.V16B(), lhs.V16B(), rhs.V16B());
+ break;
+ case DataType::Type::kUint16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ instruction->IsRounded()
+ ? __ Urhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
+ : __ Uhadd(dst.V8H(), lhs.V8H(), rhs.V8H());
+ break;
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ instruction->IsRounded()
+ ? __ Srhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
+ : __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64Sve::VisitVecSub(HVecSub* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecSub(HVecSub* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Sub(dst.V16B(), lhs.V16B(), rhs.V16B());
+ break;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Sub(dst.V8H(), lhs.V8H(), rhs.V8H());
+ break;
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Sub(dst.V4S(), lhs.V4S(), rhs.V4S());
+ break;
+ case DataType::Type::kInt64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Sub(dst.V2D(), lhs.V2D(), rhs.V2D());
+ break;
+ case DataType::Type::kFloat32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Fsub(dst.V4S(), lhs.V4S(), rhs.V4S());
+ break;
+ case DataType::Type::kFloat64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Fsub(dst.V2D(), lhs.V2D(), rhs.V2D());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64Sve::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Uqsub(dst.V16B(), lhs.V16B(), rhs.V16B());
+ break;
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Sqsub(dst.V16B(), lhs.V16B(), rhs.V16B());
+ break;
+ case DataType::Type::kUint16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Uqsub(dst.V8H(), lhs.V8H(), rhs.V8H());
+ break;
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Sqsub(dst.V8H(), lhs.V8H(), rhs.V8H());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64Sve::VisitVecMul(HVecMul* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecMul(HVecMul* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Mul(dst.V16B(), lhs.V16B(), rhs.V16B());
+ break;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Mul(dst.V8H(), lhs.V8H(), rhs.V8H());
+ break;
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Mul(dst.V4S(), lhs.V4S(), rhs.V4S());
+ break;
+ case DataType::Type::kFloat32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Fmul(dst.V4S(), lhs.V4S(), rhs.V4S());
+ break;
+ case DataType::Type::kFloat64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Fmul(dst.V2D(), lhs.V2D(), rhs.V2D());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64Sve::VisitVecDiv(HVecDiv* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecDiv(HVecDiv* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kFloat32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Fdiv(dst.V4S(), lhs.V4S(), rhs.V4S());
+ break;
+ case DataType::Type::kFloat64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Fdiv(dst.V2D(), lhs.V2D(), rhs.V2D());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64Sve::VisitVecMin(HVecMin* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecMin(HVecMin* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Umin(dst.V16B(), lhs.V16B(), rhs.V16B());
+ break;
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Smin(dst.V16B(), lhs.V16B(), rhs.V16B());
+ break;
+ case DataType::Type::kUint16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Umin(dst.V8H(), lhs.V8H(), rhs.V8H());
+ break;
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H());
+ break;
+ case DataType::Type::kUint32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S());
+ break;
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S());
+ break;
+ case DataType::Type::kFloat32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Fmin(dst.V4S(), lhs.V4S(), rhs.V4S());
+ break;
+ case DataType::Type::kFloat64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64Sve::VisitVecMax(HVecMax* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecMax(HVecMax* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Umax(dst.V16B(), lhs.V16B(), rhs.V16B());
+ break;
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Smax(dst.V16B(), lhs.V16B(), rhs.V16B());
+ break;
+ case DataType::Type::kUint16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Umax(dst.V8H(), lhs.V8H(), rhs.V8H());
+ break;
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H());
+ break;
+ case DataType::Type::kUint32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S());
+ break;
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S());
+ break;
+ case DataType::Type::kFloat32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Fmax(dst.V4S(), lhs.V4S(), rhs.V4S());
+ break;
+ case DataType::Type::kFloat64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64Sve::VisitVecAnd(HVecAnd* instruction) {
+ // TODO: Allow constants supported by BIC (vector, immediate).
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecAnd(HVecAnd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ __ And(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64Sve::VisitVecAndNot(HVecAndNot* instruction) {
+ LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecAndNot(HVecAndNot* instruction) {
+ // TODO: Use BIC (vector, register).
+ LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
+}
+
+void LocationsBuilderARM64Sve::VisitVecOr(HVecOr* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecOr(HVecOr* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ __ Orr(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64Sve::VisitVecXor(HVecXor* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecXor(HVecXor* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ __ Eor(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+// Helper to set up locations for vector shift operations.
+static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (allocator) LocationSummary(instruction);
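+ // The shift distance is expected to be a constant and is consumed as an immediate.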
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64Sve::VisitVecShl(HVecShl* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecShl(HVecShl* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister dst = VRegisterFrom(locations->Out());
+ int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Shl(dst.V16B(), lhs.V16B(), value);
+ break;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Shl(dst.V8H(), lhs.V8H(), value);
+ break;
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Shl(dst.V4S(), lhs.V4S(), value);
+ break;
+ case DataType::Type::kInt64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Shl(dst.V2D(), lhs.V2D(), value);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64Sve::VisitVecShr(HVecShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecShr(HVecShr* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister dst = VRegisterFrom(locations->Out());
+ int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Sshr(dst.V16B(), lhs.V16B(), value);
+ break;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Sshr(dst.V8H(), lhs.V8H(), value);
+ break;
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Sshr(dst.V4S(), lhs.V4S(), value);
+ break;
+ case DataType::Type::kInt64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Sshr(dst.V2D(), lhs.V2D(), value);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64Sve::VisitVecUShr(HVecUShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecUShr(HVecUShr* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister dst = VRegisterFrom(locations->Out());
+ int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Ushr(dst.V16B(), lhs.V16B(), value);
+ break;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Ushr(dst.V8H(), lhs.V8H(), value);
+ break;
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Ushr(dst.V4S(), lhs.V4S(), value);
+ break;
+ case DataType::Type::kInt64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Ushr(dst.V2D(), lhs.V2D(), value);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64Sve::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+
+ DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
+
+ HInstruction* input = instruction->InputAt(0);
+ bool is_zero = IsZeroBitPattern(input);
+
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister dst = VRegisterFrom(locations->Out());
+
+ DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
+
+ // Zero out all other elements first.
+ __ Movi(dst.V16B(), 0);
+
+ // Shorthand for any type of zero.
+ if (IsZeroBitPattern(instruction->InputAt(0))) {
+ return;
+ }
+
+ // Set required elements.
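+ // The scalar goes into lane 0; all other lanes were zeroed above.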
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0));
+ break;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0));
+ break;
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0));
+ break;
+ case DataType::Type::kInt64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+// Helper to set up locations for vector accumulations.
+static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
+ LocationSummary* locations = new (allocator) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(2, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64Sve::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+// Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a
+// 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result.
+// However, the vector MultiplyAccumulate instruction is not affected.
+void InstructionCodeGeneratorARM64Sve::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister acc = VRegisterFrom(locations->InAt(0));
+ VRegister left = VRegisterFrom(locations->InAt(1));
+ VRegister right = VRegisterFrom(locations->InAt(2));
+
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
+ __ Mla(acc.V16B(), left.V16B(), right.V16B());
+ } else {
+ __ Mls(acc.V16B(), left.V16B(), right.V16B());
+ }
+ break;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
+ __ Mla(acc.V8H(), left.V8H(), right.V8H());
+ } else {
+ __ Mls(acc.V8H(), left.V8H(), right.V8H());
+ }
+ break;
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
+ __ Mla(acc.V4S(), left.V4S(), right.V4S());
+ } else {
+ __ Mls(acc.V4S(), left.V4S(), right.V4S());
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64Sve::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
+ // Some conversions require temporary registers.
+ LocationSummary* locations = instruction->GetLocations();
+ HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
+ HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
+ DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
+ HVecOperation::ToSignedType(b->GetPackedType()));
+ switch (a->GetPackedType()) {
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kInt64:
+ locations->AddTemp(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ FALLTHROUGH_INTENDED;
+ case DataType::Type::kInt32:
+ locations->AddTemp(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ break;
+ default:
+ break;
+ }
+ break;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ if (instruction->GetPackedType() == DataType::Type::kInt64) {
+ locations->AddTemp(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ }
+ break;
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ if (instruction->GetPackedType() == a->GetPackedType()) {
+ locations->AddTemp(Location::RequiresFpuRegister());
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister acc = VRegisterFrom(locations->InAt(0));
+ VRegister left = VRegisterFrom(locations->InAt(1));
+ VRegister right = VRegisterFrom(locations->InAt(2));
+
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+
+ // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S).
+ HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
+ HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
+ DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
+ HVecOperation::ToSignedType(b->GetPackedType()));
+ switch (a->GetPackedType()) {
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, a->GetVectorLength());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Sabal(acc.V8H(), left.V8B(), right.V8B());
+ __ Sabal2(acc.V8H(), left.V16B(), right.V16B());
+ break;
+ case DataType::Type::kInt32: {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
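+ // Sign-extend the bytes to halfwords, then widen-accumulate absolute differences into the 32-bit lanes.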
+ VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
+ VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
+ __ Sxtl(tmp1.V8H(), left.V8B());
+ __ Sxtl(tmp2.V8H(), right.V8B());
+ __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
+ __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
+ __ Sxtl2(tmp1.V8H(), left.V16B());
+ __ Sxtl2(tmp2.V8H(), right.V16B());
+ __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
+ __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
+ break;
+ }
+ case DataType::Type::kInt64: {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
+ VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
+ VRegister tmp3 = VRegisterFrom(locations->GetTemp(2));
+ VRegister tmp4 = VRegisterFrom(locations->GetTemp(3));
+ __ Sxtl(tmp1.V8H(), left.V8B());
+ __ Sxtl(tmp2.V8H(), right.V8B());
+ __ Sxtl(tmp3.V4S(), tmp1.V4H());
+ __ Sxtl(tmp4.V4S(), tmp2.V4H());
+ __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
+ __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
+ __ Sxtl2(tmp3.V4S(), tmp1.V8H());
+ __ Sxtl2(tmp4.V4S(), tmp2.V8H());
+ __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
+ __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
+ __ Sxtl2(tmp1.V8H(), left.V16B());
+ __ Sxtl2(tmp2.V8H(), right.V16B());
+ __ Sxtl(tmp3.V4S(), tmp1.V4H());
+ __ Sxtl(tmp4.V4S(), tmp2.V4H());
+ __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
+ __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
+ __ Sxtl2(tmp3.V4S(), tmp1.V8H());
+ __ Sxtl2(tmp4.V4S(), tmp2.V8H());
+ __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
+ __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+ break;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, a->GetVectorLength());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Sabal(acc.V4S(), left.V4H(), right.V4H());
+ __ Sabal2(acc.V4S(), left.V8H(), right.V8H());
+ break;
+ case DataType::Type::kInt64: {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
+ VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
+ __ Sxtl(tmp1.V4S(), left.V4H());
+ __ Sxtl(tmp2.V4S(), right.V4H());
+ __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
+ __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
+ __ Sxtl2(tmp1.V4S(), left.V8H());
+ __ Sxtl2(tmp2.V4S(), right.V8H());
+ __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
+ __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+ break;
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, a->GetVectorLength());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kInt32: {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ VRegister tmp = VRegisterFrom(locations->GetTemp(0));
+ __ Sub(tmp.V4S(), left.V4S(), right.V4S());
+ __ Abs(tmp.V4S(), tmp.V4S());
+ __ Add(acc.V4S(), acc.V4S(), tmp.V4S());
+ break;
+ }
+ case DataType::Type::kInt64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Sabal(acc.V2D(), left.V2S(), right.V2S());
+ __ Sabal2(acc.V2D(), left.V4S(), right.V4S());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+ break;
+ case DataType::Type::kInt64:
+ DCHECK_EQ(2u, a->GetVectorLength());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kInt64: {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ VRegister tmp = VRegisterFrom(locations->GetTemp(0));
+ __ Sub(tmp.V2D(), left.V2D(), right.V2D());
+ __ Abs(tmp.V2D(), tmp.V2D());
+ __ Add(acc.V2D(), acc.V2D(), tmp.V2D());
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ }
+}
+
+void LocationsBuilderARM64Sve::VisitVecDotProd(HVecDotProd* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ DCHECK(instruction->GetPackedType() == DataType::Type::kInt32);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(2, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+
+ // In the general Int8/Uint8 case (when dot-product instructions are unavailable) a temporary register is needed.
+ if ((DataType::Size(instruction->InputAt(1)->AsVecOperation()->GetPackedType()) == 1) &&
+ !ShouldEmitDotProductInstructions(codegen_)) {
+ locations->AddTemp(Location::RequiresFpuRegister());
+ }
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecDotProd(HVecDotProd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ VRegister acc = VRegisterFrom(locations->InAt(0));
+ VRegister left = VRegisterFrom(locations->InAt(1));
+ VRegister right = VRegisterFrom(locations->InAt(2));
+ HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
+ HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
+ DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
+ HVecOperation::ToSignedType(b->GetPackedType()));
+ DCHECK_EQ(instruction->GetPackedType(), DataType::Type::kInt32);
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+
+ size_t inputs_data_size = DataType::Size(a->GetPackedType());
+ switch (inputs_data_size) {
+ case 1u: {
+ DCHECK_EQ(16u, a->GetVectorLength());
+ if (instruction->IsZeroExtending()) {
+ if (ShouldEmitDotProductInstructions(codegen_)) {
+ __ Udot(acc.V4S(), left.V16B(), right.V16B());
+ } else {
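+ // No dot-product instructions: widen the byte products to halfwords and accumulate them into the 32-bit lanes in two halves.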
+ VRegister tmp = VRegisterFrom(locations->GetTemp(0));
+ __ Umull(tmp.V8H(), left.V8B(), right.V8B());
+ __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
+ __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());
+
+ __ Umull2(tmp.V8H(), left.V16B(), right.V16B());
+ __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
+ __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());
+ }
+ } else {
+ if (ShouldEmitDotProductInstructions(codegen_)) {
+ __ Sdot(acc.V4S(), left.V16B(), right.V16B());
+ } else {
+ VRegister tmp = VRegisterFrom(locations->GetTemp(0));
+ __ Smull(tmp.V8H(), left.V8B(), right.V8B());
+ __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
+ __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());
+
+ __ Smull2(tmp.V8H(), left.V16B(), right.V16B());
+ __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
+ __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());
+ }
+ }
+ break;
+ }
+ case 2u:
+ DCHECK_EQ(8u, a->GetVectorLength());
+ if (instruction->IsZeroExtending()) {
+ __ Umlal(acc.V4S(), left.V4H(), right.V4H());
+ __ Umlal2(acc.V4S(), left.V8H(), right.V8H());
+ } else {
+ __ Smlal(acc.V4S(), left.V4H(), right.V4H());
+ __ Smlal2(acc.V4S(), left.V8H(), right.V8H());
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type size: " << inputs_data_size;
+ }
+}
+
+// Helper to set up locations for vector memory operations.
+static void CreateVecMemLocations(ArenaAllocator* allocator,
+ HVecMemoryOperation* instruction,
+ bool is_load) {
+ LocationSummary* locations = new (allocator) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ if (is_load) {
+ locations->SetOut(Location::RequiresFpuRegister());
+ } else {
+ locations->SetInAt(2, Location::RequiresFpuRegister());
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64Sve::VisitVecLoad(HVecLoad* instruction) {
+ CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecLoad(HVecLoad* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ size_t size = DataType::Size(instruction->GetPackedType());
+ VRegister reg = VRegisterFrom(locations->Out());
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ Register scratch;
+
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kInt16: // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
+ case DataType::Type::kUint16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ // Special handling of compressed/uncompressed string load.
+ if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+ vixl::aarch64::Label uncompressed_load, done;
+ // Test compression bit.
+ static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+ "Expecting 0=compressed, 1=uncompressed");
+ uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ Register length = temps.AcquireW();
+ __ Ldr(length, HeapOperand(InputRegisterAt(instruction, 0), count_offset));
+ __ Tbnz(length.W(), 0, &uncompressed_load);
+ temps.Release(length); // no longer needed
+ // Zero extend 8 compressed bytes into 8 chars.
+ __ Ldr(DRegisterFrom(locations->Out()).V8B(),
+ VecNeonAddress(instruction, &temps, 1, /*is_string_char_at*/ true, &scratch));
+ __ Uxtl(reg.V8H(), reg.V8B());
+ __ B(&done);
+ if (scratch.IsValid()) {
+ temps.Release(scratch); // if used, no longer needed
+ }
+ // Load 8 direct uncompressed chars.
+ __ Bind(&uncompressed_load);
+ __ Ldr(reg,
+ VecNeonAddress(instruction, &temps, size, /*is_string_char_at*/ true, &scratch));
+ __ Bind(&done);
+ return;
+ }
+ FALLTHROUGH_INTENDED;
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kInt32:
+ case DataType::Type::kFloat32:
+ case DataType::Type::kInt64:
+ case DataType::Type::kFloat64:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ __ Ldr(reg,
+ VecNeonAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch));
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64Sve::VisitVecStore(HVecStore* instruction) {
+ CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecStore(HVecStore* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ size_t size = DataType::Size(instruction->GetPackedType());
+ VRegister reg = VRegisterFrom(locations->InAt(2));
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ Register scratch;
+
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ case DataType::Type::kInt32:
+ case DataType::Type::kFloat32:
+ case DataType::Type::kInt64:
+ case DataType::Type::kFloat64:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ __ Str(reg,
+ VecNeonAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
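+// The scratch and stack-move helpers below assume the 128-bit (Q register) SIMD width enforced by the DCHECKs.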
+Location InstructionCodeGeneratorARM64Sve::AllocateSIMDScratchLocation(
+ vixl::aarch64::UseScratchRegisterScope* scope) {
+ DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
+ return LocationFrom(scope->AcquireVRegisterOfSize(kQRegSize));
+}
+
+void InstructionCodeGeneratorARM64Sve::FreeSIMDScratchLocation(Location loc,
+ vixl::aarch64::UseScratchRegisterScope* scope) {
+ DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
+ scope->Release(QRegisterFrom(loc));
+}
+
+void InstructionCodeGeneratorARM64Sve::LoadSIMDRegFromStack(Location destination,
+ Location source) {
+ DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
+ __ Ldr(QRegisterFrom(destination), StackOperandFrom(source));
+}
+
+void InstructionCodeGeneratorARM64Sve::MoveSIMDRegToSIMDReg(Location destination,
+ Location source) {
+ DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
+ __ Mov(QRegisterFrom(destination), QRegisterFrom(source));
+}
+
+void InstructionCodeGeneratorARM64Sve::MoveToSIMDStackSlot(Location destination,
+ Location source) {
+ DCHECK(destination.IsSIMDStackSlot());
+ DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
+
+ if (source.IsFpuRegister()) {
+ __ Str(QRegisterFrom(source), StackOperandFrom(destination));
+ } else {
+ DCHECK(source.IsSIMDStackSlot());
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ if (GetVIXLAssembler()->GetScratchVRegisterList()->IsEmpty()) {
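+ // No scratch V register is available, so copy the 16-byte slot through a core register in two 64-bit chunks.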
+ Register temp = temps.AcquireX();
+ __ Ldr(temp, MemOperand(sp, source.GetStackIndex()));
+ __ Str(temp, MemOperand(sp, destination.GetStackIndex()));
+ __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize));
+ __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize));
+ } else {
+ VRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
+ __ Ldr(temp, StackOperandFrom(source));
+ __ Str(temp, StackOperandFrom(destination));
+ }
+ }
+}
+
+#undef __
+
+} // namespace arm64
+} // namespace art
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 342789348c..e02a393c1b 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1387,7 +1387,7 @@ class HLoopInformationOutwardIterator : public ValueObject {
DISALLOW_COPY_AND_ASSIGN(HLoopInformationOutwardIterator);
};
-#define FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \
+#define FOR_EACH_CONCRETE_INSTRUCTION_SCALAR_COMMON(M) \
M(Above, Condition) \
M(AboveOrEqual, Condition) \
M(Abs, UnaryOperation) \
@@ -1477,7 +1477,9 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(TryBoundary, Instruction) \
M(TypeConversion, Instruction) \
M(UShr, BinaryOperation) \
- M(Xor, BinaryOperation) \
+ M(Xor, BinaryOperation)
+
+#define FOR_EACH_CONCRETE_INSTRUCTION_VECTOR_COMMON(M) \
M(VecReplicateScalar, VecUnaryOperation) \
M(VecExtractScalar, VecUnaryOperation) \
M(VecReduce, VecUnaryOperation) \
@@ -1508,6 +1510,10 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(VecLoad, VecMemoryOperation) \
M(VecStore, VecMemoryOperation) \
+#define FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \
+ FOR_EACH_CONCRETE_INSTRUCTION_SCALAR_COMMON(M) \
+ FOR_EACH_CONCRETE_INSTRUCTION_VECTOR_COMMON(M)
+
/*
* Instructions, shared across several (not all) architectures.
*/