Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc              |  66
-rw-r--r--  compiler/optimizing/code_generator_arm64.h               |  16
-rw-r--r--  compiler/optimizing/code_generator_vector_arm64_neon.cc  |  59
-rw-r--r--  compiler/optimizing/code_generator_vector_arm64_sve.cc   | 203
-rw-r--r--  compiler/optimizing/common_arm64.h                       |   8
-rw-r--r--  compiler/optimizing/loop_optimization.cc                 |  33
-rw-r--r--  compiler/optimizing/loop_optimization.h                  |   2
7 files changed, 210 insertions(+), 177 deletions(-)
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index f5d78367fe..bdc5e2d9a3 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -169,54 +169,6 @@ static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()-> // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value()
-// Calculate memory accessing operand for save/restore live registers.
-static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
- LocationSummary* locations,
- int64_t spill_offset,
- bool is_save) {
- const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
- const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
- DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spills,
- codegen->GetNumberOfCoreRegisters(),
- fp_spills,
- codegen->GetNumberOfFloatingPointRegisters()));
-
- CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
- const unsigned v_reg_size_in_bits = codegen->GetSlowPathFPWidth() * 8;
- DCHECK_LE(codegen->GetSIMDRegisterWidth(), kQRegSizeInBytes);
- CPURegList fp_list = CPURegList(CPURegister::kVRegister, v_reg_size_in_bits, fp_spills);
-
- MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
- UseScratchRegisterScope temps(masm);
-
- Register base = masm->StackPointer();
- int64_t core_spill_size = core_list.GetTotalSizeInBytes();
- int64_t fp_spill_size = fp_list.GetTotalSizeInBytes();
- int64_t reg_size = kXRegSizeInBytes;
- int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size;
- uint32_t ls_access_size = WhichPowerOf2(reg_size);
- if (((core_list.GetCount() > 1) || (fp_list.GetCount() > 1)) &&
- !masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) {
- // If the offset does not fit in the instruction's immediate field, use an alternate register
- // to compute the base address(float point registers spill base address).
- Register new_base = temps.AcquireSameSizeAs(base);
- __ Add(new_base, base, Operand(spill_offset + core_spill_size));
- base = new_base;
- spill_offset = -core_spill_size;
- int64_t new_max_ls_pair_offset = fp_spill_size - 2 * reg_size;
- DCHECK(masm->IsImmLSPair(spill_offset, ls_access_size));
- DCHECK(masm->IsImmLSPair(new_max_ls_pair_offset, ls_access_size));
- }
-
- if (is_save) {
- __ StoreCPURegList(core_list, MemOperand(base, spill_offset));
- __ StoreCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
- } else {
- __ LoadCPURegList(core_list, MemOperand(base, spill_offset));
- __ LoadCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
- }
-}
-
void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
@@ -240,15 +192,15 @@ void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummar
stack_offset += fp_reg_size;
}
- SaveRestoreLiveRegistersHelper(codegen,
- locations,
- codegen->GetFirstRegisterSlotInSlowPath(), /* is_save= */ true);
+ InstructionCodeGeneratorARM64* visitor =
+ down_cast<CodeGeneratorARM64*>(codegen)->GetInstructionCodeGeneratorArm64();
+ visitor->SaveLiveRegistersHelper(locations, codegen->GetFirstRegisterSlotInSlowPath());
}
void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
- SaveRestoreLiveRegistersHelper(codegen,
- locations,
- codegen->GetFirstRegisterSlotInSlowPath(), /* is_save= */ false);
+ InstructionCodeGeneratorARM64* visitor =
+ down_cast<CodeGeneratorARM64*>(codegen)->GetInstructionCodeGeneratorArm64();
+ visitor->RestoreLiveRegistersHelper(locations, codegen->GetFirstRegisterSlotInSlowPath());
}
class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
@@ -997,6 +949,12 @@ bool CodeGeneratorARM64::ShouldUseSVE() const {
return GetInstructionSetFeatures().HasSVE();
}
+size_t CodeGeneratorARM64::GetSIMDRegisterWidth() const {
+ return SupportsPredicatedSIMD()
+ ? GetInstructionSetFeatures().GetSVEVectorLength() / kBitsPerByte
+ : vixl::aarch64::kQRegSizeInBytes;
+}
+
#define __ GetVIXLAssembler()->
void CodeGeneratorARM64::EmitJumpTables() {
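The hunk above replaces the formerly hard-coded 16-byte SIMD width with a query that returns the SVE vector length in bytes when predicated SIMD is in use (the diff divides `GetSVEVectorLength()` by `kBitsPerByte`, so the feature reports bits). A minimal standalone sketch of that selection, using a hypothetical `SimdConfig` struct as a stand-in for the real `InstructionSetFeatures`:

```cpp
#include <cstddef>
#include <cstdio>

// Hypothetical stand-ins for ART/VIXL constants and feature queries.
constexpr size_t kBitsPerByte = 8;
constexpr size_t kQRegSizeInBytes = 16;  // Fixed 128-bit NEON Q register.

struct SimdConfig {
  bool predicated_simd;           // True when the SVE code path is selected.
  size_t sve_vector_length_bits;  // Hardware SVE vector length, e.g. 128, 256.
};

// Mirrors the logic of CodeGeneratorARM64::GetSIMDRegisterWidth() above:
// SVE width comes from the hardware vector length, NEON falls back to Q size.
size_t GetSIMDRegisterWidth(const SimdConfig& cfg) {
  return cfg.predicated_simd ? cfg.sve_vector_length_bits / kBitsPerByte
                             : kQRegSizeInBytes;
}

int main() {
  std::printf("NEON:    %zu bytes\n", GetSIMDRegisterWidth({false, 0}));   // 16
  std::printf("SVE-256: %zu bytes\n", GetSIMDRegisterWidth({true, 256}));  // 32
}
```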
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index eb3e9546e0..d4546e5bd5 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -309,6 +309,10 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
virtual void LoadSIMDRegFromStack(Location destination, Location source) = 0;
virtual void MoveSIMDRegToSIMDReg(Location destination, Location source) = 0;
virtual void MoveToSIMDStackSlot(Location destination, Location source) = 0;
+ virtual void SaveLiveRegistersHelper(LocationSummary* locations,
+ int64_t spill_offset) = 0;
+ virtual void RestoreLiveRegistersHelper(LocationSummary* locations,
+ int64_t spill_offset) = 0;
protected:
void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
@@ -462,6 +466,8 @@ class InstructionCodeGeneratorARM64Neon : public InstructionCodeGeneratorARM64 {
void LoadSIMDRegFromStack(Location destination, Location source) override;
void MoveSIMDRegToSIMDReg(Location destination, Location source) override;
void MoveToSIMDStackSlot(Location destination, Location source) override;
+ void SaveLiveRegistersHelper(LocationSummary* locations, int64_t spill_offset) override;
+ void RestoreLiveRegistersHelper(LocationSummary* locations, int64_t spill_offset) override;
};
class LocationsBuilderARM64Neon : public LocationsBuilderARM64 {
@@ -495,8 +501,14 @@ class InstructionCodeGeneratorARM64Sve : public InstructionCodeGeneratorARM64 {
void LoadSIMDRegFromStack(Location destination, Location source) override;
void MoveSIMDRegToSIMDReg(Location destination, Location source) override;
void MoveToSIMDStackSlot(Location destination, Location source) override;
+ void SaveLiveRegistersHelper(LocationSummary* locations, int64_t spill_offset) override;
+ void RestoreLiveRegistersHelper(LocationSummary* locations, int64_t spill_offset) override;
private:
+ // Validate that instruction vector length and packed type are compliant with the SIMD
+ // register size (full SIMD register is used).
+ void ValidateVectorLength(HVecOperation* instr) const;
+
// Returns default predicate register which is used as governing vector predicate
// to implement predicated loop execution.
//
@@ -579,9 +591,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
return vixl::aarch64::kDRegSizeInBytes;
}
- size_t GetSIMDRegisterWidth() const override {
- return vixl::aarch64::kQRegSizeInBytes;
- }
+ size_t GetSIMDRegisterWidth() const override;
uintptr_t GetAddressOf(HBasicBlock* block) override {
vixl::aarch64::Label* block_entry_label = GetLabelOf(block);
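The header diff above turns the old static save/restore helper into a pair of pure-virtual methods on `InstructionCodeGeneratorARM64`, overridden by the NEON and SVE subclasses, so slow paths can spill FP/SIMD registers in whatever format the active back end uses. A rough standalone sketch of that dispatch shape (class and method names follow the diff; `LocationSummary` is an empty placeholder and the bodies only print what the real overrides would emit):

```cpp
#include <cstdint>
#include <iostream>

struct LocationSummary {};  // Placeholder for ART's LocationSummary.

class InstructionCodeGeneratorARM64 {
 public:
  virtual ~InstructionCodeGeneratorARM64() = default;
  // Slow paths call these through the base class, so each SIMD back end
  // chooses its own spill format for vector registers.
  virtual void SaveLiveRegistersHelper(LocationSummary* locations, int64_t spill_offset) = 0;
  virtual void RestoreLiveRegistersHelper(LocationSummary* locations, int64_t spill_offset) = 0;
};

class InstructionCodeGeneratorARM64Neon : public InstructionCodeGeneratorARM64 {
 public:
  void SaveLiveRegistersHelper(LocationSummary*, int64_t offset) override {
    std::cout << "NEON: store core + V-register lists at offset " << offset << "\n";
  }
  void RestoreLiveRegistersHelper(LocationSummary*, int64_t offset) override {
    std::cout << "NEON: load core + V-register lists at offset " << offset << "\n";
  }
};

class InstructionCodeGeneratorARM64Sve : public InstructionCodeGeneratorARM64 {
 public:
  void SaveLiveRegistersHelper(LocationSummary*, int64_t offset) override {
    std::cout << "SVE: store core list, then Z registers at offset " << offset << "\n";
  }
  void RestoreLiveRegistersHelper(LocationSummary*, int64_t offset) override {
    std::cout << "SVE: load core list, then Z registers at offset " << offset << "\n";
  }
};

int main() {
  LocationSummary locations;
  InstructionCodeGeneratorARM64Sve sve;
  // As in SlowPathCodeARM64::SaveLiveRegisters, the caller only sees the base type.
  InstructionCodeGeneratorARM64* visitor = &sve;
  visitor->SaveLiveRegistersHelper(&locations, 32);
}
```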
diff --git a/compiler/optimizing/code_generator_vector_arm64_neon.cc b/compiler/optimizing/code_generator_vector_arm64_neon.cc
index bd64166655..0fe9898635 100644
--- a/compiler/optimizing/code_generator_vector_arm64_neon.cc
+++ b/compiler/optimizing/code_generator_vector_arm64_neon.cc
@@ -17,6 +17,7 @@
#include "code_generator_arm64.h"
#include "arch/arm64/instruction_set_features_arm64.h"
+#include "base/bit_utils_iterator.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"
@@ -1590,6 +1591,64 @@ void InstructionCodeGeneratorARM64Neon::MoveToSIMDStackSlot(Location destination
}
}
+// Calculate memory accessing operand for save/restore live registers.
+template <bool is_save>
+void SaveRestoreLiveRegistersHelperNeonImpl(CodeGeneratorARM64* codegen,
+ LocationSummary* locations,
+ int64_t spill_offset) {
+ const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
+ const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
+ DCHECK(helpers::ArtVixlRegCodeCoherentForRegSet(core_spills,
+ codegen->GetNumberOfCoreRegisters(),
+ fp_spills,
+ codegen->GetNumberOfFloatingPointRegisters()));
+
+ CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
+ const unsigned v_reg_size_in_bits = codegen->GetSlowPathFPWidth() * 8;
+ DCHECK_LE(codegen->GetSIMDRegisterWidth(), kQRegSizeInBytes);
+ CPURegList fp_list = CPURegList(CPURegister::kVRegister, v_reg_size_in_bits, fp_spills);
+
+ MacroAssembler* masm = codegen->GetVIXLAssembler();
+ UseScratchRegisterScope temps(masm);
+
+ Register base = masm->StackPointer();
+ int64_t core_spill_size = core_list.GetTotalSizeInBytes();
+ int64_t fp_spill_size = fp_list.GetTotalSizeInBytes();
+ int64_t reg_size = kXRegSizeInBytes;
+ int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size;
+ uint32_t ls_access_size = WhichPowerOf2(reg_size);
+ if (((core_list.GetCount() > 1) || (fp_list.GetCount() > 1)) &&
+ !masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) {
+ // If the offset does not fit in the instruction's immediate field, use an alternate register
+ // to compute the base address(float point registers spill base address).
+ Register new_base = temps.AcquireSameSizeAs(base);
+ masm->Add(new_base, base, Operand(spill_offset + core_spill_size));
+ base = new_base;
+ spill_offset = -core_spill_size;
+ int64_t new_max_ls_pair_offset = fp_spill_size - 2 * reg_size;
+ DCHECK(masm->IsImmLSPair(spill_offset, ls_access_size));
+ DCHECK(masm->IsImmLSPair(new_max_ls_pair_offset, ls_access_size));
+ }
+
+ if (is_save) {
+ masm->StoreCPURegList(core_list, MemOperand(base, spill_offset));
+ masm->StoreCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
+ } else {
+ masm->LoadCPURegList(core_list, MemOperand(base, spill_offset));
+ masm->LoadCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
+ }
+}
+
+void InstructionCodeGeneratorARM64Neon::SaveLiveRegistersHelper(LocationSummary* locations,
+ int64_t spill_offset) {
+ SaveRestoreLiveRegistersHelperNeonImpl</* is_save= */ true>(codegen_, locations, spill_offset);
+}
+
+void InstructionCodeGeneratorARM64Neon::RestoreLiveRegistersHelper(LocationSummary* locations,
+ int64_t spill_offset) {
+ SaveRestoreLiveRegistersHelperNeonImpl</* is_save= */ false>(codegen_, locations, spill_offset);
+}
+
#undef __
} // namespace arm64
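The NEON helper above folds save and restore into one body selected by a `template <bool is_save>` parameter, so the direction is fixed at compile time and the two public entry points stay trivial. A toy illustration of the same pattern under invented types (the `Spill` struct is not ART code):

```cpp
#include <cstdint>
#include <cstddef>
#include <iostream>
#include <vector>

// Invented stand-in for the register set and stack area being saved/restored.
struct Spill {
  std::vector<int64_t> stack;
  std::vector<int64_t> regs;
};

// One body, two instantiations: is_save is a compile-time constant, so the
// dead branch is optimized away in each instantiation.
template <bool is_save>
void SaveRestoreImpl(Spill& s) {
  for (size_t i = 0; i < s.regs.size(); ++i) {
    if (is_save) {
      s.stack[i] = s.regs[i];  // Store register contents to its stack slot.
    } else {
      s.regs[i] = s.stack[i];  // Reload register contents from its stack slot.
    }
  }
}

void SaveLiveRegistersHelper(Spill& s) { SaveRestoreImpl</* is_save= */ true>(s); }
void RestoreLiveRegistersHelper(Spill& s) { SaveRestoreImpl</* is_save= */ false>(s); }

int main() {
  Spill s{std::vector<int64_t>(4, 0), {10, 20, 30, 40}};
  SaveLiveRegistersHelper(s);
  s.regs = {0, 0, 0, 0};
  RestoreLiveRegistersHelper(s);
  std::cout << s.regs[2] << "\n";  // 30
}
```

Keeping the shared body in a template rather than passing a runtime `bool` matches the structure of the original helper while letting each wrapper name its intent explicitly.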
diff --git a/compiler/optimizing/code_generator_vector_arm64_sve.cc b/compiler/optimizing/code_generator_vector_arm64_sve.cc
index 2254673337..824b6c9476 100644
--- a/compiler/optimizing/code_generator_vector_arm64_sve.cc
+++ b/compiler/optimizing/code_generator_vector_arm64_sve.cc
@@ -17,6 +17,7 @@
#include "code_generator_arm64.h"
#include "arch/arm64/instruction_set_features_arm64.h"
+#include "base/bit_utils_iterator.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"
@@ -33,6 +34,7 @@ using helpers::LocationFrom;
using helpers::OutputRegister;
using helpers::QRegisterFrom;
using helpers::StackOperandFrom;
+using helpers::SveStackOperandFrom;
using helpers::VRegisterFrom;
using helpers::ZRegisterFrom;
using helpers::XRegisterFrom;
@@ -71,6 +73,11 @@ inline Location SVEEncodableConstantOrRegister(HInstruction* constant, HInstruct
return Location::RequiresRegister();
}
+void InstructionCodeGeneratorARM64Sve::ValidateVectorLength(HVecOperation* instr) const {
+ DCHECK_EQ(DataType::Size(instr->GetPackedType()) * instr->GetVectorLength(),
+ codegen_->GetSIMDRegisterWidth());
+}
+
void LocationsBuilderARM64Sve::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
HInstruction* input = instruction->InputAt(0);
@@ -107,11 +114,11 @@ void InstructionCodeGeneratorARM64Sve::VisitVecReplicateScalar(HVecReplicateScal
LocationSummary* locations = instruction->GetLocations();
Location src_loc = locations->InAt(0);
const ZRegister dst = ZRegisterFrom(locations->Out());
+ ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- DCHECK_EQ(16u, instruction->GetVectorLength());
if (src_loc.IsConstant()) {
__ Dup(dst.VnB(), Int64FromLocation(src_loc));
} else {
@@ -120,7 +127,6 @@ void InstructionCodeGeneratorARM64Sve::VisitVecReplicateScalar(HVecReplicateScal
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
- DCHECK_EQ(8u, instruction->GetVectorLength());
if (src_loc.IsConstant()) {
__ Dup(dst.VnH(), Int64FromLocation(src_loc));
} else {
@@ -128,7 +134,6 @@ void InstructionCodeGeneratorARM64Sve::VisitVecReplicateScalar(HVecReplicateScal
}
break;
case DataType::Type::kInt32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
if (src_loc.IsConstant()) {
__ Dup(dst.VnS(), Int64FromLocation(src_loc));
} else {
@@ -136,7 +141,6 @@ void InstructionCodeGeneratorARM64Sve::VisitVecReplicateScalar(HVecReplicateScal
}
break;
case DataType::Type::kInt64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
if (src_loc.IsConstant()) {
__ Dup(dst.VnD(), Int64FromLocation(src_loc));
} else {
@@ -144,7 +148,6 @@ void InstructionCodeGeneratorARM64Sve::VisitVecReplicateScalar(HVecReplicateScal
}
break;
case DataType::Type::kFloat32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
if (src_loc.IsConstant()) {
__ Fdup(dst.VnS(), src_loc.GetConstant()->AsFloatConstant()->GetValue());
} else {
@@ -152,7 +155,6 @@ void InstructionCodeGeneratorARM64Sve::VisitVecReplicateScalar(HVecReplicateScal
}
break;
case DataType::Type::kFloat64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
if (src_loc.IsConstant()) {
__ Fdup(dst.VnD(), src_loc.GetConstant()->AsDoubleConstant()->GetValue());
} else {
@@ -193,19 +195,16 @@ void InstructionCodeGeneratorARM64Sve::VisitVecExtractScalar(HVecExtractScalar*
DCHECK(instruction->IsPredicated());
LocationSummary* locations = instruction->GetLocations();
const VRegister src = VRegisterFrom(locations->InAt(0));
+ ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kInt32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
__ Umov(OutputRegister(instruction), src.V4S(), 0);
break;
case DataType::Type::kInt64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
__ Umov(OutputRegister(instruction), src.V2D(), 0);
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
- DCHECK_LE(2u, instruction->GetVectorLength());
- DCHECK_LE(instruction->GetVectorLength(), 4u);
DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
break;
default:
@@ -251,9 +250,9 @@ void InstructionCodeGeneratorARM64Sve::VisitVecReduce(HVecReduce* instruction) {
const ZRegister src = ZRegisterFrom(locations->InAt(0));
const VRegister dst = DRegisterFrom(locations->Out());
const PRegister p_reg = LoopPReg();
+ ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kInt32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
switch (instruction->GetReductionKind()) {
case HVecReduce::kSum:
__ Saddv(dst.S(), p_reg, src.VnS());
@@ -264,7 +263,6 @@ void InstructionCodeGeneratorARM64Sve::VisitVecReduce(HVecReduce* instruction) {
}
break;
case DataType::Type::kInt64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
switch (instruction->GetReductionKind()) {
case HVecReduce::kSum:
__ Uaddv(dst.D(), p_reg, src.VnD());
@@ -292,8 +290,8 @@ void InstructionCodeGeneratorARM64Sve::VisitVecCnv(HVecCnv* instruction) {
const PRegisterM p_reg = LoopPReg().Merging();
DataType::Type from = instruction->GetInputType();
DataType::Type to = instruction->GetResultType();
+ ValidateVectorLength(instruction);
if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
- DCHECK_EQ(4u, instruction->GetVectorLength());
__ Scvtf(dst.VnS(), p_reg, src.VnS());
} else {
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
@@ -310,31 +308,26 @@ void InstructionCodeGeneratorARM64Sve::VisitVecNeg(HVecNeg* instruction) {
const ZRegister src = ZRegisterFrom(locations->InAt(0));
const ZRegister dst = ZRegisterFrom(locations->Out());
const PRegisterM p_reg = LoopPReg().Merging();
+ ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- DCHECK_EQ(16u, instruction->GetVectorLength());
__ Neg(dst.VnB(), p_reg, src.VnB());
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
- DCHECK_EQ(8u, instruction->GetVectorLength());
__ Neg(dst.VnH(), p_reg, src.VnH());
break;
case DataType::Type::kInt32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
__ Neg(dst.VnS(), p_reg, src.VnS());
break;
case DataType::Type::kInt64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
__ Neg(dst.VnD(), p_reg, src.VnD());
break;
case DataType::Type::kFloat32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
__ Fneg(dst.VnS(), p_reg, src.VnS());
break;
case DataType::Type::kFloat64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
__ Fneg(dst.VnD(), p_reg, src.VnD());
break;
default:
@@ -353,29 +346,24 @@ void InstructionCodeGeneratorARM64Sve::VisitVecAbs(HVecAbs* instruction) {
const ZRegister src = ZRegisterFrom(locations->InAt(0));
const ZRegister dst = ZRegisterFrom(locations->Out());
const PRegisterM p_reg = LoopPReg().Merging();
+ ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kInt8:
- DCHECK_EQ(16u, instruction->GetVectorLength());
__ Abs(dst.VnB(), p_reg, src.VnB());
break;
case DataType::Type::kInt16:
- DCHECK_EQ(8u, instruction->GetVectorLength());
__ Abs(dst.VnH(), p_reg, src.VnH());
break;
case DataType::Type::kInt32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
__ Abs(dst.VnS(), p_reg, src.VnS());
break;
case DataType::Type::kInt64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
__ Abs(dst.VnD(), p_reg, src.VnD());
break;
case DataType::Type::kFloat32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
__ Fabs(dst.VnS(), p_reg, src.VnS());
break;
case DataType::Type::kFloat64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
__ Fabs(dst.VnD(), p_reg, src.VnD());
break;
default:
@@ -394,9 +382,9 @@ void InstructionCodeGeneratorARM64Sve::VisitVecNot(HVecNot* instruction) {
const ZRegister src = ZRegisterFrom(locations->InAt(0));
const ZRegister dst = ZRegisterFrom(locations->Out());
const PRegisterM p_reg = LoopPReg().Merging();
+ ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool: // special case boolean-not
- DCHECK_EQ(16u, instruction->GetVectorLength());
__ Dup(dst.VnB(), 1);
__ Eor(dst.VnB(), p_reg, dst.VnB(), src.VnB());
break;
@@ -454,31 +442,26 @@ void InstructionCodeGeneratorARM64Sve::VisitVecAdd(HVecAdd* instruction) {
const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
const ZRegister dst = ZRegisterFrom(locations->Out());
const PRegisterM p_reg = LoopPReg().Merging();
+ ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- DCHECK_EQ(16u, instruction->GetVectorLength());
__ Add(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
- DCHECK_EQ(8u, instruction->GetVectorLength());
__ Add(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
break;
case DataType::Type::kInt32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
__ Add(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
break;
case DataType::Type::kInt64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
__ Add(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
break;
case DataType::Type::kFloat32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
__ Fadd(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS(), StrictNaNPropagation);
break;
case DataType::Type::kFloat64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
__ Fadd(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD(), StrictNaNPropagation);
break;
default:
@@ -518,31 +501,26 @@ void InstructionCodeGeneratorARM64Sve::VisitVecSub(HVecSub* instruction) {
const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
const ZRegister dst = ZRegisterFrom(locations->Out());
const PRegisterM p_reg = LoopPReg().Merging();
+ ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- DCHECK_EQ(16u, instruction->GetVectorLength());
__ Sub(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
- DCHECK_EQ(8u, instruction->GetVectorLength());
__ Sub(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
break;
case DataType::Type::kInt32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
__ Sub(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
break;
case DataType::Type::kInt64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
__ Sub(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
break;
case DataType::Type::kFloat32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
__ Fsub(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
break;
case DataType::Type::kFloat64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
__ Fsub(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
break;
default:
@@ -572,31 +550,26 @@ void InstructionCodeGeneratorARM64Sve::VisitVecMul(HVecMul* instruction) {
const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
const ZRegister dst = ZRegisterFrom(locations->Out());
const PRegisterM p_reg = LoopPReg().Merging();
+ ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- DCHECK_EQ(16u, instruction->GetVectorLength());
__ Mul(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
- DCHECK_EQ(8u, instruction->GetVectorLength());
__ Mul(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
break;
case DataType::Type::kInt32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
__ Mul(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
break;
case DataType::Type::kInt64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
__ Mul(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
break;
case DataType::Type::kFloat32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
__ Fmul(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS(), StrictNaNPropagation);
break;
case DataType::Type::kFloat64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
__ Fmul(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD(), StrictNaNPropagation);
break;
default:
@@ -616,15 +589,14 @@ void InstructionCodeGeneratorARM64Sve::VisitVecDiv(HVecDiv* instruction) {
const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
const ZRegister dst = ZRegisterFrom(locations->Out());
const PRegisterM p_reg = LoopPReg().Merging();
+ ValidateVectorLength(instruction);
// Note: VIXL guarantees StrictNaNPropagation for Fdiv.
switch (instruction->GetPackedType()) {
case DataType::Type::kFloat32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
__ Fdiv(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
break;
case DataType::Type::kFloat64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
__ Fdiv(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
break;
default:
@@ -665,6 +637,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecAnd(HVecAnd* instruction) {
const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
const ZRegister dst = ZRegisterFrom(locations->Out());
const PRegisterM p_reg = LoopPReg().Merging();
+ ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
@@ -709,6 +682,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecOr(HVecOr* instruction) {
const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
const ZRegister dst = ZRegisterFrom(locations->Out());
const PRegisterM p_reg = LoopPReg().Merging();
+ ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
@@ -744,6 +718,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecXor(HVecXor* instruction) {
const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
const ZRegister dst = ZRegisterFrom(locations->Out());
const PRegisterM p_reg = LoopPReg().Merging();
+ ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
@@ -799,23 +774,20 @@ void InstructionCodeGeneratorARM64Sve::VisitVecShl(HVecShl* instruction) {
const ZRegister dst = ZRegisterFrom(locations->Out());
const PRegisterM p_reg = LoopPReg().Merging();
int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- DCHECK_EQ(16u, instruction->GetVectorLength());
__ Lsl(dst.VnB(), p_reg, lhs.VnB(), value);
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
- DCHECK_EQ(8u, instruction->GetVectorLength());
__ Lsl(dst.VnH(), p_reg, lhs.VnH(), value);
break;
case DataType::Type::kInt32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
__ Lsl(dst.VnS(), p_reg, lhs.VnS(), value);
break;
case DataType::Type::kInt64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
__ Lsl(dst.VnD(), p_reg, lhs.VnD(), value);
break;
default:
@@ -835,23 +807,20 @@ void InstructionCodeGeneratorARM64Sve::VisitVecShr(HVecShr* instruction) {
const ZRegister dst = ZRegisterFrom(locations->Out());
const PRegisterM p_reg = LoopPReg().Merging();
int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- DCHECK_EQ(16u, instruction->GetVectorLength());
__ Asr(dst.VnB(), p_reg, lhs.VnB(), value);
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
- DCHECK_EQ(8u, instruction->GetVectorLength());
__ Asr(dst.VnH(), p_reg, lhs.VnH(), value);
break;
case DataType::Type::kInt32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
__ Asr(dst.VnS(), p_reg, lhs.VnS(), value);
break;
case DataType::Type::kInt64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
__ Asr(dst.VnD(), p_reg, lhs.VnD(), value);
break;
default:
@@ -871,23 +840,20 @@ void InstructionCodeGeneratorARM64Sve::VisitVecUShr(HVecUShr* instruction) {
const ZRegister dst = ZRegisterFrom(locations->Out());
const PRegisterM p_reg = LoopPReg().Merging();
int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- DCHECK_EQ(16u, instruction->GetVectorLength());
__ Lsr(dst.VnB(), p_reg, lhs.VnB(), value);
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
- DCHECK_EQ(8u, instruction->GetVectorLength());
__ Lsr(dst.VnH(), p_reg, lhs.VnH(), value);
break;
case DataType::Type::kInt32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
__ Lsr(dst.VnS(), p_reg, lhs.VnS(), value);
break;
case DataType::Type::kInt64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
__ Lsr(dst.VnD(), p_reg, lhs.VnD(), value);
break;
default:
@@ -943,26 +909,23 @@ void InstructionCodeGeneratorARM64Sve::VisitVecSetScalars(HVecSetScalars* instru
if (IsZeroBitPattern(instruction->InputAt(0))) {
return;
}
+ ValidateVectorLength(instruction);
// Set required elements.
switch (instruction->GetPackedType()) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- DCHECK_EQ(16u, instruction->GetVectorLength());
__ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0));
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
- DCHECK_EQ(8u, instruction->GetVectorLength());
__ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0));
break;
case DataType::Type::kInt32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
__ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0));
break;
case DataType::Type::kInt64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
__ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
break;
default:
@@ -1009,11 +972,11 @@ void InstructionCodeGeneratorARM64Sve::VisitVecMultiplyAccumulate(
const PRegisterM p_reg = LoopPReg().Merging();
DCHECK(locations->InAt(0).Equals(locations->Out()));
+ ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- DCHECK_EQ(16u, instruction->GetVectorLength());
if (instruction->GetOpKind() == HInstruction::kAdd) {
__ Mla(acc.VnB(), p_reg, acc.VnB(), left.VnB(), right.VnB());
} else {
@@ -1022,7 +985,6 @@ void InstructionCodeGeneratorARM64Sve::VisitVecMultiplyAccumulate(
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
- DCHECK_EQ(8u, instruction->GetVectorLength());
if (instruction->GetOpKind() == HInstruction::kAdd) {
__ Mla(acc.VnH(), p_reg, acc.VnB(), left.VnH(), right.VnH());
} else {
@@ -1030,7 +992,6 @@ void InstructionCodeGeneratorARM64Sve::VisitVecMultiplyAccumulate(
}
break;
case DataType::Type::kInt32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
if (instruction->GetOpKind() == HInstruction::kAdd) {
__ Mla(acc.VnS(), p_reg, acc.VnB(), left.VnS(), right.VnS());
} else {
@@ -1077,12 +1038,11 @@ void InstructionCodeGeneratorARM64Sve::VisitVecDotProd(HVecDotProd* instruction)
DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
HVecOperation::ToSignedType(b->GetPackedType()));
DCHECK_EQ(instruction->GetPackedType(), DataType::Type::kInt32);
- DCHECK_EQ(4u, instruction->GetVectorLength());
+ ValidateVectorLength(instruction);
size_t inputs_data_size = DataType::Size(a->GetPackedType());
switch (inputs_data_size) {
case 1u: {
- DCHECK_EQ(16u, a->GetVectorLength());
UseScratchRegisterScope temps(GetVIXLAssembler());
const ZRegister tmp0 = temps.AcquireZ();
const ZRegister tmp1 = ZRegisterFrom(locations->GetTemp(0));
@@ -1143,30 +1103,27 @@ void InstructionCodeGeneratorARM64Sve::VisitVecLoad(HVecLoad* instruction) {
UseScratchRegisterScope temps(GetVIXLAssembler());
Register scratch;
const PRegisterZ p_reg = LoopPReg().Zeroing();
+ ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kInt16: // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
case DataType::Type::kUint16:
- DCHECK_EQ(8u, instruction->GetVectorLength());
__ Ld1h(reg.VnH(), p_reg,
VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
break;
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- DCHECK_EQ(16u, instruction->GetVectorLength());
__ Ld1b(reg.VnB(), p_reg,
VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
break;
case DataType::Type::kInt32:
case DataType::Type::kFloat32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
__ Ld1w(reg.VnS(), p_reg,
VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
break;
case DataType::Type::kInt64:
case DataType::Type::kFloat64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
__ Ld1d(reg.VnD(), p_reg,
VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
break;
@@ -1188,30 +1145,27 @@ void InstructionCodeGeneratorARM64Sve::VisitVecStore(HVecStore* instruction) {
UseScratchRegisterScope temps(GetVIXLAssembler());
Register scratch;
const PRegisterZ p_reg = LoopPReg().Zeroing();
+ ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- DCHECK_EQ(16u, instruction->GetVectorLength());
__ St1b(reg.VnB(), p_reg,
VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
- DCHECK_EQ(8u, instruction->GetVectorLength());
__ St1h(reg.VnH(), p_reg,
VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
break;
case DataType::Type::kInt32:
case DataType::Type::kFloat32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
__ St1w(reg.VnS(), p_reg,
VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
break;
case DataType::Type::kInt64:
case DataType::Type::kFloat64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
__ St1d(reg.VnD(), p_reg,
VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
break;
@@ -1237,22 +1191,18 @@ void InstructionCodeGeneratorARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instru
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- DCHECK_EQ(16u, instruction->GetVectorLength());
__ Ptrue(p_reg.VnB(), vixl::aarch64::SVE_ALL);
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
- DCHECK_EQ(8u, instruction->GetVectorLength());
__ Ptrue(p_reg.VnH(), vixl::aarch64::SVE_ALL);
break;
case DataType::Type::kInt32:
case DataType::Type::kFloat32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
__ Ptrue(p_reg.VnS(), vixl::aarch64::SVE_ALL);
break;
case DataType::Type::kInt64:
case DataType::Type::kFloat64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
__ Ptrue(p_reg.VnD(), vixl::aarch64::SVE_ALL);
break;
default:
@@ -1295,17 +1245,19 @@ void InstructionCodeGeneratorARM64Sve::VisitVecPredWhile(HVecPredWhile* instruct
Register left = InputRegisterAt(instruction, 0);
Register right = InputRegisterAt(instruction, 1);
- switch (instruction->GetVectorLength()) {
- case 16u:
+ DCHECK_EQ(codegen_->GetSIMDRegisterWidth() % instruction->GetVectorLength(), 0u);
+
+ switch (codegen_->GetSIMDRegisterWidth() / instruction->GetVectorLength()) {
+ case 1u:
__ Whilelo(LoopPReg().VnB(), left, right);
break;
- case 8u:
+ case 2u:
__ Whilelo(LoopPReg().VnH(), left, right);
break;
case 4u:
__ Whilelo(LoopPReg().VnS(), left, right);
break;
- case 2u:
+ case 8u:
__ Whilelo(LoopPReg().VnD(), left, right);
break;
default:
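The `VisitVecPredWhile` hunk above stops keying the `whilelo` lane size off the absolute vector length (which only identifies the element size when registers are 128 bits wide) and instead switches on register width divided by vector length, i.e. the element size in bytes. A small sketch of that mapping, with an invented `Lane` enum in place of VIXL's `VnB`/`VnH`/`VnS`/`VnD` views:

```cpp
#include <cassert>
#include <cstddef>
#include <cstdio>

// Invented stand-in for the predicate lane views VnB/VnH/VnS/VnD.
enum class Lane { kB = 1, kH = 2, kS = 4, kD = 8 };

// Element size = SIMD register width / elements per vector; this is what the
// switch in VisitVecPredWhile now selects on, so the lane choice stays
// correct for any SVE register width, not just 128 bits.
Lane PredicateLane(size_t simd_width_bytes, size_t vector_length) {
  assert(simd_width_bytes % vector_length == 0);
  switch (simd_width_bytes / vector_length) {
    case 1: return Lane::kB;
    case 2: return Lane::kH;
    case 4: return Lane::kS;
    case 8: return Lane::kD;
    default: assert(false); return Lane::kB;
  }
}

int main() {
  // 32-byte (256-bit) SVE register holding 8 x int32 -> 4-byte S lanes.
  std::printf("%d\n", static_cast<int>(PredicateLane(32, 8)));  // 4
}
```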
@@ -1333,52 +1285,103 @@ void InstructionCodeGeneratorARM64Sve::VisitVecPredCondition(HVecPredCondition*
Location InstructionCodeGeneratorARM64Sve::AllocateSIMDScratchLocation(
vixl::aarch64::UseScratchRegisterScope* scope) {
- DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
- return LocationFrom(scope->AcquireVRegisterOfSize(kQRegSize));
+ return LocationFrom(scope->AcquireZ());
}
void InstructionCodeGeneratorARM64Sve::FreeSIMDScratchLocation(Location loc,
vixl::aarch64::UseScratchRegisterScope* scope) {
- DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
- scope->Release(QRegisterFrom(loc));
+ scope->Release(ZRegisterFrom(loc));
}
void InstructionCodeGeneratorARM64Sve::LoadSIMDRegFromStack(Location destination,
Location source) {
- DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
- __ Ldr(QRegisterFrom(destination), StackOperandFrom(source));
+ __ Ldr(ZRegisterFrom(destination), SveStackOperandFrom(source));
}
void InstructionCodeGeneratorARM64Sve::MoveSIMDRegToSIMDReg(Location destination,
Location source) {
- DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
- __ Mov(QRegisterFrom(destination), QRegisterFrom(source));
+ __ Mov(ZRegisterFrom(destination), ZRegisterFrom(source));
}
void InstructionCodeGeneratorARM64Sve::MoveToSIMDStackSlot(Location destination,
Location source) {
DCHECK(destination.IsSIMDStackSlot());
- DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
if (source.IsFpuRegister()) {
- __ Str(QRegisterFrom(source), StackOperandFrom(destination));
+ __ Str(ZRegisterFrom(source), SveStackOperandFrom(destination));
} else {
DCHECK(source.IsSIMDStackSlot());
UseScratchRegisterScope temps(GetVIXLAssembler());
if (GetVIXLAssembler()->GetScratchVRegisterList()->IsEmpty()) {
+ // Very rare situation, only when there are cycles in ParallelMoveResolver graph.
const Register temp = temps.AcquireX();
- __ Ldr(temp, MemOperand(sp, source.GetStackIndex()));
- __ Str(temp, MemOperand(sp, destination.GetStackIndex()));
- __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize));
- __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize));
+ DCHECK_EQ(codegen_->GetSIMDRegisterWidth() % kArm64WordSize, 0u);
+ // Emit a number of LDR/STR (XRegister, 64-bit) to cover the whole SIMD register size
+ // when copying a stack slot.
+ for (size_t offset = 0, e = codegen_->GetSIMDRegisterWidth();
+ offset < e;
+ offset += kArm64WordSize) {
+ __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + offset));
+ __ Str(temp, MemOperand(sp, destination.GetStackIndex() + offset));
+ }
} else {
- const VRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
- __ Ldr(temp, StackOperandFrom(source));
- __ Str(temp, StackOperandFrom(destination));
+ const ZRegister temp = temps.AcquireZ();
+ __ Ldr(temp, SveStackOperandFrom(source));
+ __ Str(temp, SveStackOperandFrom(destination));
}
}
}
+template <bool is_save>
+void SaveRestoreLiveRegistersHelperSveImpl(CodeGeneratorARM64* codegen,
+ LocationSummary* locations,
+ int64_t spill_offset) {
+ const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
+ const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
+ DCHECK(helpers::ArtVixlRegCodeCoherentForRegSet(core_spills,
+ codegen->GetNumberOfCoreRegisters(),
+ fp_spills,
+ codegen->GetNumberOfFloatingPointRegisters()));
+ MacroAssembler* masm = codegen->GetVIXLAssembler();
+ Register base = masm->StackPointer();
+
+ CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
+ int64_t core_spill_size = core_list.GetTotalSizeInBytes();
+ int64_t fp_spill_offset = spill_offset + core_spill_size;
+
+ if (codegen->GetGraph()->HasSIMD()) {
+ if (is_save) {
+ masm->StoreCPURegList(core_list, MemOperand(base, spill_offset));
+ } else {
+ masm->LoadCPURegList(core_list, MemOperand(base, spill_offset));
+ }
+ codegen->GetAssembler()->SaveRestoreZRegisterList<is_save>(fp_spills, fp_spill_offset);
+ return;
+ }
+
+ // Case when we only need to restore D-registers.
+ DCHECK(!codegen->GetGraph()->HasSIMD());
+ DCHECK_LE(codegen->GetSlowPathFPWidth(), kDRegSizeInBytes);
+ CPURegList fp_list = CPURegList(CPURegister::kVRegister, kDRegSize, fp_spills);
+ if (is_save) {
+ masm->StoreCPURegList(core_list, MemOperand(base, spill_offset));
+ masm->StoreCPURegList(fp_list, MemOperand(base, fp_spill_offset));
+ } else {
+ masm->LoadCPURegList(core_list, MemOperand(base, spill_offset));
+ masm->LoadCPURegList(fp_list, MemOperand(base, fp_spill_offset));
+ }
+}
+
+void InstructionCodeGeneratorARM64Sve::SaveLiveRegistersHelper(LocationSummary* locations,
+ int64_t spill_offset) {
+ SaveRestoreLiveRegistersHelperSveImpl</* is_save= */ true>(codegen_, locations, spill_offset);
+}
+
+void InstructionCodeGeneratorARM64Sve::RestoreLiveRegistersHelper(LocationSummary* locations,
+ int64_t spill_offset) {
+ SaveRestoreLiveRegistersHelperSveImpl</* is_save= */ false>(codegen_, locations, spill_offset);
+}
+
#undef __
} // namespace arm64
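When `MoveToSIMDStackSlot` has no free vector scratch register, the new SVE code copies the slot through a scratch X register in 64-bit chunks, looping `GetSIMDRegisterWidth() / kArm64WordSize` times instead of the previous fixed two load/store pairs. The same idea in plain C++, as a memcpy-style sketch over a byte array standing in for the stack (names here are illustrative, not ART's):

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

constexpr size_t kWordSize = 8;  // kArm64WordSize: one 64-bit X register.

// Copy a SIMD stack slot of simd_width bytes from src_index to dst_index,
// one 64-bit "register" at a time -- mirroring the LDR/STR loop emitted when
// no scratch V/Z register is available.
void CopySimdStackSlot(std::vector<uint8_t>& stack,
                       size_t dst_index, size_t src_index, size_t simd_width) {
  assert(simd_width % kWordSize == 0);
  for (size_t offset = 0; offset < simd_width; offset += kWordSize) {
    uint64_t temp;  // Plays the role of the scratch X register.
    std::memcpy(&temp, &stack[src_index + offset], kWordSize);  // Ldr temp, [sp, src + offset]
    std::memcpy(&stack[dst_index + offset], &temp, kWordSize);  // Str temp, [sp, dst + offset]
  }
}

int main() {
  std::vector<uint8_t> stack(128, 0);
  for (size_t i = 0; i < 32; ++i) stack[i] = static_cast<uint8_t>(i);  // 256-bit slot at index 0.
  CopySimdStackSlot(stack, /* dst_index= */ 64, /* src_index= */ 0, /* simd_width= */ 32);
  std::printf("%d\n", static_cast<int>(stack[64 + 5]));  // 5
}
```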
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index 72207816e1..81c6561318 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -182,6 +182,10 @@ inline vixl::aarch64::MemOperand StackOperandFrom(Location location) {
return vixl::aarch64::MemOperand(vixl::aarch64::sp, location.GetStackIndex());
}
+inline vixl::aarch64::SVEMemOperand SveStackOperandFrom(Location location) {
+ return vixl::aarch64::SVEMemOperand(vixl::aarch64::sp, location.GetStackIndex());
+}
+
inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base,
size_t offset = 0) {
// A heap reference must be 32bit, so fit in a W register.
@@ -215,6 +219,10 @@ inline Location LocationFrom(const vixl::aarch64::VRegister& fpreg) {
return Location::FpuRegisterLocation(fpreg.GetCode());
}
+inline Location LocationFrom(const vixl::aarch64::ZRegister& zreg) {
+ return Location::FpuRegisterLocation(zreg.GetCode());
+}
+
inline vixl::aarch64::Operand OperandFromMemOperand(
const vixl::aarch64::MemOperand& mem_op) {
if (mem_op.IsImmediateOffset()) {
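The `common_arm64.h` additions lean on the fact that an SVE Z register and the NEON V register with the same number are views of the same architectural register, so `LocationFrom` can map either view onto the same FPU location code. A small illustration of that overload pattern with trivial stand-in register types (not the real VIXL classes):

```cpp
#include <cstdio>

// Trivial stand-ins for vixl::aarch64::VRegister / ZRegister: both views
// carry the same register code, since Zn overlaps Vn architecturally.
struct VRegister { int code; int GetCode() const { return code; } };
struct ZRegister { int code; int GetCode() const { return code; } };

struct Location {
  int fpu_reg_code;
  static Location FpuRegisterLocation(int code) { return Location{code}; }
};

Location LocationFrom(const VRegister& fpreg) {
  return Location::FpuRegisterLocation(fpreg.GetCode());
}
Location LocationFrom(const ZRegister& zreg) {
  // Same location as the V view: the register allocator tracks one FP
  // register regardless of whether it is later accessed as Vn or Zn.
  return Location::FpuRegisterLocation(zreg.GetCode());
}

int main() {
  std::printf("%d %d\n", LocationFrom(VRegister{3}).fpu_reg_code,
              LocationFrom(ZRegister{3}).fpu_reg_code);  // 3 3
}
```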
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 1210dbe67b..02ee4ec057 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -946,9 +946,10 @@ bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int6
// make one particular reference aligned), never to exceed (1).
// (3) variable to record how many references share same alignment.
// (4) variable to record suitable candidate for dynamic loop peeling.
- uint32_t desired_alignment = GetVectorSizeInBytes();
- DCHECK_LE(desired_alignment, 16u);
- uint32_t peeling_votes[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ size_t desired_alignment = GetVectorSizeInBytes();
+ ScopedArenaVector<uint32_t> peeling_votes(desired_alignment, 0u,
+ loop_allocator_->Adapter(kArenaAllocLoopOptimization));
+
uint32_t max_num_same_alignment = 0;
const ArrayReference* peeling_candidate = nullptr;
@@ -1577,14 +1578,6 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node,
}
uint32_t HLoopOptimization::GetVectorSizeInBytes() {
- if (kIsDebugBuild) {
- InstructionSet isa = compiler_options_->GetInstructionSet();
- // TODO: Remove this check when there are no implicit assumptions on the SIMD reg size.
- DCHECK_EQ(simd_register_size_, (isa == InstructionSet::kArm || isa == InstructionSet::kThumb2)
- ? 8u
- : 16u);
- }
-
return simd_register_size_;
}
@@ -1616,6 +1609,8 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
if (IsInPredicatedVectorizationMode()) {
// SVE vectorization.
CHECK(features->AsArm64InstructionSetFeatures()->HasSVE());
+ size_t vector_length = simd_register_size_ / DataType::Size(type);
+ DCHECK_EQ(simd_register_size_ % DataType::Size(type), 0u);
switch (type) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
@@ -1625,7 +1620,7 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
kNoUnsignedHAdd |
kNoUnroundedHAdd |
kNoSAD;
- return TrySetVectorLength(type, 16);
+ return TrySetVectorLength(type, vector_length);
case DataType::Type::kUint16:
case DataType::Type::kInt16:
*restrictions |= kNoDiv |
@@ -1634,19 +1629,19 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
kNoUnroundedHAdd |
kNoSAD |
kNoDotProd;
- return TrySetVectorLength(type, 8);
+ return TrySetVectorLength(type, vector_length);
case DataType::Type::kInt32:
*restrictions |= kNoDiv | kNoSAD;
- return TrySetVectorLength(type, 4);
+ return TrySetVectorLength(type, vector_length);
case DataType::Type::kInt64:
*restrictions |= kNoDiv | kNoSAD;
- return TrySetVectorLength(type, 2);
+ return TrySetVectorLength(type, vector_length);
case DataType::Type::kFloat32:
*restrictions |= kNoReduction;
- return TrySetVectorLength(type, 4);
+ return TrySetVectorLength(type, vector_length);
case DataType::Type::kFloat64:
*restrictions |= kNoReduction;
- return TrySetVectorLength(type, 2);
+ return TrySetVectorLength(type, vector_length);
default:
break;
}
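With SVE the vectorizer can no longer assume 16-byte registers, so `TrySetVectorType` above derives the element count as `simd_register_size_ / DataType::Size(type)` instead of hard-coding 16/8/4/2. The arithmetic in isolation, with a hypothetical `DataTypeSize` helper standing in for `DataType::Size`:

```cpp
#include <cassert>
#include <cstddef>
#include <cstdio>

enum class Type { kInt8, kInt16, kInt32, kInt64, kFloat32, kFloat64 };

// Hypothetical stand-in for DataType::Size().
size_t DataTypeSize(Type t) {
  switch (t) {
    case Type::kInt8:    return 1;
    case Type::kInt16:   return 2;
    case Type::kInt32:
    case Type::kFloat32: return 4;
    case Type::kInt64:
    case Type::kFloat64: return 8;
  }
  return 0;
}

// Elements per vector = SIMD register size in bytes / element size in bytes.
size_t VectorLength(size_t simd_register_size, Type t) {
  assert(simd_register_size % DataTypeSize(t) == 0);
  return simd_register_size / DataTypeSize(t);
}

int main() {
  // 256-bit SVE (32 bytes): 32 x int8, 8 x int32, 4 x double.
  std::printf("%zu %zu %zu\n",
              VectorLength(32, Type::kInt8),
              VectorLength(32, Type::kInt32),
              VectorLength(32, Type::kFloat64));
}
```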
@@ -2311,12 +2306,12 @@ Alignment HLoopOptimization::ComputeAlignment(HInstruction* offset,
return Alignment(DataType::Size(type), 0);
}
-void HLoopOptimization::SetAlignmentStrategy(uint32_t peeling_votes[],
+void HLoopOptimization::SetAlignmentStrategy(const ScopedArenaVector<uint32_t>& peeling_votes,
const ArrayReference* peeling_candidate) {
// Current heuristic: pick the best static loop peeling factor, if any,
// or otherwise use dynamic loop peeling on suggested peeling candidate.
uint32_t max_vote = 0;
- for (int32_t i = 0; i < 16; i++) {
+ for (size_t i = 0; i < peeling_votes.size(); i++) {
if (peeling_votes[i] > max_vote) {
max_vote = peeling_votes[i];
vector_static_peeling_factor_ = i;
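The peeling-vote array above is now sized by the desired alignment (the SIMD width in bytes) rather than a fixed 16 entries, and the strategy still picks the index with the most votes. A compact sketch of that selection, using a plain `std::vector` in place of ART's arena-backed `ScopedArenaVector` (the example vote counts are hypothetical):

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Pick the static peeling factor with the most votes; zero votes everywhere
// means "no static peeling". Mirrors the loop in SetAlignmentStrategy, with
// the vote array sized by the desired alignment.
uint32_t PickStaticPeelingFactor(const std::vector<uint32_t>& peeling_votes) {
  uint32_t max_vote = 0;
  uint32_t factor = 0;
  for (size_t i = 0; i < peeling_votes.size(); ++i) {
    if (peeling_votes[i] > max_vote) {
      max_vote = peeling_votes[i];
      factor = static_cast<uint32_t>(i);
    }
  }
  return factor;
}

int main() {
  size_t desired_alignment = 32;  // e.g. 256-bit SVE registers.
  std::vector<uint32_t> votes(desired_alignment, 0u);
  votes[8] = 3;  // Hypothetical: three references favor peeling 8 iterations.
  votes[4] = 1;
  std::printf("%u\n", PickStaticPeelingFactor(votes));  // 8
}
```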
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 0d76804d9c..d3583ed8a6 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -238,7 +238,7 @@ class HLoopOptimization : public HOptimization {
DataType::Type type,
bool is_string_char_at,
uint32_t peeling = 0);
- void SetAlignmentStrategy(uint32_t peeling_votes[],
+ void SetAlignmentStrategy(const ScopedArenaVector<uint32_t>& peeling_votes,
const ArrayReference* peeling_candidate);
uint32_t MaxNumberPeeled();
bool IsVectorizationProfitable(int64_t trip_count);