Opt compiler: ARM64: Use ldp/stp on arm64 for slow paths.
It should be a bit faster than loading/storing single registers, and it
reduces the code size.
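
For background on the win: pairing adjacent spill slots is what lets stp/ldp
halve the number of memory instructions on the save/restore paths. Below is a
minimal standalone sketch of that pairing idea only, not code from this change;
the function name EmitCoreSpills, the textual output and the fixed 8-byte slot
size are assumptions made for the illustration, while the patch itself gets the
pairing from the StoreCPURegList/LoadCPURegList calls in the diff below.

    #include <cstdint>
    #include <cstdio>

    // Walk a core register mask and pair up consecutive spill slots so each
    // pair is covered by one stp instead of two str instructions.
    static void EmitCoreSpills(uint32_t reg_mask, long long spill_offset) {
      int pending = -1;  // Register still waiting for a partner in an stp.
      for (int reg = 0; reg < 32; ++reg) {
        if ((reg_mask & (1u << reg)) == 0) continue;
        if (pending < 0) {
          pending = reg;
        } else {
          std::printf("stp x%d, x%d, [sp, #%lld]\n", pending, reg, spill_offset);
          spill_offset += 16;  // Two 8-byte slots consumed per pair.
          pending = -1;
        }
      }
      if (pending >= 0) {  // Odd register count: one trailing single store.
        std::printf("str x%d, [sp, #%lld]\n", pending, spill_offset);
      }
    }

    // E.g. EmitCoreSpills(0x2E, 0) prints two stp instructions covering
    // {x1,x2} and {x3,x5}, where four str instructions were needed before.
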
Change-Id: I67b8302adf6174b7bb728f7c2afd2c237e34ffde
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index e536b2d..9b3cf8a 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -77,8 +77,8 @@
virtual void EmitNativeCode(CodeGenerator* codegen) = 0;
- void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
- void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
+ virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
+ virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
void RecordPcInfo(CodeGenerator* codegen, HInstruction* instruction, uint32_t dex_pc);
bool IsCoreRegisterSaved(int reg) const {
@@ -97,11 +97,13 @@
return saved_fpu_stack_offsets_[reg];
}
- private:
+ protected:
static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
static constexpr uint32_t kRegisterNotSaved = -1;
uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];
+
+ private:
DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 1c6debd..7e9cdac 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -64,6 +64,7 @@
using helpers::WRegisterFrom;
using helpers::XRegisterFrom;
using helpers::ARM64EncodableConstantOrRegister;
+using helpers::ArtVixlRegCodeCoherentForRegSet;
static constexpr size_t kHeapRefSize = sizeof(mirror::HeapReference<mirror::Object>);
static constexpr int kCurrentMethodStackOffset = 0;
@@ -105,6 +106,88 @@
#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()->
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, x).Int32Value()
+// Calculate the memory operands used to save/restore the live registers.
+static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
+ RegisterSet* register_set,
+ int64_t spill_offset,
+ bool is_save) {
+ DCHECK(ArtVixlRegCodeCoherentForRegSet(register_set->GetCoreRegisters(),
+ codegen->GetNumberOfCoreRegisters(),
+ register_set->GetFloatingPointRegisters(),
+ codegen->GetNumberOfFloatingPointRegisters()));
+
+ CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize,
+ register_set->GetCoreRegisters() & (~callee_saved_core_registers.list()));
+ CPURegList fp_list = CPURegList(CPURegister::kFPRegister, kDRegSize,
+ register_set->GetFloatingPointRegisters() & (~callee_saved_fp_registers.list()));
+
+ MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
+ UseScratchRegisterScope temps(masm);
+
+ Register base = masm->StackPointer();
+ int64_t core_spill_size = core_list.TotalSizeInBytes();
+ int64_t fp_spill_size = fp_list.TotalSizeInBytes();
+ int64_t reg_size = kXRegSizeInBytes;
+ int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size;
+ uint32_t ls_access_size = WhichPowerOf2(reg_size);
+ if (((core_list.Count() > 1) || (fp_list.Count() > 1)) &&
+ !masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) {
+ // If the offset does not fit in the instruction's immediate field, use an alternate register
+ // to compute the base address (i.e. the spill base address of the floating point registers).
+ Register new_base = temps.AcquireSameSizeAs(base);
+ __ Add(new_base, base, Operand(spill_offset + core_spill_size));
+ base = new_base;
+ spill_offset = -core_spill_size;
+ int64_t new_max_ls_pair_offset = fp_spill_size - 2 * reg_size;
+ DCHECK(masm->IsImmLSPair(spill_offset, ls_access_size));
+ DCHECK(masm->IsImmLSPair(new_max_ls_pair_offset, ls_access_size));
+ }
+
+ if (is_save) {
+ __ StoreCPURegList(core_list, MemOperand(base, spill_offset));
+ __ StoreCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
+ } else {
+ __ LoadCPURegList(core_list, MemOperand(base, spill_offset));
+ __ LoadCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
+ }
+}
+
+void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
+ RegisterSet* register_set = locations->GetLiveRegisters();
+ size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
+ for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+ if (!codegen->IsCoreCalleeSaveRegister(i) && register_set->ContainsCoreRegister(i)) {
+ // If the register holds an object, update the stack mask.
+ if (locations->RegisterContainsObject(i)) {
+ locations->SetStackBit(stack_offset / kVRegSize);
+ }
+ DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+ DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+ saved_core_stack_offsets_[i] = stack_offset;
+ stack_offset += kXRegSizeInBytes;
+ }
+ }
+
+ for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) {
+ if (!codegen->IsFloatingPointCalleeSaveRegister(i) &&
+ register_set->ContainsFloatingPointRegister(i)) {
+ DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+ DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+ saved_fpu_stack_offsets_[i] = stack_offset;
+ stack_offset += kDRegSizeInBytes;
+ }
+ }
+
+ SaveRestoreLiveRegistersHelper(codegen, register_set,
+ codegen->GetFirstRegisterSlotInSlowPath(), true /* is_save */);
+}
+
+void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
+ RegisterSet* register_set = locations->GetLiveRegisters();
+ SaveRestoreLiveRegistersHelper(codegen, register_set,
+ codegen->GetFirstRegisterSlotInSlowPath(), false /* is_save */);
+}
+
class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
public:
BoundsCheckSlowPathARM64(HBoundsCheck* instruction,
@@ -527,6 +610,19 @@
GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
}
+vixl::CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const {
+ DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spill_mask_, GetNumberOfCoreRegisters(), 0, 0));
+ return vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize,
+ core_spill_mask_);
+}
+
+vixl::CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const {
+ DCHECK(ArtVixlRegCodeCoherentForRegSet(0, 0, fpu_spill_mask_,
+ GetNumberOfFloatingPointRegisters()));
+ return vixl::CPURegList(vixl::CPURegister::kFPRegister, vixl::kDRegSize,
+ fpu_spill_mask_);
+}
+
void CodeGeneratorARM64::Bind(HBasicBlock* block) {
__ Bind(GetLabelOf(block));
}
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 5a35867..913d881 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -70,6 +70,9 @@
vixl::Label* GetEntryLabel() { return &entry_label_; }
vixl::Label* GetExitLabel() { return &exit_label_; }
+ void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) OVERRIDE;
+ void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) OVERRIDE;
+
private:
vixl::Label entry_label_;
vixl::Label exit_label_;
@@ -232,15 +235,8 @@
void GenerateFrameEntry() OVERRIDE;
void GenerateFrameExit() OVERRIDE;
- vixl::CPURegList GetFramePreservedCoreRegisters() const {
- return vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize,
- core_spill_mask_);
- }
-
- vixl::CPURegList GetFramePreservedFPRegisters() const {
- return vixl::CPURegList(vixl::CPURegister::kFPRegister, vixl::kDRegSize,
- fpu_spill_mask_);
- }
+ vixl::CPURegList GetFramePreservedCoreRegisters() const;
+ vixl::CPURegList GetFramePreservedFPRegisters() const;
void Bind(HBasicBlock* block) OVERRIDE;
@@ -282,10 +278,10 @@
Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
- size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id);
- size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id);
- size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id);
- size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id);
+ size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+ size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+ size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+ size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
// The number of registers that can be allocated. The register allocator may
// decide to reserve and not use a few of them.
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index 53f1f3c..246fff9 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -218,6 +218,28 @@
return Location::RequiresRegister();
}
+// Check whether the registers in an ART register set have the same register codes in VIXL. If the
+// register codes match, the VIXL register lists can be initialized directly from the register masks.
+// Currently, only the SP/WSP and XZR/WZR codes differ between ART and VIXL.
+// Note: This function is only used for debug checks.
+static inline bool ArtVixlRegCodeCoherentForRegSet(uint32_t art_core_registers,
+ size_t num_core,
+ uint32_t art_fpu_registers,
+ size_t num_fpu) {
+ // The register masks won't work if the number of registers is larger than 32.
+ DCHECK_GE(sizeof(art_core_registers) * 8, num_core);
+ DCHECK_GE(sizeof(art_fpu_registers) * 8, num_fpu);
+ for (size_t art_reg_code = 0; art_reg_code < num_core; ++art_reg_code) {
+ if (RegisterSet::Contains(art_core_registers, art_reg_code)) {
+ if (art_reg_code != static_cast<size_t>(VIXLRegCodeFromART(art_reg_code))) {
+ return false;
+ }
+ }
+ }
+ // There is no register code translation for floating point registers.
+ return true;
+}
+
} // namespace helpers
} // namespace arm64
} // namespace art