ARM64: Improve code generated to spill/restore registers for slow paths.

Aligning the stack slots used by slow paths to the register size lets
STP/LDP address them with an immediate offset straight off SP, instead of
first materializing the address in a scratch register.
Before:
add x16, sp, #0x44 (68)
stp x0, x1, [x16, #-16]
After:
stp x0, x1, [sp, #56]
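
Reading the addresses above (my arithmetic, assuming both snippets touch the
same spill slot): the pair used to land at sp + 0x44 - 16 = sp + 52, which is
not a multiple of the 8-byte X-register size, so VIXL had to build the address
in the scratch register x16; the aligned slot at sp + 56 fits the STP immediate
directly. As a compile-time sketch:

  // Before: sp + 0x44 - 16 = sp + 52, not 8-byte aligned -> extra 'add' via x16.
  // After:  sp + 56, 8-byte aligned -> offset encodable in the STP itself.
  static_assert((0x44 - 16) % 8 != 0 && 56 % 8 == 0, "why the extra 'add' disappears");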
Change-Id: I3e20ad3fa59d00aee4b4d14ea9d59c7cd546509e
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index d40e2b9..9c6dcaa 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -291,7 +291,8 @@
DCHECK(!block_order.empty());
DCHECK(block_order[0] == GetGraph()->GetEntryBlock());
ComputeSpillMask();
- first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize;
+ first_register_slot_in_slow_path_ = RoundUp(
+ (number_of_out_slots + number_of_spill_slots) * kVRegSize, GetPreferredSlotsAlignment());
if (number_of_spill_slots == 0
&& !HasAllocatedCalleeSaveRegisters()
@@ -302,8 +303,7 @@
SetFrameSize(CallPushesPC() ? GetWordSize() : 0);
} else {
SetFrameSize(RoundUp(
- number_of_spill_slots * kVRegSize
- + number_of_out_slots * kVRegSize
+ first_register_slot_in_slow_path_
+ maximum_number_of_live_core_registers * GetWordSize()
+ maximum_number_of_live_fpu_registers * GetFloatingPointSpillSlotSize()
+ FrameEntrySpillSize(),
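(For illustration only: a minimal sketch of the arithmetic this hunk changes,
with a local stand-in for art::RoundUp and hypothetical slot counts chosen so
the numbers line up with the commit-message example.)

  #include <cstdint>

  // Stand-in for art::RoundUp (base/bit_utils.h); 'n' must be a power of two.
  constexpr uint32_t RoundUp(uint32_t x, uint32_t n) { return (x + n - 1) & ~(n - 1); }

  constexpr uint32_t kVRegSize = 4;               // size of one dex virtual-register slot
  constexpr uint32_t number_of_out_slots = 3;     // hypothetical
  constexpr uint32_t number_of_spill_slots = 10;  // hypothetical

  // Old value: 13 * 4 = 52, which is not 8-byte aligned.
  constexpr uint32_t old_first_slot =
      (number_of_out_slots + number_of_spill_slots) * kVRegSize;
  // New value: rounded up to the backend's preferred alignment (8 on ARM64, 1 by default).
  constexpr uint32_t new_first_slot =
      RoundUp(old_first_slot, /* GetPreferredSlotsAlignment() */ 8u);

  static_assert(old_first_slot == 52 && new_first_slot == 56,
                "the first slow-path register slot moves from sp+52 to sp+56");

Since SetFrameSize() now starts from first_register_slot_in_slow_path_ rather
than re-summing the slots, any padding the rounding introduces is counted in
the frame size.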
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index b8540ba..47e6625 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -211,6 +211,8 @@
size_t maximum_number_of_live_fpu_registers,
size_t number_of_out_slots,
const ArenaVector<HBasicBlock*>& block_order);
+ // Backends can override this as necessary. For most, no special alignment is required.
+ virtual uint32_t GetPreferredSlotsAlignment() const { return 1; }
uint32_t GetFrameSize() const { return frame_size_; }
void SetFrameSize(uint32_t size) { frame_size_ = size; }
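(A quick sanity check on the default, assuming the usual RoundUp semantics:
with an alignment of 1 the rounding is the identity, so backends that do not
override the hook keep their existing frame layout.)

  // Same RoundUp stand-in as in the previous sketch.
  constexpr unsigned RoundUp(unsigned x, unsigned n) { return (x + n - 1) & ~(n - 1); }
  static_assert(RoundUp(52u, 1u) == 52u, "alignment of 1 leaves the slot layout unchanged");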
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index c2f055a..b35c520 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -418,6 +418,10 @@
block_labels_.resize(GetGraph()->GetBlocks().size());
}
+ // We want to use the STP and LDP instructions to spill and restore registers for slow paths.
+ // These instructions can only encode offsets that are multiples of the register size accessed.
+ uint32_t GetPreferredSlotsAlignment() const OVERRIDE { return vixl::kXRegSizeInBytes; }
+
JumpTableARM64* CreateJumpTable(HPackedSwitch* switch_instr) {
jump_tables_.emplace_back(new (GetGraph()->GetArena()) JumpTableARM64(switch_instr));
return jump_tables_.back().get();
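(For reference, the encoding constraint the new comment relies on, sketched
generically; the helper below is illustrative and is not VIXL's API. LDP/STP
encode a signed 7-bit immediate scaled by the size of the registers being
transferred, so the reachable offsets are multiples of that size in
[-64 * size, 63 * size].)

  #include <cstdint>

  // Hypothetical check: can an LDP/STP of two 'reg_size'-byte registers
  // address [base, #offset] without help from a scratch register?
  constexpr bool PairOffsetFits(int64_t offset, int64_t reg_size) {
    return offset % reg_size == 0 && -64 * reg_size <= offset && offset <= 63 * reg_size;
  }

  static_assert(PairOffsetFits(56, 8), "aligned X-register slot: [sp, #56] is encodable");
  static_assert(!PairOffsetFits(52, 8), "unaligned slot: VIXL needs a scratch register");
  static_assert(PairOffsetFits(496, 16), "Q-register pairs scale the immediate by 16");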