ART: ARM64: Optimize frame size for SIMD graphs.
For SIMD graphs allocate 64 bit instead of 128 bit on stack for
each FP register to be preserved by the callee in the frame entry
as ABI suggests (currently 64-bit registers are preserved but
more space on stack is allocated).
Note: slow paths still require spilling full 128-bit Q-Registers
for SIMD graphs due to register allocator restrictions.
Test: test-art-target.
Change-Id: Ie0b12e4b769158445f3d0f4562c70d4fb0ea7744
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 48a8320..a75c745 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1249,7 +1249,7 @@
} else {
__ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
}
- return GetFloatingPointSpillSlotSize();
+ return GetSlowPathFPWidth();
}
size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
@@ -1258,7 +1258,7 @@
} else {
__ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
}
- return GetFloatingPointSpillSlotSize();
+ return GetSlowPathFPWidth();
}
void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
@@ -1377,7 +1377,7 @@
__ subq(CpuRegister(RSP), Immediate(adjust));
__ cfi().AdjustCFAOffset(adjust);
uint32_t xmm_spill_location = GetFpuSpillStart();
- size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
+ size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
@@ -1405,7 +1405,7 @@
__ cfi().RememberState();
if (!HasEmptyFrame()) {
uint32_t xmm_spill_location = GetFpuSpillStart();
- size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
+ size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
int offset = xmm_spill_location + (xmm_spill_slot_size * i);