Avoid excessive spill slots for slow paths.
Reducing the frame size makes stack maps smaller because we need
fewer bits for stack masks, and some dex register locations can
use the short location kind rather than the long one. On Nexus 9,
AOSP ToT, the boot.oat size reductions are:
prebuilt multi-part boot image:
- 32-bit boot.oat: -416KiB (-0.6%)
- 64-bit boot.oat: -635KiB (-0.9%)
prebuilt multi-part boot image with read barrier:
- 32-bit boot.oat: -483KiB (-0.7%)
- 64-bit boot.oat: -703KiB (-0.9%)
on-device built single boot image:
- 32-bit boot.oat: -380KiB (-0.6%)
- 64-bit boot.oat: -632KiB (-0.9%)
on-device built single boot image with read barrier:
- 32-bit boot.oat: -448KiB (-0.6%)
- 64-bit boot.oat: -692KiB (-0.9%)
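For a rough sense of where the stack map savings come from, a
back-of-the-envelope sketch follows; the one-bit-per-4-byte-slot
model and the StackMaskBytes() helper are simplifications for
exposition, not ART's actual encoding:

  // Illustrative arithmetic only: assumes one stack mask bit per 4-byte frame
  // slot, rounded up to whole bytes per stack map. Real encoding details differ.
  #include <cstddef>
  #include <cstdio>

  size_t StackMaskBytes(size_t frame_size_in_bytes) {
    size_t bits = frame_size_in_bytes / 4u;
    return (bits + 7u) / 8u;
  }

  int main() {
    // Dropping two always-reserved slow path spill slots (8 bytes) can push the
    // mask under a byte boundary for every stack map in the method.
    printf("%zu -> %zu mask bytes\n", StackMaskBytes(72u), StackMaskBytes(64u));
    // Smaller stack offsets are also more likely to fit the short dex register
    // location encoding rather than the long one.
    return 0;
  }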
The other benefit is that at runtime, threads may need fewer
pages for their stacks, reducing overall memory usage.
We defer the calculation of the maximum spill size from
the main register allocator (linear scan or graph coloring)
to the RegisterAllocationResolver and base it on the
registers that are actually live at slow path safepoints.
The old notion of an artificial slow path safepoint
interval is removed as it is no longer needed.
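In rough form, the deferred calculation in the resolver looks like
the sketch below (not the exact code of this change; the safepoints
container, the codegen_ member and the handling of intrinsics that
call on both the main and the slow path are assumed or elided):

  // Sketch: compute the maximum spill size over slow path safepoints from the
  // registers that are live there, instead of reserving slots for the register
  // allocator's global maximum of simultaneously live registers.
  size_t maximum_safepoint_spill_size = 0u;
  for (HInstruction* instruction : safepoints) {  // safepoints from liveness analysis
    LocationSummary* locations = instruction->GetLocations();
    if (locations->OnlyCallsOnSlowPath()) {
      size_t core_spills =
          codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ true);
      size_t fp_spills =
          codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ false);
      size_t spill_size =
          core_spills * codegen_->GetWordSize() +
          fp_spills * codegen_->GetFloatingPointSpillSlotSize();
      maximum_safepoint_spill_size = std::max(maximum_safepoint_spill_size, spill_size);
    }
  }
  // The result is then passed to InitializeCodeGeneration() in place of the old
  // per-register-file maxima.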
Test: Run ART test suite on host and Nexus 9.
Bug: 30212852
Change-Id: I40b3d114e278e2c5807982904fa49bf6642c6275
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index fd396c4..072d8cf 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -22,6 +22,7 @@
#include "base/arena_containers.h"
#include "base/arena_object.h"
#include "base/bit_field.h"
+#include "base/bit_utils.h"
#include "base/enums.h"
#include "compiled_method.h"
#include "driver/compiler_options.h"
@@ -212,8 +213,7 @@
virtual size_t GetFloatingPointSpillSlotSize() const = 0;
virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0;
void InitializeCodeGeneration(size_t number_of_spill_slots,
- size_t maximum_number_of_live_core_registers,
- size_t maximum_number_of_live_fpu_registers,
+ size_t maximum_safepoint_spill_size,
size_t number_of_out_slots,
const ArenaVector<HBasicBlock*>& block_order);
// Backends can override this as necessary. For most, no special alignment is required.
@@ -279,6 +279,30 @@
return (fpu_callee_save_mask_ & (1 << reg)) != 0;
}
+ uint32_t GetSlowPathSpills(LocationSummary* locations, bool core_registers) const {
+ DCHECK(locations->OnlyCallsOnSlowPath() ||
+ (locations->Intrinsified() && locations->CallsOnMainAndSlowPath() &&
+ !locations->HasCustomSlowPathCallingConvention()));
+ uint32_t live_registers = core_registers
+ ? locations->GetLiveRegisters()->GetCoreRegisters()
+ : locations->GetLiveRegisters()->GetFloatingPointRegisters();
+ if (locations->HasCustomSlowPathCallingConvention()) {
+ // Save only the live registers that the custom calling convention wants us to save.
+ uint32_t caller_saves = core_registers
+ ? locations->GetCustomSlowPathCallerSaves().GetCoreRegisters()
+ : locations->GetCustomSlowPathCallerSaves().GetFloatingPointRegisters();
+ return live_registers & caller_saves;
+ } else {
+ // Default ABI, we need to spill non-callee-save live registers.
+ uint32_t callee_saves = core_registers ? core_callee_save_mask_ : fpu_callee_save_mask_;
+ return live_registers & ~callee_saves;
+ }
+ }
+
+ size_t GetNumberOfSlowPathSpills(LocationSummary* locations, bool core_registers) const {
+ return POPCOUNT(GetSlowPathSpills(locations, core_registers));
+ }
+
// Record native to dex mapping for a suspend point. Required by runtime.
void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
// Check whether we have already recorded mapping at this PC.
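For context beyond this hunk, a minimal sketch of how a slow path
register-save routine can consume the new helpers, assuming the
pre-existing CodeGenerator hooks GetFirstRegisterSlotInSlowPath(),
SaveCoreRegister() and SaveFloatingPointRegister(); the function
name and the explicit bit loop are illustrative:

  // Sketch: save only the registers reported by GetSlowPathSpills(), packing
  // them from the first slow path register slot upwards.
  static void SaveLiveRegistersSketch(CodeGenerator* codegen, LocationSummary* locations) {
    size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
    uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
    for (uint32_t reg = 0u; core_spills != 0u; ++reg, core_spills >>= 1) {
      if ((core_spills & 1u) != 0u) {
        stack_offset += codegen->SaveCoreRegister(stack_offset, reg);
      }
    }
    uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
    for (uint32_t reg = 0u; fp_spills != 0u; ++reg, fp_spills >>= 1) {
      if ((fp_spills & 1u) != 0u) {
        stack_offset += codegen->SaveFloatingPointRegister(stack_offset, reg);
      }
    }
  }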