Avoid excessive spill slots for slow paths.
Reducing the frame size makes stack maps smaller because we need
fewer bits for stack masks, and some dex register locations can
use the short location kind rather than the long one. On Nexus 9,
AOSP ToT, the boot.oat size reductions are:
prebuilt multi-part boot image:
- 32-bit boot.oat: -416KiB (-0.6%)
- 64-bit boot.oat: -635KiB (-0.9%)
prebuilt multi-part boot image with read barrier:
- 32-bit boot.oat: -483KiB (-0.7%)
- 64-bit boot.oat: -703KiB (-0.9%)
on-device built single boot image:
- 32-bit boot.oat: -380KiB (-0.6%)
- 64-bit boot.oat: -632KiB (-0.9%)
on-device built single boot image with read barrier:
- 32-bit boot.oat: -448KiB (-0.6%)
- 64-bit boot.oat: -692KiB (-0.9%)
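For a rough sense of where the stack map savings come from, a
back-of-the-envelope sketch follows; the one-bit-per-4-byte-slot
model and the StackMaskBytes() helper are simplifications for
exposition, not ART's actual encoding:

  // Illustrative arithmetic only: assumes one stack mask bit per 4-byte frame
  // slot, rounded up to whole bytes per stack map. Real encoding details differ.
  #include <cstddef>
  #include <cstdio>

  size_t StackMaskBytes(size_t frame_size_in_bytes) {
    size_t bits = frame_size_in_bytes / 4u;
    return (bits + 7u) / 8u;
  }

  int main() {
    // Dropping two always-reserved slow path spill slots (8 bytes) can push the
    // mask under a byte boundary for every stack map in the method.
    printf("%zu -> %zu mask bytes\n", StackMaskBytes(72u), StackMaskBytes(64u));
    // Smaller stack offsets are also more likely to fit the short dex register
    // location encoding rather than the long one.
    return 0;
  }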
The other benefit is that at runtime, threads may need fewer
pages for their stacks, reducing overall memory usage.
We defer the calculation of the maximum spill size from
the main register allocator (linear scan or graph coloring)
to the RegisterAllocationResolver and base it on the
registers that are actually live at slow path safepoints.
The old notion of an artificial slow path safepoint
interval is removed as it is no longer needed.
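In rough form, the deferred calculation in the resolver looks like
the sketch below (not the exact code of this change; the safepoints
container, the codegen_ member and the handling of intrinsics that
call on both the main and the slow path are assumed or elided):

  // Sketch: compute the maximum spill size over slow path safepoints from the
  // registers that are live there, instead of reserving slots for the register
  // allocator's global maximum of simultaneously live registers.
  size_t maximum_safepoint_spill_size = 0u;
  for (HInstruction* instruction : safepoints) {  // safepoints from liveness analysis
    LocationSummary* locations = instruction->GetLocations();
    if (locations->OnlyCallsOnSlowPath()) {
      size_t core_spills =
          codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ true);
      size_t fp_spills =
          codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ false);
      size_t spill_size =
          core_spills * codegen_->GetWordSize() +
          fp_spills * codegen_->GetFloatingPointSpillSlotSize();
      maximum_safepoint_spill_size = std::max(maximum_safepoint_spill_size, spill_size);
    }
  }
  // The result is then passed to InitializeCodeGeneration() in place of the old
  // per-register-file maxima.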
Test: Run ART test suite on host and Nexus 9.
Bug: 30212852
Change-Id: I40b3d114e278e2c5807982904fa49bf6642c6275
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index fd396c4..072d8cf 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -22,6 +22,7 @@
#include "base/arena_containers.h"
#include "base/arena_object.h"
#include "base/bit_field.h"
+#include "base/bit_utils.h"
#include "base/enums.h"
#include "compiled_method.h"
#include "driver/compiler_options.h"
@@ -212,8 +213,7 @@
virtual size_t GetFloatingPointSpillSlotSize() const = 0;
virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0;
void InitializeCodeGeneration(size_t number_of_spill_slots,
- size_t maximum_number_of_live_core_registers,
- size_t maximum_number_of_live_fpu_registers,
+ size_t maximum_safepoint_spill_size,
size_t number_of_out_slots,
const ArenaVector<HBasicBlock*>& block_order);
// Backends can override this as necessary. For most, no special alignment is required.
@@ -279,6 +279,30 @@
return (fpu_callee_save_mask_ & (1 << reg)) != 0;
}
+ uint32_t GetSlowPathSpills(LocationSummary* locations, bool core_registers) const {
+ DCHECK(locations->OnlyCallsOnSlowPath() ||
+ (locations->Intrinsified() && locations->CallsOnMainAndSlowPath() &&
+ !locations->HasCustomSlowPathCallingConvention()));
+ uint32_t live_registers = core_registers
+ ? locations->GetLiveRegisters()->GetCoreRegisters()
+ : locations->GetLiveRegisters()->GetFloatingPointRegisters();
+ if (locations->HasCustomSlowPathCallingConvention()) {
+ // Save only the live registers that the custom calling convention wants us to save.
+ uint32_t caller_saves = core_registers
+ ? locations->GetCustomSlowPathCallerSaves().GetCoreRegisters()
+ : locations->GetCustomSlowPathCallerSaves().GetFloatingPointRegisters();
+ return live_registers & caller_saves;
+ } else {
+ // Default ABI, we need to spill non-callee-save live registers.
+ uint32_t callee_saves = core_registers ? core_callee_save_mask_ : fpu_callee_save_mask_;
+ return live_registers & ~callee_saves;
+ }
+ }
+
+ size_t GetNumberOfSlowPathSpills(LocationSummary* locations, bool core_registers) const {
+ return POPCOUNT(GetSlowPathSpills(locations, core_registers));
+ }
+
// Record native to dex mapping for a suspend point. Required by runtime.
void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
// Check whether we have already recorded mapping at this PC.
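For context beyond this hunk, a minimal sketch of how a slow path
register-save routine can consume the new helpers, assuming the
pre-existing CodeGenerator hooks GetFirstRegisterSlotInSlowPath(),
SaveCoreRegister() and SaveFloatingPointRegister(); the function
name and the explicit bit loop are illustrative:

  // Sketch: save only the registers reported by GetSlowPathSpills(), packing
  // them from the first slow path register slot upwards.
  static void SaveLiveRegistersSketch(CodeGenerator* codegen, LocationSummary* locations) {
    size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
    uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
    for (uint32_t reg = 0u; core_spills != 0u; ++reg, core_spills >>= 1) {
      if ((core_spills & 1u) != 0u) {
        stack_offset += codegen->SaveCoreRegister(stack_offset, reg);
      }
    }
    uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
    for (uint32_t reg = 0u; fp_spills != 0u; ++reg, fp_spills >>= 1) {
      if ((fp_spills & 1u) != 0u) {
        stack_offset += codegen->SaveFloatingPointRegister(stack_offset, reg);
      }
    }
  }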