Avoid excessive spill slots for slow paths.
Reducing the frame size makes stack maps smaller because we need
fewer bits for stack masks and some dex register locations can use
the short location kind rather than the long one. On Nexus 9,
AOSP ToT, the boot.oat size reductions are:
prebuilt multi-part boot image:
- 32-bit boot.oat: -416KiB (-0.6%)
- 64-bit boot.oat: -635KiB (-0.9%)
prebuilt multi-part boot image with read barrier:
- 32-bit boot.oat: -483KiB (-0.7%)
- 64-bit boot.oat: -703KiB (-0.9%)
on-device built single boot image:
- 32-bit boot.oat: -380KiB (-0.6%)
- 64-bit boot.oat: -632KiB (-0.9%)
on-device built single boot image with read barrier:
- 32-bit boot.oat: -448KiB (-0.6%)
- 64-bit boot.oat: -692KiB (-0.9%)
The other benefit is that at runtime, threads may need fewer
pages for their stacks, reducing overall memory usage.
We defer the calculation of the maximum spill size from
the main register allocator (linear scan or graph coloring)
to the RegisterAllocationResolver and base it on the
live registers at slow path safepoints. The old notion of
an artificial slow path safepoint interval is removed as
it is no longer needed.
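For illustration only, a minimal standalone C++ sketch of the new sizing
scheme (the struct, helper names and parameters here are hypothetical;
the actual code is RegisterAllocationResolver::CalculateMaximumSafepointSpillSize()
and CodeGenerator::InitializeCodeGeneration() in the diff below): the
resolver takes the maximum, over all safepoints, of the bytes needed to
spill the caller-save registers live at that safepoint, and the code
generator adds that single number to the frame size instead of reserving
space for worst-case core and FP register counts.

  #include <algorithm>
  #include <cstddef>
  #include <vector>

  // Per-safepoint counts of live caller-save registers the slow path must
  // spill (hypothetical aggregate; the real code queries LocationSummary).
  struct SafepointSpills {
    size_t core_spills;
    size_t fp_spills;
  };

  // The frame only needs room for the largest single safepoint's spills.
  size_t MaximumSafepointSpillSize(const std::vector<SafepointSpills>& safepoints,
                                   size_t core_spill_size,  // GetWordSize()
                                   size_t fp_spill_size) {  // GetFloatingPointSpillSlotSize()
    size_t maximum = 0u;
    for (const SafepointSpills& sp : safepoints) {
      maximum = std::max(maximum,
                         sp.core_spills * core_spill_size + sp.fp_spills * fp_spill_size);
    }
    return maximum;
  }

  // The single spill-size value is then folded into the frame size,
  // rounded up to the stack alignment.
  size_t FrameSize(size_t first_register_slot_in_slow_path,
                   size_t maximum_safepoint_spill_size,
                   size_t frame_entry_spill_size,
                   size_t stack_alignment) {
    size_t unaligned = first_register_slot_in_slow_path
        + maximum_safepoint_spill_size
        + frame_entry_spill_size;
    return (unaligned + stack_alignment - 1) / stack_alignment * stack_alignment;
  }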
Test: Run ART test suite on host and Nexus 9.
Bug: 30212852
Change-Id: I40b3d114e278e2c5807982904fa49bf6642c6275
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index c532e72..6a4ad5c 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -283,8 +283,7 @@
}
void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
- size_t maximum_number_of_live_core_registers,
- size_t maximum_number_of_live_fpu_registers,
+ size_t maximum_safepoint_spill_size,
size_t number_of_out_slots,
const ArenaVector<HBasicBlock*>& block_order) {
block_order_ = &block_order;
@@ -298,14 +297,12 @@
&& !HasAllocatedCalleeSaveRegisters()
&& IsLeafMethod()
&& !RequiresCurrentMethod()) {
- DCHECK_EQ(maximum_number_of_live_core_registers, 0u);
- DCHECK_EQ(maximum_number_of_live_fpu_registers, 0u);
+ DCHECK_EQ(maximum_safepoint_spill_size, 0u);
SetFrameSize(CallPushesPC() ? GetWordSize() : 0);
} else {
SetFrameSize(RoundUp(
first_register_slot_in_slow_path_
- + maximum_number_of_live_core_registers * GetWordSize()
- + maximum_number_of_live_fpu_registers * GetFloatingPointSpillSlotSize()
+ + maximum_safepoint_spill_size
+ FrameEntrySpillSize(),
kStackAlignment));
}
@@ -765,21 +762,16 @@
LocationSummary* locations = instruction->GetLocations();
uint32_t register_mask = locations->GetRegisterMask();
- if (instruction->IsSuspendCheck()) {
- // Suspend check has special ABI that saves the caller-save registers in callee,
- // so we want to emit stack maps containing the registers.
- // TODO: Register allocator still reserves space for the caller-save registers.
- // We should add slow-path-specific caller-save information into LocationSummary
- // and refactor the code here as well as in the register allocator to use it.
+ DCHECK_EQ(register_mask & ~locations->GetLiveRegisters()->GetCoreRegisters(), 0u);
+ if (locations->OnlyCallsOnSlowPath()) {
+ // In case of slow path, we currently set the location of caller-save registers
+ // to register (instead of their stack location when pushed before the slow-path
+ // call). Therefore register_mask contains both callee-save and caller-save
+ // registers that hold objects. We must remove the spilled caller-save from the
+ // mask, since they will be overwritten by the callee.
+ uint32_t spills = GetSlowPathSpills(locations, /* core_registers */ true);
+ register_mask &= ~spills;
} else {
- if (locations->OnlyCallsOnSlowPath()) {
- // In case of slow path, we currently set the location of caller-save registers
- // to register (instead of their stack location when pushed before the slow-path
- // call). Therefore register_mask contains both callee-save and caller-save
- // registers that hold objects. We must remove the caller-save from the mask, since
- // they will be overwritten by the callee.
- register_mask &= core_callee_save_mask_;
- }
// The register mask must be a subset of callee-save registers.
DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask);
}
@@ -1235,58 +1227,44 @@
}
void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
- RegisterSet* live_registers = locations->GetLiveRegisters();
size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
- for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
- if (!codegen->IsCoreCalleeSaveRegister(i)) {
- if (live_registers->ContainsCoreRegister(i)) {
- // If the register holds an object, update the stack mask.
- if (locations->RegisterContainsObject(i)) {
- locations->SetStackBit(stack_offset / kVRegSize);
- }
- DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
- DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
- saved_core_stack_offsets_[i] = stack_offset;
- stack_offset += codegen->SaveCoreRegister(stack_offset, i);
- }
+ const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
+ for (uint32_t i : LowToHighBits(core_spills)) {
+ // If the register holds an object, update the stack mask.
+ if (locations->RegisterContainsObject(i)) {
+ locations->SetStackBit(stack_offset / kVRegSize);
}
+ DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+ DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+ saved_core_stack_offsets_[i] = stack_offset;
+ stack_offset += codegen->SaveCoreRegister(stack_offset, i);
}
- for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) {
- if (!codegen->IsFloatingPointCalleeSaveRegister(i)) {
- if (live_registers->ContainsFloatingPointRegister(i)) {
- DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
- DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
- saved_fpu_stack_offsets_[i] = stack_offset;
- stack_offset += codegen->SaveFloatingPointRegister(stack_offset, i);
- }
- }
+ const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
+ for (size_t i : LowToHighBits(fp_spills)) {
+ DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+ DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+ saved_fpu_stack_offsets_[i] = stack_offset;
+ stack_offset += codegen->SaveFloatingPointRegister(stack_offset, i);
}
}
void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
- RegisterSet* live_registers = locations->GetLiveRegisters();
size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
- for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
- if (!codegen->IsCoreCalleeSaveRegister(i)) {
- if (live_registers->ContainsCoreRegister(i)) {
- DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
- DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
- stack_offset += codegen->RestoreCoreRegister(stack_offset, i);
- }
- }
+ const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
+ for (uint32_t i : LowToHighBits(core_spills)) {
+ DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+ DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+ stack_offset += codegen->RestoreCoreRegister(stack_offset, i);
}
- for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) {
- if (!codegen->IsFloatingPointCalleeSaveRegister(i)) {
- if (live_registers->ContainsFloatingPointRegister(i)) {
- DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
- DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
- stack_offset += codegen->RestoreFloatingPointRegister(stack_offset, i);
- }
- }
+ const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
+ for (size_t i : LowToHighBits(fp_spills)) {
+ DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+ DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+ stack_offset += codegen->RestoreFloatingPointRegister(stack_offset, i);
}
}
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index fd396c4..072d8cf 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -22,6 +22,7 @@
#include "base/arena_containers.h"
#include "base/arena_object.h"
#include "base/bit_field.h"
+#include "base/bit_utils.h"
#include "base/enums.h"
#include "compiled_method.h"
#include "driver/compiler_options.h"
@@ -212,8 +213,7 @@
virtual size_t GetFloatingPointSpillSlotSize() const = 0;
virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0;
void InitializeCodeGeneration(size_t number_of_spill_slots,
- size_t maximum_number_of_live_core_registers,
- size_t maximum_number_of_live_fpu_registers,
+ size_t maximum_safepoint_spill_size,
size_t number_of_out_slots,
const ArenaVector<HBasicBlock*>& block_order);
// Backends can override this as necessary. For most, no special alignment is required.
@@ -279,6 +279,30 @@
return (fpu_callee_save_mask_ & (1 << reg)) != 0;
}
+ uint32_t GetSlowPathSpills(LocationSummary* locations, bool core_registers) const {
+ DCHECK(locations->OnlyCallsOnSlowPath() ||
+ (locations->Intrinsified() && locations->CallsOnMainAndSlowPath() &&
+ !locations->HasCustomSlowPathCallingConvention()));
+ uint32_t live_registers = core_registers
+ ? locations->GetLiveRegisters()->GetCoreRegisters()
+ : locations->GetLiveRegisters()->GetFloatingPointRegisters();
+ if (locations->HasCustomSlowPathCallingConvention()) {
+ // Save only the live registers that the custom calling convention wants us to save.
+ uint32_t caller_saves = core_registers
+ ? locations->GetCustomSlowPathCallerSaves().GetCoreRegisters()
+ : locations->GetCustomSlowPathCallerSaves().GetFloatingPointRegisters();
+ return live_registers & caller_saves;
+ } else {
+ // Default ABI, we need to spill non-callee-save live registers.
+ uint32_t callee_saves = core_registers ? core_callee_save_mask_ : fpu_callee_save_mask_;
+ return live_registers & ~callee_saves;
+ }
+ }
+
+ size_t GetNumberOfSlowPathSpills(LocationSummary* locations, bool core_registers) const {
+ return POPCOUNT(GetSlowPathSpills(locations, core_registers));
+ }
+
// Record native to dex mapping for a suspend point. Required by runtime.
void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
// Check whether we have already recorded mapping at this PC.
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index e60b781..7c2e80e 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -3996,6 +3996,9 @@
object_field_get_with_read_barrier ?
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall);
+ if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
bool volatile_for_double = field_info.IsVolatile()
@@ -4472,6 +4475,9 @@
object_array_get_with_read_barrier ?
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall);
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
if (Primitive::IsFloatingPointType(instruction->GetType())) {
@@ -5024,7 +5030,9 @@
}
void LocationsBuilderARM::VisitSuspendCheck(HSuspendCheck* instruction) {
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
}
void InstructionCodeGeneratorARM::VisitSuspendCheck(HSuspendCheck* instruction) {
@@ -5355,6 +5363,10 @@
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+ if (kUseBakerReadBarrier && !cls->NeedsEnvironment()) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
+ }
+
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
load_kind == HLoadClass::LoadKind::kDexCacheViaMethod ||
@@ -5548,6 +5560,10 @@
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
+ if (kUseBakerReadBarrier && !load->NeedsEnvironment()) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
+ }
+
HLoadString::LoadKind load_kind = load->GetLoadKind();
if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod ||
load_kind == HLoadString::LoadKind::kDexCachePcRelative) {
@@ -5646,6 +5662,7 @@
void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ bool baker_read_barrier_slow_path = false;
switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kAbstractClassCheck:
@@ -5653,6 +5670,7 @@
case TypeCheckKind::kArrayObjectCheck:
call_kind =
kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
+ baker_read_barrier_slow_path = kUseBakerReadBarrier;
break;
case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
@@ -5662,6 +5680,9 @@
}
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ if (baker_read_barrier_slow_path) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
// The "out" register is used as a temporary, so it overlaps with the inputs.
@@ -5832,6 +5853,7 @@
bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ bool baker_read_barrier_slow_path = false;
switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kAbstractClassCheck:
@@ -5840,6 +5862,7 @@
call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path.
+ baker_read_barrier_slow_path = kUseBakerReadBarrier && !throws_into_catch;
break;
case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
@@ -5849,6 +5872,9 @@
}
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ if (baker_read_barrier_slow_path) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
// Note that TypeCheckSlowPathARM uses this "temp" register too.
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index a4fc044..4fcad51 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1635,6 +1635,9 @@
object_field_get_with_read_barrier ?
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall);
+ if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
if (Primitive::IsFloatingPointType(instruction->GetType())) {
locations->SetOut(Location::RequiresFpuRegister());
@@ -2060,6 +2063,9 @@
object_array_get_with_read_barrier ?
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall);
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
if (Primitive::IsFloatingPointType(instruction->GetType())) {
@@ -3101,6 +3107,7 @@
void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ bool baker_read_barrier_slow_path = false;
switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kAbstractClassCheck:
@@ -3108,6 +3115,7 @@
case TypeCheckKind::kArrayObjectCheck:
call_kind =
kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
+ baker_read_barrier_slow_path = kUseBakerReadBarrier;
break;
case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
@@ -3117,6 +3125,9 @@
}
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ if (baker_read_barrier_slow_path) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
// The "out" register is used as a temporary, so it overlaps with the inputs.
@@ -3288,6 +3299,7 @@
bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ bool baker_read_barrier_slow_path = false;
switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kAbstractClassCheck:
@@ -3296,6 +3308,7 @@
call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path.
+ baker_read_barrier_slow_path = kUseBakerReadBarrier && !throws_into_catch;
break;
case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
@@ -3305,6 +3318,9 @@
}
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ if (baker_read_barrier_slow_path) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
// Note that TypeCheckSlowPathARM64 uses this "temp" register too.
@@ -3991,6 +4007,10 @@
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+ if (kUseBakerReadBarrier && !cls->NeedsEnvironment()) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
+ }
+
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
@@ -4181,6 +4201,10 @@
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
+ if (kUseBakerReadBarrier && !load->NeedsEnvironment()) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
+ }
+
if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
locations->SetInAt(0, Location::RequiresRegister());
}
@@ -4711,7 +4735,9 @@
}
void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
}
void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 8a2f90d..af998e9 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -5220,7 +5220,9 @@
}
void LocationsBuilderMIPS::VisitSuspendCheck(HSuspendCheck* instruction) {
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
}
void InstructionCodeGeneratorMIPS::VisitSuspendCheck(HSuspendCheck* instruction) {
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 4a5755c..fbf7b33 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -3782,7 +3782,9 @@
}
void LocationsBuilderMIPS64::VisitSuspendCheck(HSuspendCheck* instruction) {
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
}
void InstructionCodeGeneratorMIPS64::VisitSuspendCheck(HSuspendCheck* instruction) {
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index f6e8ee1..6da8828 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -4613,6 +4613,9 @@
kEmitCompilerReadBarrier ?
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall);
+ if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
if (Primitive::IsFloatingPointType(instruction->GetType())) {
@@ -5077,6 +5080,9 @@
object_array_get_with_read_barrier ?
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall);
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
if (Primitive::IsFloatingPointType(instruction->GetType())) {
@@ -5639,7 +5645,9 @@
}
void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) {
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
}
void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
@@ -6006,6 +6014,10 @@
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+ if (kUseBakerReadBarrier && !cls->NeedsEnvironment()) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
+ }
+
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
load_kind == HLoadClass::LoadKind::kDexCacheViaMethod ||
@@ -6187,6 +6199,10 @@
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
+ if (kUseBakerReadBarrier && !load->NeedsEnvironment()) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
+ }
+
HLoadString::LoadKind load_kind = load->GetLoadKind();
if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod ||
load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
@@ -6282,6 +6298,7 @@
void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ bool baker_read_barrier_slow_path = false;
switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kAbstractClassCheck:
@@ -6289,6 +6306,7 @@
case TypeCheckKind::kArrayObjectCheck:
call_kind =
kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
+ baker_read_barrier_slow_path = kUseBakerReadBarrier;
break;
case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
@@ -6298,6 +6316,9 @@
}
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ if (baker_read_barrier_slow_path) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::Any());
// Note that TypeCheckSlowPathX86 uses this "out" register too.
@@ -6495,6 +6516,7 @@
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ bool baker_read_barrier_slow_path = false;
switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kAbstractClassCheck:
@@ -6503,6 +6525,7 @@
call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path.
+ baker_read_barrier_slow_path = kUseBakerReadBarrier && !throws_into_catch;
break;
case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
@@ -6511,6 +6534,9 @@
break;
}
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ if (baker_read_barrier_slow_path) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::Any());
// Note that TypeCheckSlowPathX86 uses this "temp" register too.
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index cb227a1..9a9cb26 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -4146,6 +4146,9 @@
object_field_get_with_read_barrier ?
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall);
+ if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
if (Primitive::IsFloatingPointType(instruction->GetType())) {
locations->SetOut(Location::RequiresFpuRegister());
@@ -4575,6 +4578,9 @@
object_array_get_with_read_barrier ?
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall);
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
if (Primitive::IsFloatingPointType(instruction->GetType())) {
@@ -5127,7 +5133,9 @@
}
void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
}
void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
@@ -5437,6 +5445,10 @@
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+ if (kUseBakerReadBarrier && !cls->NeedsEnvironment()) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
+ }
+
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
@@ -5601,6 +5613,10 @@
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
+ if (kUseBakerReadBarrier && !load->NeedsEnvironment()) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
+ }
+
if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
locations->SetInAt(0, Location::RequiresRegister());
}
@@ -5687,6 +5703,7 @@
void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ bool baker_read_barrier_slow_path = false;
switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kAbstractClassCheck:
@@ -5694,6 +5711,7 @@
case TypeCheckKind::kArrayObjectCheck:
call_kind =
kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
+ baker_read_barrier_slow_path = kUseBakerReadBarrier;
break;
case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
@@ -5703,6 +5721,9 @@
}
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ if (baker_read_barrier_slow_path) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::Any());
// Note that TypeCheckSlowPathX86_64 uses this "out" register too.
@@ -5905,6 +5926,7 @@
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ bool baker_read_barrier_slow_path = false;
switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kAbstractClassCheck:
@@ -5913,6 +5935,7 @@
call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path.
+ baker_read_barrier_slow_path = kUseBakerReadBarrier && !throws_into_catch;
break;
case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
@@ -5921,6 +5944,9 @@
break;
}
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ if (baker_read_barrier_slow_path) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::Any());
// Note that TypeCheckSlowPathX86_64 uses this "temp" register too.
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 0bbc0e5..78924ba 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -651,6 +651,9 @@
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall,
kIntrinsified);
+ if (can_call && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 7482057..082076d 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -894,6 +894,9 @@
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall,
kIntrinsified);
+ if (can_call && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index cf4a040..9c24db9 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -1984,6 +1984,9 @@
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall,
kIntrinsified);
+ if (can_call && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index a4ee546..c79a3e3 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2123,6 +2123,9 @@
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall,
kIntrinsified);
+ if (can_call && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index 83596da..1b1b3a7 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -27,12 +27,14 @@
: inputs_(instruction->InputCount(),
instruction->GetBlock()->GetGraph()->GetArena()->Adapter(kArenaAllocLocationSummary)),
temps_(instruction->GetBlock()->GetGraph()->GetArena()->Adapter(kArenaAllocLocationSummary)),
- output_overlaps_(Location::kOutputOverlap),
call_kind_(call_kind),
+ intrinsified_(intrinsified),
+ has_custom_slow_path_calling_convention_(false),
+ output_overlaps_(Location::kOutputOverlap),
stack_mask_(nullptr),
register_mask_(0),
live_registers_(),
- intrinsified_(intrinsified) {
+ custom_slow_path_caller_saves_() {
instruction->SetLocations(this);
if (NeedsSafepoint()) {
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 5fdfb9b..4384042 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -20,6 +20,7 @@
#include "base/arena_containers.h"
#include "base/arena_object.h"
#include "base/bit_field.h"
+#include "base/bit_utils.h"
#include "base/bit_vector.h"
#include "base/value_object.h"
@@ -452,7 +453,7 @@
}
size_t GetNumberOfRegisters() const {
- return __builtin_popcount(core_registers_) + __builtin_popcount(floating_point_registers_);
+ return POPCOUNT(core_registers_) + POPCOUNT(floating_point_registers_);
}
uint32_t GetCoreRegisters() const {
@@ -466,8 +467,6 @@
private:
uint32_t core_registers_;
uint32_t floating_point_registers_;
-
- DISALLOW_COPY_AND_ASSIGN(RegisterSet);
};
static constexpr bool kIntrinsified = true;
@@ -569,6 +568,21 @@
return CanCall();
}
+ void SetCustomSlowPathCallerSaves(const RegisterSet& caller_saves) {
+ DCHECK(OnlyCallsOnSlowPath());
+ has_custom_slow_path_calling_convention_ = true;
+ custom_slow_path_caller_saves_ = caller_saves;
+ }
+
+ bool HasCustomSlowPathCallingConvention() const {
+ return has_custom_slow_path_calling_convention_;
+ }
+
+ const RegisterSet& GetCustomSlowPathCallerSaves() const {
+ DCHECK(HasCustomSlowPathCallingConvention());
+ return custom_slow_path_caller_saves_;
+ }
+
void SetStackBit(uint32_t index) {
stack_mask_->SetBit(index);
}
@@ -628,18 +642,18 @@
return intrinsified_;
}
- void SetIntrinsified(bool intrinsified) {
- intrinsified_ = intrinsified;
- }
-
private:
ArenaVector<Location> inputs_;
ArenaVector<Location> temps_;
+ const CallKind call_kind_;
+ // Whether these are locations for an intrinsified call.
+ const bool intrinsified_;
+ // Whether the slow path has default or custom calling convention.
+ bool has_custom_slow_path_calling_convention_;
// Whether the output overlaps with any of the inputs. If it overlaps, then it cannot
// share the same register as the inputs.
Location::OutputOverlap output_overlaps_;
Location output_;
- const CallKind call_kind_;
// Mask of objects that live in the stack.
BitVector* stack_mask_;
@@ -650,8 +664,8 @@
// Registers that are in use at this position.
RegisterSet live_registers_;
- // Whether these are locations for an intrinsified call.
- bool intrinsified_;
+ // Custom slow path caller saves. Valid only if indicated by slow_path_calling_convention_.
+ RegisterSet custom_slow_path_caller_saves_;
friend class RegisterAllocatorTest;
DISALLOW_COPY_AND_ASSIGN(LocationSummary);
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
index 3450286..ad7a8a3 100644
--- a/compiler/optimizing/register_allocation_resolver.cc
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -28,8 +28,7 @@
codegen_(codegen),
liveness_(liveness) {}
-void RegisterAllocationResolver::Resolve(size_t max_safepoint_live_core_regs,
- size_t max_safepoint_live_fp_regs,
+void RegisterAllocationResolver::Resolve(ArrayRef<HInstruction* const> safepoints,
size_t reserved_out_slots,
size_t int_spill_slots,
size_t long_spill_slots,
@@ -43,10 +42,13 @@
+ double_spill_slots
+ catch_phi_spill_slots;
+ // Update safepoints and calculate the size of the spills.
+ UpdateSafepointLiveRegisters();
+ size_t maximum_safepoint_spill_size = CalculateMaximumSafepointSpillSize(safepoints);
+
// Computes frame size and spill mask.
codegen_->InitializeCodeGeneration(spill_slots,
- max_safepoint_live_core_regs,
- max_safepoint_live_fp_regs,
+ maximum_safepoint_spill_size,
reserved_out_slots, // Includes slot(s) for the art method.
codegen_->GetGraph()->GetLinearOrder());
@@ -135,8 +137,7 @@
// Connect siblings and resolve inputs.
for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) {
HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
- ConnectSiblings(instruction->GetLiveInterval(),
- max_safepoint_live_core_regs + max_safepoint_live_fp_regs);
+ ConnectSiblings(instruction->GetLiveInterval());
}
// Resolve non-linear control flow across branches. Order does not matter.
@@ -222,8 +223,73 @@
}
}
-void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval,
- size_t max_safepoint_live_regs) {
+void RegisterAllocationResolver::UpdateSafepointLiveRegisters() {
+ for (size_t i = 0, e = liveness_.GetNumberOfSsaValues(); i < e; ++i) {
+ HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
+ for (LiveInterval* current = instruction->GetLiveInterval();
+ current != nullptr;
+ current = current->GetNextSibling()) {
+ if (!current->HasRegister()) {
+ continue;
+ }
+ Location source = current->ToLocation();
+ for (SafepointPosition* safepoint_position = current->GetFirstSafepoint();
+ safepoint_position != nullptr;
+ safepoint_position = safepoint_position->GetNext()) {
+ DCHECK(current->CoversSlow(safepoint_position->GetPosition()));
+ LocationSummary* locations = safepoint_position->GetLocations();
+ switch (source.GetKind()) {
+ case Location::kRegister:
+ case Location::kFpuRegister: {
+ locations->AddLiveRegister(source);
+ break;
+ }
+ case Location::kRegisterPair:
+ case Location::kFpuRegisterPair: {
+ locations->AddLiveRegister(source.ToLow());
+ locations->AddLiveRegister(source.ToHigh());
+ break;
+ }
+ case Location::kStackSlot: // Fall-through
+ case Location::kDoubleStackSlot: // Fall-through
+ case Location::kConstant: {
+ // Nothing to do.
+ break;
+ }
+ default: {
+ LOG(FATAL) << "Unexpected location for object";
+ }
+ }
+ }
+ }
+ }
+}
+
+size_t RegisterAllocationResolver::CalculateMaximumSafepointSpillSize(
+ ArrayRef<HInstruction* const> safepoints) {
+ size_t core_register_spill_size = codegen_->GetWordSize();
+ size_t fp_register_spill_size = codegen_->GetFloatingPointSpillSlotSize();
+ size_t maximum_safepoint_spill_size = 0u;
+ for (HInstruction* instruction : safepoints) {
+ LocationSummary* locations = instruction->GetLocations();
+ if (locations->OnlyCallsOnSlowPath()) {
+ size_t core_spills =
+ codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ true);
+ size_t fp_spills =
+ codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ false);
+ size_t spill_size =
+ core_register_spill_size * core_spills + fp_register_spill_size * fp_spills;
+ maximum_safepoint_spill_size = std::max(maximum_safepoint_spill_size, spill_size);
+ } else if (locations->CallsOnMainAndSlowPath()) {
+ // Nothing to spill on the slow path if the main path already clobbers caller-saves.
+ DCHECK_EQ(0u, codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ true));
+ DCHECK_EQ(0u, codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ false));
+ }
+ }
+ return maximum_safepoint_spill_size;
+}
+
+void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval) {
LiveInterval* current = interval;
if (current->HasSpillSlot()
&& current->HasRegister()
@@ -306,48 +372,16 @@
safepoint_position = safepoint_position->GetNext()) {
DCHECK(current->CoversSlow(safepoint_position->GetPosition()));
- LocationSummary* locations = safepoint_position->GetLocations();
- if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) {
+ if (current->GetType() == Primitive::kPrimNot) {
DCHECK(interval->GetDefinedBy()->IsActualObject())
<< interval->GetDefinedBy()->DebugName()
<< "@" << safepoint_position->GetInstruction()->DebugName();
- locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize);
- }
-
- switch (source.GetKind()) {
- case Location::kRegister: {
- locations->AddLiveRegister(source);
- if (kIsDebugBuild && locations->OnlyCallsOnSlowPath()) {
- DCHECK_LE(locations->GetNumberOfLiveRegisters(),
- max_safepoint_live_regs);
- }
- if (current->GetType() == Primitive::kPrimNot) {
- DCHECK(interval->GetDefinedBy()->IsActualObject())
- << interval->GetDefinedBy()->DebugName()
- << "@" << safepoint_position->GetInstruction()->DebugName();
- locations->SetRegisterBit(source.reg());
- }
- break;
+ LocationSummary* locations = safepoint_position->GetLocations();
+ if (current->GetParent()->HasSpillSlot()) {
+ locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize);
}
- case Location::kFpuRegister: {
- locations->AddLiveRegister(source);
- break;
- }
-
- case Location::kRegisterPair:
- case Location::kFpuRegisterPair: {
- locations->AddLiveRegister(source.ToLow());
- locations->AddLiveRegister(source.ToHigh());
- break;
- }
- case Location::kStackSlot: // Fall-through
- case Location::kDoubleStackSlot: // Fall-through
- case Location::kConstant: {
- // Nothing to do.
- break;
- }
- default: {
- LOG(FATAL) << "Unexpected location for object";
+ if (source.GetKind() == Location::kRegister) {
+ locations->SetRegisterBit(source.reg());
}
}
}
diff --git a/compiler/optimizing/register_allocation_resolver.h b/compiler/optimizing/register_allocation_resolver.h
index 6ceb9bc..a70ceae 100644
--- a/compiler/optimizing/register_allocation_resolver.h
+++ b/compiler/optimizing/register_allocation_resolver.h
@@ -20,6 +20,7 @@
#include "base/arena_containers.h"
#include "base/value_object.h"
#include "primitive.h"
+#include "utils/array_ref.h"
namespace art {
@@ -43,8 +44,7 @@
CodeGenerator* codegen,
const SsaLivenessAnalysis& liveness);
- void Resolve(size_t max_safepoint_live_core_regs,
- size_t max_safepoint_live_fp_regs,
+ void Resolve(ArrayRef<HInstruction* const> safepoints,
size_t reserved_out_slots, // Includes slot(s) for the art method.
size_t int_spill_slots,
size_t long_spill_slots,
@@ -54,10 +54,14 @@
const ArenaVector<LiveInterval*>& temp_intervals);
private:
+ // Update live registers of safepoint location summary.
+ void UpdateSafepointLiveRegisters();
+
+ // Calculate the maximum size of the spill area for safepoints.
+ size_t CalculateMaximumSafepointSpillSize(ArrayRef<HInstruction* const> safepoints);
+
// Connect adjacent siblings within blocks, and resolve inputs along the way.
- // Uses max_safepoint_live_regs to check that we did not underestimate the
- // number of live registers at safepoints.
- void ConnectSiblings(LiveInterval* interval, size_t max_safepoint_live_regs);
+ void ConnectSiblings(LiveInterval* interval);
// Connect siblings between block entries and exits.
void ConnectSplitSiblings(LiveInterval* interval, HBasicBlock* from, HBasicBlock* to) const;
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
index a21595f..7178399 100644
--- a/compiler/optimizing/register_allocator_graph_color.cc
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -438,8 +438,7 @@
// track of live intervals across safepoints.
// TODO: Should build safepoints elsewhere.
void BuildInterferenceGraph(const ArenaVector<LiveInterval*>& intervals,
- const ArenaVector<InterferenceNode*>& physical_nodes,
- ArenaVector<InterferenceNode*>* safepoints);
+ const ArenaVector<InterferenceNode*>& physical_nodes);
// Add coalesce opportunities to interference nodes.
void FindCoalesceOpportunities();
@@ -566,11 +565,7 @@
num_long_spill_slots_(0),
catch_phi_spill_slot_counter_(0),
reserved_art_method_slots_(ComputeReservedArtMethodSlots(*codegen)),
- reserved_out_slots_(codegen->GetGraph()->GetMaximumNumberOfOutVRegs()),
- number_of_globally_blocked_core_regs_(0),
- number_of_globally_blocked_fp_regs_(0),
- max_safepoint_live_core_regs_(0),
- max_safepoint_live_fp_regs_(0) {
+ reserved_out_slots_(codegen->GetGraph()->GetMaximumNumberOfOutVRegs()) {
// Before we ask for blocked registers, set them up in the code generator.
codegen->SetupBlockedRegisters();
@@ -584,7 +579,6 @@
physical_core_nodes_[i]->stage = NodeStage::kPrecolored;
core_intervals_.push_back(interval);
if (codegen_->IsBlockedCoreRegister(i)) {
- ++number_of_globally_blocked_core_regs_;
interval->AddRange(0, liveness.GetMaxLifetimePosition());
}
}
@@ -597,7 +591,6 @@
physical_fp_nodes_[i]->stage = NodeStage::kPrecolored;
fp_intervals_.push_back(interval);
if (codegen_->IsBlockedFloatingPointRegister(i)) {
- ++number_of_globally_blocked_fp_regs_;
interval->AddRange(0, liveness.GetMaxLifetimePosition());
}
}
@@ -638,7 +631,7 @@
ArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs
? physical_core_nodes_
: physical_fp_nodes_;
- iteration.BuildInterferenceGraph(intervals, physical_nodes, &safepoints);
+ iteration.BuildInterferenceGraph(intervals, physical_nodes);
// (3) Add coalesce opportunities.
// If we have tried coloring the graph a suspiciously high number of times, give
@@ -667,19 +660,6 @@
// Assign spill slots.
AllocateSpillSlots(iteration.GetPrunableNodes());
- // Compute the maximum number of live registers across safepoints.
- // Notice that we do not count globally blocked registers, such as the stack pointer.
- if (safepoints.size() > 0) {
- size_t max_safepoint_live_regs = ComputeMaxSafepointLiveRegisters(safepoints);
- if (processing_core_regs) {
- max_safepoint_live_core_regs_ =
- max_safepoint_live_regs - number_of_globally_blocked_core_regs_;
- } else {
- max_safepoint_live_fp_regs_=
- max_safepoint_live_regs - number_of_globally_blocked_fp_regs_;
- }
- }
-
// Tell the code generator which registers were allocated.
// We only look at prunable_nodes because we already told the code generator about
// fixed intervals while processing instructions. We also ignore the fixed intervals
@@ -711,8 +691,7 @@
// (6) Resolve locations and deconstruct SSA form.
RegisterAllocationResolver(allocator_, codegen_, liveness_)
- .Resolve(max_safepoint_live_core_regs_,
- max_safepoint_live_fp_regs_,
+ .Resolve(ArrayRef<HInstruction* const>(safepoints_),
reserved_art_method_slots_ + reserved_out_slots_,
num_int_spill_slots_,
num_long_spill_slots_,
@@ -989,24 +968,9 @@
void RegisterAllocatorGraphColor::CheckForSafepoint(HInstruction* instruction) {
LocationSummary* locations = instruction->GetLocations();
- size_t position = instruction->GetLifetimePosition();
if (locations->NeedsSafepoint()) {
safepoints_.push_back(instruction);
- if (locations->OnlyCallsOnSlowPath()) {
- // We add a synthesized range at this position to record the live registers
- // at this position. Ideally, we could just update the safepoints when locations
- // are updated, but we currently need to know the full stack size before updating
- // locations (because of parameters and the fact that we don't have a frame pointer).
- // And knowing the full stack size requires to know the maximum number of live
- // registers at calls in slow paths.
- // By adding the following interval in the algorithm, we can compute this
- // maximum before updating locations.
- LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction);
- interval->AddRange(position, position + 1);
- core_intervals_.push_back(interval);
- fp_intervals_.push_back(interval);
- }
}
}
@@ -1110,11 +1074,6 @@
bool both_directions) {
if (from->IsPrecolored()) {
// We save space by ignoring outgoing edges from fixed nodes.
- } else if (to->GetInterval()->IsSlowPathSafepoint()) {
- // Safepoint intervals are only there to count max live registers,
- // so no need to give them incoming interference edges.
- // This is also necessary for correctness, because we don't want nodes
- // to remove themselves from safepoint adjacency sets when they're pruned.
} else if (to->IsPrecolored()) {
// It is important that only a single node represents a given fixed register in the
// interference graph. We retrieve that node here.
@@ -1200,8 +1159,7 @@
void ColoringIteration::BuildInterferenceGraph(
const ArenaVector<LiveInterval*>& intervals,
- const ArenaVector<InterferenceNode*>& physical_nodes,
- ArenaVector<InterferenceNode*>* safepoints) {
+ const ArenaVector<InterferenceNode*>& physical_nodes) {
DCHECK(interval_node_map_.Empty() && prunable_nodes_.empty());
// Build the interference graph efficiently by ordering range endpoints
// by position and doing a linear sweep to find interferences. (That is, we
@@ -1236,11 +1194,6 @@
node->SetAlias(physical_node);
DCHECK_EQ(node->GetInterval()->GetRegister(),
physical_node->GetInterval()->GetRegister());
- } else if (sibling->IsSlowPathSafepoint()) {
- // Safepoint intervals are synthesized to count max live registers.
- // They will be processed separately after coloring.
- node->stage = NodeStage::kSafepoint;
- safepoints->push_back(node);
} else {
node->stage = NodeStage::kPrunable;
prunable_nodes_.push_back(node);
@@ -1494,7 +1447,6 @@
// filled by FindCoalesceOpportunities().
for (InterferenceNode* node : prunable_nodes_) {
DCHECK(!node->IsPrecolored()) << "Fixed nodes should never be pruned";
- DCHECK(!node->GetInterval()->IsSlowPathSafepoint()) << "Safepoint nodes should never be pruned";
if (IsLowDegreeNode(node, num_regs_)) {
if (node->GetCoalesceOpportunities().empty()) {
// Simplify Worklist.
@@ -1577,8 +1529,6 @@
pruned_nodes_.push(node);
for (InterferenceNode* adj : node->GetAdjacentNodes()) {
- DCHECK(!adj->GetInterval()->IsSlowPathSafepoint())
- << "Nodes should never interfere with synthesized safepoint nodes";
DCHECK_NE(adj->stage, NodeStage::kPruned) << "Should be no interferences with pruned nodes";
if (adj->IsPrecolored()) {
@@ -1938,18 +1888,6 @@
return successful;
}
-size_t RegisterAllocatorGraphColor::ComputeMaxSafepointLiveRegisters(
- const ArenaVector<InterferenceNode*>& safepoints) {
- size_t max_safepoint_live_regs = 0;
- for (InterferenceNode* safepoint : safepoints) {
- DCHECK(safepoint->GetInterval()->IsSlowPathSafepoint());
- std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(safepoint->GetAdjacentNodes());
- size_t live_regs = conflict_mask.count();
- max_safepoint_live_regs = std::max(max_safepoint_live_regs, live_regs);
- }
- return max_safepoint_live_regs;
-}
-
void RegisterAllocatorGraphColor::AllocateSpillSlots(const ArenaVector<InterferenceNode*>& nodes) {
// The register allocation resolver will organize the stack based on value type,
// so we assign stack slots for each value type separately.
diff --git a/compiler/optimizing/register_allocator_graph_color.h b/compiler/optimizing/register_allocator_graph_color.h
index ed12561..548687f 100644
--- a/compiler/optimizing/register_allocator_graph_color.h
+++ b/compiler/optimizing/register_allocator_graph_color.h
@@ -140,10 +140,6 @@
bool IsCallerSave(size_t reg, bool processing_core_regs);
- // Return the maximum number of registers live at safepoints,
- // based on the outgoing interference edges of safepoint nodes.
- size_t ComputeMaxSafepointLiveRegisters(const ArenaVector<InterferenceNode*>& safepoints);
-
// Assigns stack slots to a list of intervals, ensuring that interfering intervals are not
// assigned the same stack slot.
void ColorSpillSlots(ArenaVector<LiveInterval*>* nodes,
@@ -187,14 +183,6 @@
// Number of stack slots needed for outgoing arguments.
const size_t reserved_out_slots_;
- // The number of globally blocked core and floating point registers, such as the stack pointer.
- size_t number_of_globally_blocked_core_regs_;
- size_t number_of_globally_blocked_fp_regs_;
-
- // The maximum number of registers live at safe points. Needed by the code generator.
- size_t max_safepoint_live_core_regs_;
- size_t max_safepoint_live_fp_regs_;
-
friend class ColoringIteration;
DISALLOW_COPY_AND_ASSIGN(RegisterAllocatorGraphColor);
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc
index 768ed2d..6910c71 100644
--- a/compiler/optimizing/register_allocator_linear_scan.cc
+++ b/compiler/optimizing/register_allocator_linear_scan.cc
@@ -63,9 +63,7 @@
registers_array_(nullptr),
blocked_core_registers_(codegen->GetBlockedCoreRegisters()),
blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()),
- reserved_out_slots_(0),
- maximum_number_of_live_core_registers_(0),
- maximum_number_of_live_fp_registers_(0) {
+ reserved_out_slots_(0) {
temp_intervals_.reserve(4);
int_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
long_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
@@ -92,8 +90,7 @@
void RegisterAllocatorLinearScan::AllocateRegisters() {
AllocateRegistersInternal();
RegisterAllocationResolver(allocator_, codegen_, liveness_)
- .Resolve(maximum_number_of_live_core_registers_,
- maximum_number_of_live_fp_registers_,
+ .Resolve(ArrayRef<HInstruction* const>(safepoints_),
reserved_out_slots_,
int_spill_slots_.size(),
long_spill_slots_.size(),
@@ -283,20 +280,6 @@
return;
}
safepoints_.push_back(instruction);
- if (locations->OnlyCallsOnSlowPath()) {
- // We add a synthesized range at this position to record the live registers
- // at this position. Ideally, we could just update the safepoints when locations
- // are updated, but we currently need to know the full stack size before updating
- // locations (because of parameters and the fact that we don't have a frame pointer).
- // And knowing the full stack size requires to know the maximum number of live
- // registers at calls in slow paths.
- // By adding the following interval in the algorithm, we can compute this
- // maximum before updating locations.
- LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction);
- interval->AddRange(position, position + 1);
- AddSorted(&unhandled_core_intervals_, interval);
- AddSorted(&unhandled_fp_intervals_, interval);
- }
}
if (locations->WillCall()) {
@@ -569,20 +552,6 @@
});
inactive_.erase(inactive_kept_end, inactive_to_handle_end);
- if (current->IsSlowPathSafepoint()) {
- // Synthesized interval to record the maximum number of live registers
- // at safepoints. No need to allocate a register for it.
- if (processing_core_registers_) {
- maximum_number_of_live_core_registers_ =
- std::max(maximum_number_of_live_core_registers_, active_.size());
- } else {
- maximum_number_of_live_fp_registers_ =
- std::max(maximum_number_of_live_fp_registers_, active_.size());
- }
- DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() > current->GetStart());
- continue;
- }
-
if (current->IsHighInterval() && !current->GetLowInterval()->HasRegister()) {
DCHECK(!current->HasRegister());
// Allocating the low part was unsucessful. The splitted interval for the high part
@@ -685,7 +654,7 @@
// the next intersection with `current`.
for (LiveInterval* inactive : inactive_) {
// Temp/Slow-path-safepoint interval has no holes.
- DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
+ DCHECK(!inactive->IsTemp());
if (!current->IsSplit() && !inactive->IsFixed()) {
// Neither current nor inactive are fixed.
// Thanks to SSA, a non-split interval starting in a hole of an
@@ -933,7 +902,7 @@
// start of current.
for (LiveInterval* inactive : inactive_) {
// Temp/Slow-path-safepoint interval has no holes.
- DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
+ DCHECK(!inactive->IsTemp());
if (!current->IsSplit() && !inactive->IsFixed()) {
// Neither current nor inactive are fixed.
// Thanks to SSA, a non-split interval starting in a hole of an
@@ -1085,12 +1054,6 @@
if (current->StartsAfter(interval) && !current->IsHighInterval()) {
insert_at = i;
break;
- } else if ((current->GetStart() == interval->GetStart()) && current->IsSlowPathSafepoint()) {
- // Ensure the slow path interval is the last to be processed at its location: we want the
- // interval to know all live registers at this location.
- DCHECK(i == 1 || (*array)[i - 2u]->StartsAfter(current));
- insert_at = i;
- break;
}
}
diff --git a/compiler/optimizing/register_allocator_linear_scan.h b/compiler/optimizing/register_allocator_linear_scan.h
index 1a643a0..b3834f4 100644
--- a/compiler/optimizing/register_allocator_linear_scan.h
+++ b/compiler/optimizing/register_allocator_linear_scan.h
@@ -171,12 +171,6 @@
// Slots reserved for out arguments.
size_t reserved_out_slots_;
- // The maximum live core registers at safepoints.
- size_t maximum_number_of_live_core_registers_;
-
- // The maximum live FP registers at safepoints.
- size_t maximum_number_of_live_fp_registers_;
-
ART_FRIEND_TEST(RegisterAllocatorTest, FreeUntil);
ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive);
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 92788fe..06785e1 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -208,11 +208,6 @@
return new (allocator) LiveInterval(allocator, type, instruction);
}
- static LiveInterval* MakeSlowPathInterval(ArenaAllocator* allocator, HInstruction* instruction) {
- return new (allocator) LiveInterval(
- allocator, Primitive::kPrimVoid, instruction, false, kNoRegister, false, true);
- }
-
static LiveInterval* MakeFixedInterval(ArenaAllocator* allocator, int reg, Primitive::Type type) {
return new (allocator) LiveInterval(allocator, type, nullptr, true, reg, false);
}
@@ -223,7 +218,6 @@
bool IsFixed() const { return is_fixed_; }
bool IsTemp() const { return is_temp_; }
- bool IsSlowPathSafepoint() const { return is_slow_path_safepoint_; }
// This interval is the result of a split.
bool IsSplit() const { return parent_ != this; }
@@ -790,7 +784,7 @@
DCHECK(!HasHighInterval());
DCHECK(!HasLowInterval());
high_or_low_interval_ = new (allocator_) LiveInterval(
- allocator_, type_, defined_by_, false, kNoRegister, is_temp, false, true);
+ allocator_, type_, defined_by_, false, kNoRegister, is_temp, true);
high_or_low_interval_->high_or_low_interval_ = this;
if (first_range_ != nullptr) {
high_or_low_interval_->first_range_ = first_range_->Dup(allocator_);
@@ -919,7 +913,6 @@
bool is_fixed = false,
int reg = kNoRegister,
bool is_temp = false,
- bool is_slow_path_safepoint = false,
bool is_high_interval = false)
: allocator_(allocator),
first_range_(nullptr),
@@ -936,7 +929,6 @@
spill_slot_(kNoSpillSlot),
is_fixed_(is_fixed),
is_temp_(is_temp),
- is_slow_path_safepoint_(is_slow_path_safepoint),
is_high_interval_(is_high_interval),
high_or_low_interval_(nullptr),
defined_by_(defined_by) {}
@@ -1121,9 +1113,6 @@
// Whether the interval is for a temporary.
const bool is_temp_;
- // Whether the interval is for a safepoint that calls on slow path.
- const bool is_slow_path_safepoint_;
-
// Whether this interval is a synthesized interval for register pair.
const bool is_high_interval_;