Add support for saving and restoring live registers in a slow path,
and use it in suspend check slow paths.
Change-Id: I79caf28f334c145a36180c79a6e2fceae3990c31
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 2547a29..3231c99 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -44,6 +44,7 @@
ComputeFrameSize(GetGraph()->GetNumberOfLocalVRegs()
+ GetGraph()->GetNumberOfTemporaries()
+ 1 /* filler */,
+ 0, /* the baseline compiler does not have live registers at slow paths */
GetGraph()->GetMaximumNumberOfOutVRegs()
+ 1 /* current method */);
GenerateFrameEntry();
@@ -111,10 +112,15 @@
return -1;
}
-void CodeGenerator::ComputeFrameSize(size_t number_of_spill_slots, size_t number_of_out_slots) {
+void CodeGenerator::ComputeFrameSize(size_t number_of_spill_slots,
+ size_t maximum_number_of_live_registers,
+ size_t number_of_out_slots) {
+ first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize;
+
SetFrameSize(RoundUp(
number_of_spill_slots * kVRegSize
+ number_of_out_slots * kVRegSize
+ + maximum_number_of_live_registers * GetWordSize()
+ FrameEntrySpillSize(),
kStackAlignment));
}
@@ -468,4 +474,48 @@
}
}
+size_t CodeGenerator::GetStackOffsetOfSavedRegister(size_t index) {
+ return first_register_slot_in_slow_path_ + index * GetWordSize();
+}
+
+void CodeGenerator::SaveLiveRegisters(LocationSummary* locations) {
+ RegisterSet* register_set = locations->GetLiveRegisters();
+ uint32_t count = 0;
+ for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) {
+ if (register_set->ContainsCoreRegister(i)) {
+ size_t stack_offset = GetStackOffsetOfSavedRegister(count);
+ ++count;
+ SaveCoreRegister(Location::StackSlot(stack_offset), i);
+ // If the register holds an object, update the stack mask.
+ if (locations->RegisterContainsObject(i)) {
+ locations->SetStackBit(stack_offset / kVRegSize);
+ }
+ }
+ }
+
+ for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) {
+ if (register_set->ContainsFloatingPointRegister(i)) {
+ LOG(FATAL) << "Unimplemented";
+ }
+ }
+}
+
+void CodeGenerator::RestoreLiveRegisters(LocationSummary* locations) {
+ RegisterSet* register_set = locations->GetLiveRegisters();
+ uint32_t count = 0;
+ for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) {
+ if (register_set->ContainsCoreRegister(i)) {
+ size_t stack_offset = GetStackOffsetOfSavedRegister(count);
+ ++count;
+ RestoreCoreRegister(Location::StackSlot(stack_offset), i);
+ }
+ }
+
+ for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) {
+ if (register_set->ContainsFloatingPointRegister(i)) {
+ LOG(FATAL) << "Unimplemented";
+ }
+ }
+}
+
} // namespace art
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index a83d703..55f5d8d 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -98,7 +98,9 @@
virtual HGraphVisitor* GetInstructionVisitor() = 0;
virtual Assembler* GetAssembler() = 0;
virtual size_t GetWordSize() const = 0;
- void ComputeFrameSize(size_t number_of_spill_slots, size_t number_of_out_slots);
+ void ComputeFrameSize(size_t number_of_spill_slots,
+ size_t maximum_number_of_live_registers,
+ size_t number_of_out_slots);
virtual size_t FrameEntrySpillSize() const = 0;
int32_t GetStackSlot(HLocal* local) const;
Location GetTemporaryLocation(HTemporary* temp) const;
@@ -114,6 +116,8 @@
virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
virtual InstructionSet GetInstructionSet() const = 0;
+ virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) = 0;
+ virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) = 0;
void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc);
@@ -128,6 +132,8 @@
void BuildNativeGCMap(
std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
void BuildStackMaps(std::vector<uint8_t>* vector);
+ void SaveLiveRegisters(LocationSummary* locations);
+ void RestoreLiveRegisters(LocationSummary* locations);
bool IsLeafMethod() const {
return is_leaf_;
@@ -141,6 +147,7 @@
CodeGenerator(HGraph* graph, size_t number_of_registers)
: frame_size_(kUninitializedFrameSize),
core_spill_mask_(0),
+ first_register_slot_in_slow_path_(0),
graph_(graph),
block_labels_(graph->GetArena(), 0),
pc_infos_(graph->GetArena(), 32),
@@ -166,9 +173,11 @@
// Frame size required for this method.
uint32_t frame_size_;
uint32_t core_spill_mask_;
+ uint32_t first_register_slot_in_slow_path_;
private:
void InitLocations(HInstruction* instruction);
+ size_t GetStackOffsetOfSavedRegister(size_t index);
HGraph* const graph_;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index ad62279..ce1c73d 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -98,10 +98,12 @@
virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
__ Bind(GetEntryLabel());
+ codegen->SaveLiveRegisters(instruction_->GetLocations());
int32_t offset = QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pTestSuspend).Int32Value();
__ ldr(LR, Address(TR, offset));
__ blx(LR);
codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+ codegen->RestoreLiveRegisters(instruction_->GetLocations());
__ b(GetReturnLabel());
}
@@ -182,6 +184,14 @@
stream << ArmManagedRegister::FromDRegister(DRegister(reg));
}
+void CodeGeneratorARM::SaveCoreRegister(Location stack_location, uint32_t reg_id) {
+ __ str(static_cast<Register>(reg_id), Address(SP, stack_location.GetStackIndex()));
+}
+
+void CodeGeneratorARM::RestoreCoreRegister(Location stack_location, uint32_t reg_id) {
+ __ ldr(static_cast<Register>(reg_id), Address(SP, stack_location.GetStackIndex()));
+}
+
CodeGeneratorARM::CodeGeneratorARM(HGraph* graph)
: CodeGenerator(graph, kNumberOfRegIds),
location_builder_(graph, this),
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 2480960..0902fb8 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -132,6 +132,8 @@
virtual void GenerateFrameExit() OVERRIDE;
virtual void Bind(Label* label) OVERRIDE;
virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
+ virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
+ virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
virtual size_t GetWordSize() const OVERRIDE {
return kArmWordSize;
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 3383cb2..9b36a97 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -122,8 +122,10 @@
virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
__ Bind(GetEntryLabel());
+ codegen->SaveLiveRegisters(instruction_->GetLocations());
__ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pTestSuspend)));
codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+ codegen->RestoreLiveRegisters(instruction_->GetLocations());
__ jmp(GetReturnLabel());
}
@@ -161,6 +163,14 @@
stream << X86ManagedRegister::FromXmmRegister(XmmRegister(reg));
}
+void CodeGeneratorX86::SaveCoreRegister(Location stack_location, uint32_t reg_id) {
+ __ movl(Address(ESP, stack_location.GetStackIndex()), static_cast<Register>(reg_id));
+}
+
+void CodeGeneratorX86::RestoreCoreRegister(Location stack_location, uint32_t reg_id) {
+ __ movl(static_cast<Register>(reg_id), Address(ESP, stack_location.GetStackIndex()));
+}
+
CodeGeneratorX86::CodeGeneratorX86(HGraph* graph)
: CodeGenerator(graph, kNumberOfRegIds),
location_builder_(graph, this),
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index f1be0ad..ffcaf60 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -134,6 +134,8 @@
virtual void GenerateFrameExit() OVERRIDE;
virtual void Bind(Label* label) OVERRIDE;
virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
+ virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
+ virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
virtual size_t GetWordSize() const OVERRIDE {
return kX86WordSize;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index ca03af8..065f981 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -103,8 +103,10 @@
virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
__ Bind(GetEntryLabel());
+ codegen->SaveLiveRegisters(instruction_->GetLocations());
__ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pTestSuspend), true));
codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+ codegen->RestoreLiveRegisters(instruction_->GetLocations());
__ jmp(GetReturnLabel());
}
@@ -170,6 +172,14 @@
stream << X86_64ManagedRegister::FromXmmRegister(FloatRegister(reg));
}
+void CodeGeneratorX86_64::SaveCoreRegister(Location stack_location, uint32_t reg_id) {
+ __ movq(Address(CpuRegister(RSP), stack_location.GetStackIndex()), CpuRegister(reg_id));
+}
+
+void CodeGeneratorX86_64::RestoreCoreRegister(Location stack_location, uint32_t reg_id) {
+ __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_location.GetStackIndex()));
+}
+
CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph)
: CodeGenerator(graph, kNumberOfRegIds),
location_builder_(graph, this),
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 78b60fe..ea21872 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -131,6 +131,8 @@
virtual void GenerateFrameExit() OVERRIDE;
virtual void Bind(Label* label) OVERRIDE;
virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
+ virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
+ virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
virtual size_t GetWordSize() const OVERRIDE {
return kX86_64WordSize;
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index fce97bd..1c36cdf 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -28,7 +28,7 @@
call_kind_(call_kind),
stack_mask_(nullptr),
register_mask_(0),
- live_registers_(0) {
+ live_registers_() {
inputs_.SetSize(instruction->InputCount());
for (size_t i = 0; i < instruction->InputCount(); ++i) {
inputs_.Put(i, Location());
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 041e85b..06623b6 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -266,6 +266,34 @@
uword value_;
};
+class RegisterSet : public ValueObject {
+ public:
+ RegisterSet() : core_registers_(0), floating_point_registers_(0) {}
+
+ void Add(Location loc) {
+ // TODO: floating point registers.
+ core_registers_ |= (1 << loc.reg().RegId());
+ }
+
+ bool ContainsCoreRegister(uint32_t id) {
+ return Contains(core_registers_, id);
+ }
+
+ bool ContainsFloatingPointRegister(uint32_t id) {
+ return Contains(floating_point_registers_, id);
+ }
+
+ static bool Contains(uint32_t register_set, uint32_t reg) {
+ return (register_set & (1 << reg)) != 0;
+ }
+
+ private:
+ uint32_t core_registers_;
+ uint32_t floating_point_registers_;
+
+ DISALLOW_COPY_AND_ASSIGN(RegisterSet);
+};
+
/**
* The code generator computes LocationSummary for each instruction so that
* the instruction itself knows what code to generate: where to find the inputs
@@ -327,6 +355,8 @@
Location Out() const { return output_; }
bool CanCall() const { return call_kind_ != kNoCall; }
+ bool WillCall() const { return call_kind_ == kCall; }
+ bool OnlyCallsOnSlowPath() const { return call_kind_ == kCallOnSlowPath; }
bool NeedsSafepoint() const { return CanCall(); }
void SetStackBit(uint32_t index) {
@@ -337,14 +367,22 @@
register_mask_ |= (1 << reg_id);
}
- void SetLiveRegister(uint32_t reg_id) {
- live_registers_ |= (1 << reg_id);
+ bool RegisterContainsObject(uint32_t reg_id) {
+ return RegisterSet::Contains(register_mask_, reg_id);
+ }
+
+ void AddLiveRegister(Location location) {
+ live_registers_.Add(location);
}
BitVector* GetStackMask() const {
return stack_mask_;
}
+ RegisterSet* GetLiveRegisters() {
+ return &live_registers_;
+ }
+
private:
GrowableArray<Location> inputs_;
GrowableArray<Location> temps_;
@@ -359,7 +397,7 @@
uint32_t register_mask_;
// Registers that are in use at this position.
- uint32_t live_registers_;
+ RegisterSet live_registers_;
DISALLOW_COPY_AND_ASSIGN(LocationSummary);
};
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 7862611..7ab14e7 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -45,7 +45,8 @@
number_of_registers_(-1),
registers_array_(nullptr),
blocked_registers_(allocator->AllocArray<bool>(codegen->GetNumberOfRegisters())),
- reserved_out_slots_(0) {
+ reserved_out_slots_(0),
+ maximum_number_of_live_registers_(0) {
codegen->SetupBlockedRegisters(blocked_registers_);
physical_register_intervals_.SetSize(codegen->GetNumberOfRegisters());
// Always reserve for the current method and the graph's max out registers.
@@ -157,9 +158,34 @@
}
}
+ bool core_register = (instruction->GetType() != Primitive::kPrimDouble)
+ && (instruction->GetType() != Primitive::kPrimFloat);
+
+ GrowableArray<LiveInterval*>& unhandled = core_register
+ ? unhandled_core_intervals_
+ : unhandled_fp_intervals_;
+
if (locations->CanCall()) {
- codegen_->MarkNotLeaf();
+ if (!instruction->IsSuspendCheck()) {
+ codegen_->MarkNotLeaf();
+ }
safepoints_.Add(instruction);
+ if (locations->OnlyCallsOnSlowPath()) {
+ // We add a synthesized range at this position to record the live registers
+ // at this position. Ideally, we could just update the safepoints when locations
+ // are updated, but we currently need to know the full stack size before updating
+ // locations (because of parameters and the fact that we don't have a frame pointer).
+ // And knowing the full stack size requires knowing the maximum number of live
+ // registers at calls in slow paths.
+ // By adding the following interval in the algorithm, we can compute this
+ // maximum before updating locations.
+ LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction);
+ interval->AddRange(position, position + 1);
+ unhandled.Add(interval);
+ }
+ }
+
+ if (locations->WillCall()) {
// Block all registers.
for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
BlockRegister(Location::RegisterLocation(ManagedRegister(i)),
@@ -176,12 +202,6 @@
}
}
- bool core_register = (instruction->GetType() != Primitive::kPrimDouble)
- && (instruction->GetType() != Primitive::kPrimFloat);
- GrowableArray<LiveInterval*>& unhandled = core_register
- ? unhandled_core_intervals_
- : unhandled_fp_intervals_;
-
LiveInterval* current = instruction->GetLiveInterval();
if (current == nullptr) return;
@@ -405,6 +425,14 @@
}
}
+ if (current->IsSlowPathSafepoint()) {
+ // Synthesized interval to record the maximum number of live registers
+ // at safepoints. No need to allocate a register for it.
+ maximum_number_of_live_registers_ =
+ std::max(maximum_number_of_live_registers_, active_.Size());
+ continue;
+ }
+
// (4) Try to find an available register.
bool success = TryAllocateFreeReg(current);
@@ -926,14 +954,13 @@
LocationSummary* locations = safepoint->GetLocations();
if (!current->Covers(position)) continue;
- if (current->GetType() == Primitive::kPrimNot) {
- DCHECK(current->GetParent()->HasSpillSlot());
+ if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) {
locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize);
}
switch (source.GetKind()) {
case Location::kRegister: {
- locations->SetLiveRegister(source.reg().RegId());
+ locations->AddLiveRegister(source);
if (current->GetType() == Primitive::kPrimNot) {
locations->SetRegisterBit(source.reg().RegId());
}
@@ -1016,7 +1043,8 @@
}
void RegisterAllocator::Resolve() {
- codegen_->ComputeFrameSize(spill_slots_.Size(), reserved_out_slots_);
+ codegen_->ComputeFrameSize(
+ spill_slots_.Size(), maximum_number_of_live_registers_, reserved_out_slots_);
// Adjust the Out Location of instructions.
// TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration.
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index 7d397e3..3c305c8 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -179,6 +179,9 @@
// Slots reserved for out arguments.
size_t reserved_out_slots_;
// The maximum number of live registers at safepoints.
+ size_t maximum_number_of_live_registers_;
+
FRIEND_TEST(RegisterAllocatorTest, FreeUntil);
DISALLOW_COPY_AND_ASSIGN(RegisterAllocator);
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 33b1f1f..dea6181 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -138,7 +138,8 @@
HInstruction* defined_by = nullptr,
bool is_fixed = false,
int reg = kNoRegister,
- bool is_temp = false)
+ bool is_temp = false,
+ bool is_slow_path_safepoint = false)
: allocator_(allocator),
first_range_(nullptr),
last_range_(nullptr),
@@ -150,8 +151,14 @@
spill_slot_(kNoSpillSlot),
is_fixed_(is_fixed),
is_temp_(is_temp),
+ is_slow_path_safepoint_(is_slow_path_safepoint),
defined_by_(defined_by) {}
+ static LiveInterval* MakeSlowPathInterval(ArenaAllocator* allocator, HInstruction* instruction) {
+ return new (allocator) LiveInterval(
+ allocator, Primitive::kPrimVoid, instruction, false, kNoRegister, false, true);
+ }
+
static LiveInterval* MakeFixedInterval(ArenaAllocator* allocator, int reg, Primitive::Type type) {
return new (allocator) LiveInterval(allocator, type, nullptr, true, reg, false);
}
@@ -163,6 +170,7 @@
}
bool IsFixed() const { return is_fixed_; }
+ bool IsSlowPathSafepoint() const { return is_slow_path_safepoint_; }
void AddUse(HInstruction* instruction, size_t input_index, bool is_environment) {
// Set the use within the instruction.
@@ -480,6 +488,9 @@
// Whether the interval is for a temporary.
const bool is_temp_;
// Whether the interval is for a safepoint that calls on the slow path.
+ const bool is_slow_path_safepoint_;
+
// The instruction represented by this interval.
HInstruction* const defined_by_;