| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | compiler/optimizing/code_generator.cc | 52 |
| -rw-r--r-- | compiler/optimizing/code_generator.h | 11 |
| -rw-r--r-- | compiler/optimizing/code_generator_arm.cc | 10 |
| -rw-r--r-- | compiler/optimizing/code_generator_arm.h | 2 |
| -rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 10 |
| -rw-r--r-- | compiler/optimizing/code_generator_x86.h | 2 |
| -rw-r--r-- | compiler/optimizing/code_generator_x86_64.cc | 10 |
| -rw-r--r-- | compiler/optimizing/code_generator_x86_64.h | 2 |
| -rw-r--r-- | compiler/optimizing/locations.cc | 2 |
| -rw-r--r-- | compiler/optimizing/locations.h | 44 |
| -rw-r--r-- | compiler/optimizing/register_allocator.cc | 52 |
| -rw-r--r-- | compiler/optimizing/register_allocator.h | 3 |
| -rw-r--r-- | compiler/optimizing/ssa_liveness_analysis.h | 13 |
13 files changed, 194 insertions, 19 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 2547a294d4..3231c99a7b 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -44,6 +44,7 @@ void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) {
   ComputeFrameSize(GetGraph()->GetNumberOfLocalVRegs()
                      + GetGraph()->GetNumberOfTemporaries()
                      + 1 /* filler */,
+                   0, /* the baseline compiler does not have live registers at slow path */
                    GetGraph()->GetMaximumNumberOfOutVRegs()
                      + 1 /* current method */);
   GenerateFrameEntry();
@@ -111,10 +112,15 @@ size_t CodeGenerator::AllocateFreeRegisterInternal(
   return -1;
 }
 
-void CodeGenerator::ComputeFrameSize(size_t number_of_spill_slots, size_t number_of_out_slots) {
+void CodeGenerator::ComputeFrameSize(size_t number_of_spill_slots,
+                                     size_t maximum_number_of_live_registers,
+                                     size_t number_of_out_slots) {
+  first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize;
+
   SetFrameSize(RoundUp(
       number_of_spill_slots * kVRegSize
       + number_of_out_slots * kVRegSize
+      + maximum_number_of_live_registers * GetWordSize()
       + FrameEntrySpillSize(),
       kStackAlignment));
 }
@@ -468,4 +474,48 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) {
   }
 }
 
+size_t CodeGenerator::GetStackOffsetOfSavedRegister(size_t index) {
+  return first_register_slot_in_slow_path_ + index * GetWordSize();
+}
+
+void CodeGenerator::SaveLiveRegisters(LocationSummary* locations) {
+  RegisterSet* register_set = locations->GetLiveRegisters();
+  uint32_t count = 0;
+  for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) {
+    if (register_set->ContainsCoreRegister(i)) {
+      size_t stack_offset = GetStackOffsetOfSavedRegister(count);
+      ++count;
+      SaveCoreRegister(Location::StackSlot(stack_offset), i);
+      // If the register holds an object, update the stack mask.
+      if (locations->RegisterContainsObject(i)) {
+        locations->SetStackBit(stack_offset / kVRegSize);
+      }
+    }
+  }
+
+  for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) {
+    if (register_set->ContainsFloatingPointRegister(i)) {
+      LOG(FATAL) << "Unimplemented";
+    }
+  }
+}
+
+void CodeGenerator::RestoreLiveRegisters(LocationSummary* locations) {
+  RegisterSet* register_set = locations->GetLiveRegisters();
+  uint32_t count = 0;
+  for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) {
+    if (register_set->ContainsCoreRegister(i)) {
+      size_t stack_offset = GetStackOffsetOfSavedRegister(count);
+      ++count;
+      RestoreCoreRegister(Location::StackSlot(stack_offset), i);
+    }
+  }
+
+  for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) {
+    if (register_set->ContainsFloatingPointRegister(i)) {
+      LOG(FATAL) << "Unimplemented";
+    }
+  }
+}
+
 }  // namespace art
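As a reading aid for the two helpers above: SaveLiveRegisters walks the core-register set in ascending register-id order and hands out consecutive word-sized slots starting at first_register_slot_in_slow_path_, and RestoreLiveRegisters repeats the identical walk, so both sides agree on which slot holds which register without extra bookkeeping. The sketch below is a minimal, self-contained illustration of that slot assignment; SimpleRegisterSet, the 4-byte word size, and the slot constants are illustrative stand-ins, not ART's real types or values.

```cpp
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

// Illustrative stand-in; ART's RegisterSet/CodeGenerator are richer than this.
struct SimpleRegisterSet {
  uint32_t core = 0;
  void Add(int reg) { core |= (1u << reg); }
  bool Contains(int reg) const { return (core & (1u << reg)) != 0; }
};

constexpr size_t kWordSize = 4;                      // e.g. ARM/x86
constexpr size_t kFirstRegisterSlotInSlowPath = 32;  // (out + spill slots) * kVRegSize, assumed

// Registers are visited in ascending id order and get consecutive slots, so a
// later restore pass using the same loop finds each register in the same slot.
std::vector<std::pair<int, size_t>> AssignSaveSlots(const SimpleRegisterSet& live,
                                                    int number_of_core_registers) {
  std::vector<std::pair<int, size_t>> slots;
  size_t count = 0;
  for (int reg = 0; reg < number_of_core_registers; ++reg) {
    if (live.Contains(reg)) {
      slots.push_back({reg, kFirstRegisterSlotInSlowPath + count * kWordSize});
      ++count;
    }
  }
  return slots;
}

int main() {
  SimpleRegisterSet live;
  live.Add(0);
  live.Add(2);
  live.Add(5);
  for (const auto& [reg, offset] : AssignSaveSlots(live, 16)) {
    std::printf("save r%d at [sp + %zu]\n", reg, offset);
  }
  return 0;
}
```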
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index a83d7032e3..55f5d8df5f 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -98,7 +98,9 @@ class CodeGenerator : public ArenaObject {
   virtual HGraphVisitor* GetInstructionVisitor() = 0;
   virtual Assembler* GetAssembler() = 0;
   virtual size_t GetWordSize() const = 0;
-  void ComputeFrameSize(size_t number_of_spill_slots, size_t number_of_out_slots);
+  void ComputeFrameSize(size_t number_of_spill_slots,
+                        size_t maximum_number_of_live_registers,
+                        size_t number_of_out_slots);
   virtual size_t FrameEntrySpillSize() const = 0;
   int32_t GetStackSlot(HLocal* local) const;
   Location GetTemporaryLocation(HTemporary* temp) const;
@@ -114,6 +116,8 @@ class CodeGenerator : public ArenaObject {
   virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
   virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
   virtual InstructionSet GetInstructionSet() const = 0;
+  virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) = 0;
+  virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) = 0;
 
   void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc);
 
@@ -128,6 +132,8 @@ class CodeGenerator : public ArenaObject {
   void BuildNativeGCMap(
       std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
   void BuildStackMaps(std::vector<uint8_t>* vector);
+  void SaveLiveRegisters(LocationSummary* locations);
+  void RestoreLiveRegisters(LocationSummary* locations);
 
   bool IsLeafMethod() const {
     return is_leaf_;
@@ -141,6 +147,7 @@ class CodeGenerator : public ArenaObject {
   CodeGenerator(HGraph* graph, size_t number_of_registers)
       : frame_size_(kUninitializedFrameSize),
         core_spill_mask_(0),
+        first_register_slot_in_slow_path_(0),
         graph_(graph),
         block_labels_(graph->GetArena(), 0),
         pc_infos_(graph->GetArena(), 32),
@@ -166,9 +173,11 @@ class CodeGenerator : public ArenaObject {
   // Frame size required for this method.
   uint32_t frame_size_;
   uint32_t core_spill_mask_;
+  uint32_t first_register_slot_in_slow_path_;
 
  private:
   void InitLocations(HInstruction* instruction);
+  size_t GetStackOffsetOfSavedRegister(size_t index);
 
   HGraph* const graph_;
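The frame layout implied by this header change can be summarized with a little arithmetic: the register-save area for slow paths sits after the spill and out slots, and the total is rounded up to the stack alignment. Below is a minimal sketch of that computation; the concrete numbers (word size, frame-entry spill size, slot counts) are made-up example values standing in for the backend-specific ones.

```cpp
#include <cstddef>
#include <cstdio>

constexpr size_t kVRegSize = 4;
constexpr size_t kStackAlignment = 16;

size_t RoundUp(size_t x, size_t n) { return ((x + n - 1) / n) * n; }

// Mirrors the shape of ComputeFrameSize after this change: spill slots and out
// slots come first, then word-sized slots reserved for registers saved at
// slow-path calls, then the frame-entry spills, all rounded to the alignment.
size_t ComputeFrameSize(size_t spill_slots,
                        size_t max_live_registers,
                        size_t out_slots,
                        size_t word_size,
                        size_t frame_entry_spill_size) {
  return RoundUp(spill_slots * kVRegSize
                     + out_slots * kVRegSize
                     + max_live_registers * word_size
                     + frame_entry_spill_size,
                 kStackAlignment);
}

int main() {
  size_t spill_slots = 3, out_slots = 2, max_live = 2;
  // First byte offset of the slow-path register-save area.
  size_t first_register_slot_in_slow_path = (out_slots + spill_slots) * kVRegSize;
  std::printf("first slow-path slot: %zu\n", first_register_slot_in_slow_path);
  std::printf("frame size: %zu\n",
              ComputeFrameSize(spill_slots, max_live, out_slots, /*word_size=*/4,
                               /*frame_entry_spill_size=*/4));
  return 0;
}
```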
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 804d352e72..206ed13b72 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -98,10 +98,12 @@ class SuspendCheckSlowPathARM : public SlowPathCode {
 
   virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
+    codegen->SaveLiveRegisters(instruction_->GetLocations());
     int32_t offset = QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pTestSuspend).Int32Value();
     __ ldr(LR, Address(TR, offset));
     __ blx(LR);
     codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+    codegen->RestoreLiveRegisters(instruction_->GetLocations());
     __ b(GetReturnLabel());
   }
 
@@ -182,6 +184,14 @@ void CodeGeneratorARM::DumpFloatingPointRegister(std::ostream& stream, int reg)
   stream << ArmManagedRegister::FromDRegister(DRegister(reg));
 }
 
+void CodeGeneratorARM::SaveCoreRegister(Location stack_location, uint32_t reg_id) {
+  __ str(static_cast<Register>(reg_id), Address(SP, stack_location.GetStackIndex()));
+}
+
+void CodeGeneratorARM::RestoreCoreRegister(Location stack_location, uint32_t reg_id) {
+  __ ldr(static_cast<Register>(reg_id), Address(SP, stack_location.GetStackIndex()));
+}
+
 CodeGeneratorARM::CodeGeneratorARM(HGraph* graph)
     : CodeGenerator(graph, kNumberOfRegIds),
       location_builder_(graph, this),
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 2480960f32..0902fb84ec 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -132,6 +132,8 @@ class CodeGeneratorARM : public CodeGenerator {
   virtual void GenerateFrameExit() OVERRIDE;
   virtual void Bind(Label* label) OVERRIDE;
   virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
+  virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
+  virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
 
   virtual size_t GetWordSize() const OVERRIDE {
     return kArmWordSize;
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index f7b849564d..0db4311f03 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -122,8 +122,10 @@ class SuspendCheckSlowPathX86 : public SlowPathCode {
 
   virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
+    codegen->SaveLiveRegisters(instruction_->GetLocations());
     __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pTestSuspend)));
     codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+    codegen->RestoreLiveRegisters(instruction_->GetLocations());
     __ jmp(GetReturnLabel());
   }
 
@@ -161,6 +163,14 @@ void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg)
   stream << X86ManagedRegister::FromXmmRegister(XmmRegister(reg));
 }
 
+void CodeGeneratorX86::SaveCoreRegister(Location stack_location, uint32_t reg_id) {
+  __ movl(Address(ESP, stack_location.GetStackIndex()), static_cast<Register>(reg_id));
+}
+
+void CodeGeneratorX86::RestoreCoreRegister(Location stack_location, uint32_t reg_id) {
+  __ movl(static_cast<Register>(reg_id), Address(ESP, stack_location.GetStackIndex()));
+}
+
 CodeGeneratorX86::CodeGeneratorX86(HGraph* graph)
     : CodeGenerator(graph, kNumberOfRegIds),
       location_builder_(graph, this),
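All of the suspend-check slow paths now follow the same ordering: save the live registers, make the runtime call, record the safepoint PC, restore, and branch back to the fast path. The hypothetical mock below (MockCodeGen is not an ART class) only logs that ordering; recording the PC after the registers have been spilled is what lets the stack-mask bits set during the save describe where the object references actually are.

```cpp
#include <cstdio>

// Hypothetical stand-in for CodeGenerator; each method just logs one step of
// the slow-path sequence used by the ARM/x86/x86-64 backends in this patch.
struct MockCodeGen {
  void SaveLiveRegisters() { std::puts("save live registers to slow-path slots"); }
  void CallRuntime(const char* entrypoint) { std::printf("call %s\n", entrypoint); }
  void RecordPcInfo() { std::puts("record safepoint PC info"); }
  void RestoreLiveRegisters() { std::puts("restore live registers"); }
  void JumpBack() { std::puts("jump back to fast path"); }
};

// Save first, then call and record the safepoint, then restore, then resume.
void EmitSuspendCheckSlowPath(MockCodeGen& cg) {
  cg.SaveLiveRegisters();
  cg.CallRuntime("pTestSuspend");
  cg.RecordPcInfo();
  cg.RestoreLiveRegisters();
  cg.JumpBack();
}

int main() {
  MockCodeGen cg;
  EmitSuspendCheckSlowPath(cg);
  return 0;
}
```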
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index f1be0ad5b7..ffcaf6076c 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -134,6 +134,8 @@ class CodeGeneratorX86 : public CodeGenerator {
   virtual void GenerateFrameExit() OVERRIDE;
   virtual void Bind(Label* label) OVERRIDE;
   virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
+  virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
+  virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
 
   virtual size_t GetWordSize() const OVERRIDE {
     return kX86WordSize;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 283d850ef0..56198aff3a 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -103,8 +103,10 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode {
 
   virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
+    codegen->SaveLiveRegisters(instruction_->GetLocations());
     __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pTestSuspend), true));
     codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+    codegen->RestoreLiveRegisters(instruction_->GetLocations());
     __ jmp(GetReturnLabel());
   }
 
@@ -170,6 +172,14 @@ void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int re
   stream << X86_64ManagedRegister::FromXmmRegister(FloatRegister(reg));
 }
 
+void CodeGeneratorX86_64::SaveCoreRegister(Location stack_location, uint32_t reg_id) {
+  __ movq(Address(CpuRegister(RSP), stack_location.GetStackIndex()), CpuRegister(reg_id));
+}
+
+void CodeGeneratorX86_64::RestoreCoreRegister(Location stack_location, uint32_t reg_id) {
+  __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_location.GetStackIndex()));
+}
+
 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph)
     : CodeGenerator(graph, kNumberOfRegIds),
       location_builder_(graph, this),
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 78b60fe93c..ea21872100 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -131,6 +131,8 @@ class CodeGeneratorX86_64 : public CodeGenerator {
   virtual void GenerateFrameExit() OVERRIDE;
   virtual void Bind(Label* label) OVERRIDE;
   virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
+  virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
+  virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
 
   virtual size_t GetWordSize() const OVERRIDE {
     return kX86_64WordSize;
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index fce97bd5db..1c36cdf77c 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -28,7 +28,7 @@ LocationSummary::LocationSummary(HInstruction* instruction, CallKind call_kind)
       call_kind_(call_kind),
       stack_mask_(nullptr),
       register_mask_(0),
-      live_registers_(0) {
+      live_registers_() {
   inputs_.SetSize(instruction->InputCount());
   for (size_t i = 0; i < instruction->InputCount(); ++i) {
     inputs_.Put(i, Location());
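One backend-specific detail worth noting: the saved-register area is measured in GetWordSize() units, which is why ARM/x86 spill with 32-bit str/movl while x86-64 uses 64-bit movq, and why the same slot index maps to different byte offsets per backend. A small sketch of that offset arithmetic, with an assumed example value for first_register_slot_in_slow_path_:

```cpp
#include <cstddef>
#include <cstdio>

// Byte offset of the index-th saved register, given the backend word size.
size_t SavedRegisterOffset(size_t first_slot_byte_offset, size_t index, size_t word_size) {
  return first_slot_byte_offset + index * word_size;
}

int main() {
  const size_t first = 20;  // assumed first_register_slot_in_slow_path_ for illustration
  std::printf("x86/ARM, 2nd saved reg: [sp  + %zu]\n", SavedRegisterOffset(first, 1, 4));
  std::printf("x86-64,  2nd saved reg: [rsp + %zu]\n", SavedRegisterOffset(first, 1, 8));
  return 0;
}
```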
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 041e85b846..06623b6adc 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -266,6 +266,34 @@ class Location : public ValueObject {
   uword value_;
 };
 
+class RegisterSet : public ValueObject {
+ public:
+  RegisterSet() : core_registers_(0), floating_point_registers_(0) {}
+
+  void Add(Location loc) {
+    // TODO: floating point registers.
+    core_registers_ |= (1 << loc.reg().RegId());
+  }
+
+  bool ContainsCoreRegister(uint32_t id) {
+    return Contains(core_registers_, id);
+  }
+
+  bool ContainsFloatingPointRegister(uint32_t id) {
+    return Contains(floating_point_registers_, id);
+  }
+
+  static bool Contains(uint32_t register_set, uint32_t reg) {
+    return (register_set & (1 << reg)) != 0;
+  }
+
+ private:
+  uint32_t core_registers_;
+  uint32_t floating_point_registers_;
+
+  DISALLOW_COPY_AND_ASSIGN(RegisterSet);
+};
+
 /**
  * The code generator computes LocationSummary for each instruction so that
  * the instruction itself knows what code to generate: where to find the inputs
@@ -327,6 +355,8 @@ class LocationSummary : public ArenaObject {
   Location Out() const { return output_; }
 
   bool CanCall() const { return call_kind_ != kNoCall; }
+  bool WillCall() const { return call_kind_ == kCall; }
+  bool OnlyCallsOnSlowPath() const { return call_kind_ == kCallOnSlowPath; }
   bool NeedsSafepoint() const { return CanCall(); }
 
   void SetStackBit(uint32_t index) {
@@ -337,14 +367,22 @@ class LocationSummary : public ArenaObject {
     register_mask_ |= (1 << reg_id);
   }
 
-  void SetLiveRegister(uint32_t reg_id) {
-    live_registers_ |= (1 << reg_id);
+  bool RegisterContainsObject(uint32_t reg_id) {
+    return RegisterSet::Contains(register_mask_, reg_id);
+  }
+
+  void AddLiveRegister(Location location) {
+    live_registers_.Add(location);
   }
 
   BitVector* GetStackMask() const {
     return stack_mask_;
   }
 
+  RegisterSet* GetLiveRegisters() {
+    return &live_registers_;
+  }
+
 private:
   GrowableArray<Location> inputs_;
   GrowableArray<Location> temps_;
@@ -359,7 +397,7 @@ class LocationSummary : public ArenaObject {
   uint32_t register_mask_;
 
   // Registers that are in use at this position.
-  uint32_t live_registers_;
+  RegisterSet live_registers_;
 
   DISALLOW_COPY_AND_ASSIGN(LocationSummary);
 };
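The locations.h change keeps two independent bit sets: live_registers_ records what must be saved around a slow-path call, while register_mask_ records which registers hold object references, and the new RegisterContainsObject simply reuses RegisterSet::Contains on the latter. A standalone sketch of how the two masks combine during a save (register numbers and masks are arbitrary examples, not ART code):

```cpp
#include <cstdint>
#include <cstdio>

// Same bit-test used by RegisterSet::Contains in the patch.
static bool Contains(uint32_t set, uint32_t reg) { return (set & (1u << reg)) != 0; }

int main() {
  uint32_t live_registers = 0;  // registers to save at the slow-path call
  uint32_t register_mask = 0;   // registers that hold object references

  live_registers |= (1u << 1);  // r1 is live across the call
  live_registers |= (1u << 3);  // r3 is live across the call
  register_mask  |= (1u << 3);  // ...and r3 holds an object

  for (unsigned reg = 0; reg < 8; ++reg) {
    if (!Contains(live_registers, reg)) continue;
    // Only object-holding registers get a stack-mask bit once they have been
    // spilled to their slow-path slot, so the GC can find the reference there.
    std::printf("save r%u%s\n", reg,
                Contains(register_mask, reg) ? " (set stack-mask bit)" : "");
  }
  return 0;
}
```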
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 9ba75b8da4..1ac9b78a7e 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -45,7 +45,8 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator,
         number_of_registers_(-1),
         registers_array_(nullptr),
         blocked_registers_(allocator->AllocArray<bool>(codegen->GetNumberOfRegisters())),
-        reserved_out_slots_(0) {
+        reserved_out_slots_(0),
+        maximum_number_of_live_registers_(0) {
   codegen->SetupBlockedRegisters(blocked_registers_);
   physical_register_intervals_.SetSize(codegen->GetNumberOfRegisters());
   // Always reserve for the current method and the graph's max out registers.
@@ -157,9 +158,34 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) {
     }
   }
 
+  bool core_register = (instruction->GetType() != Primitive::kPrimDouble)
+      && (instruction->GetType() != Primitive::kPrimFloat);
+
+  GrowableArray<LiveInterval*>& unhandled = core_register
+      ? unhandled_core_intervals_
+      : unhandled_fp_intervals_;
+
   if (locations->CanCall()) {
-    codegen_->MarkNotLeaf();
+    if (!instruction->IsSuspendCheck()) {
+      codegen_->MarkNotLeaf();
+    }
     safepoints_.Add(instruction);
+    if (locations->OnlyCallsOnSlowPath()) {
+      // We add a synthesized range at this position to record the live registers
+      // at this position. Ideally, we could just update the safepoints when locations
+      // are updated, but we currently need to know the full stack size before updating
+      // locations (because of parameters and the fact that we don't have a frame pointer).
+      // And knowing the full stack size requires to know the maximum number of live
+      // registers at calls in slow paths.
+      // By adding the following interval in the algorithm, we can compute this
+      // maximum before updating locations.
+      LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction);
+      interval->AddRange(position, position + 1);
+      unhandled.Add(interval);
+    }
+  }
+
+  if (locations->WillCall()) {
     // Block all registers.
     for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
       BlockRegister(Location::RegisterLocation(ManagedRegister(i)),
@@ -176,12 +202,6 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) {
     }
   }
 
-  bool core_register = (instruction->GetType() != Primitive::kPrimDouble)
-      && (instruction->GetType() != Primitive::kPrimFloat);
-  GrowableArray<LiveInterval*>& unhandled = core_register
-      ? unhandled_core_intervals_
-      : unhandled_fp_intervals_;
-
   LiveInterval* current = instruction->GetLiveInterval();
   if (current == nullptr) return;
 
@@ -405,6 +425,14 @@ void RegisterAllocator::LinearScan() {
       }
     }
 
+    if (current->IsSlowPathSafepoint()) {
+      // Synthesized interval to record the maximum number of live registers
+      // at safepoints. No need to allocate a register for it.
+      maximum_number_of_live_registers_ =
+          std::max(maximum_number_of_live_registers_, active_.Size());
+      continue;
+    }
+
     // (4) Try to find an available register.
     bool success = TryAllocateFreeReg(current);
 
@@ -930,14 +958,13 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
       LocationSummary* locations = safepoint->GetLocations();
       if (!current->Covers(position)) continue;
 
-      if (current->GetType() == Primitive::kPrimNot) {
-        DCHECK(current->GetParent()->HasSpillSlot());
+      if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) {
         locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize);
       }
 
       switch (source.GetKind()) {
         case Location::kRegister: {
-          locations->SetLiveRegister(source.reg().RegId());
+          locations->AddLiveRegister(source);
           if (current->GetType() == Primitive::kPrimNot) {
             locations->SetRegisterBit(source.reg().RegId());
           }
@@ -1020,7 +1047,8 @@ static Location FindLocationAt(LiveInterval* interval, size_t position) {
 }
 
 void RegisterAllocator::Resolve() {
-  codegen_->ComputeFrameSize(spill_slots_.Size(), reserved_out_slots_);
+  codegen_->ComputeFrameSize(
+      spill_slots_.Size(), maximum_number_of_live_registers_, reserved_out_slots_);
 
   // Adjust the Out Location of instructions.
   // TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration.
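The register-allocator change can be read as: an instruction that only calls on a slow path contributes a synthesized one-position interval, and when the linear scan reaches it, the number of currently active intervals is sampled; the maximum over all such samples later feeds ComputeFrameSize in Resolve. The toy model below is not ART's allocator and ignores register assignment entirely; it only shows that sampling step over simple [start, end) intervals.

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

// Toy interval: a [start, end) range; a slow-path safepoint is a one-position
// "interval" whose only job is to sample how many real intervals are active.
struct Interval {
  size_t start, end;
  bool is_slow_path_safepoint;
};

size_t MaxLiveAtSafepoints(std::vector<Interval> unhandled) {
  std::sort(unhandled.begin(), unhandled.end(),
            [](const Interval& a, const Interval& b) { return a.start < b.start; });
  std::vector<Interval> active;
  size_t maximum = 0;
  for (const Interval& current : unhandled) {
    // Expire intervals that ended at or before the current position.
    active.erase(std::remove_if(active.begin(), active.end(),
                                [&](const Interval& it) { return it.end <= current.start; }),
                 active.end());
    if (current.is_slow_path_safepoint) {
      maximum = std::max(maximum, active.size());
      continue;  // never allocate anything for the synthesized interval
    }
    active.push_back(current);
  }
  return maximum;
}

int main() {
  std::vector<Interval> intervals = {
      {0, 20, false}, {4, 30, false}, {10, 11, true /* safepoint at 10 */}, {12, 18, false}};
  std::printf("max live registers at safepoints: %zu\n", MaxLiveAtSafepoints(intervals));
  return 0;
}
```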
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index 7d397e3649..3c305c8f58 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -179,6 +179,9 @@ class RegisterAllocator {
   // Slots reserved for out arguments.
   size_t reserved_out_slots_;
 
+  // The maximum live registers at safepoints.
+  size_t maximum_number_of_live_registers_;
+
   FRIEND_TEST(RegisterAllocatorTest, FreeUntil);
 
   DISALLOW_COPY_AND_ASSIGN(RegisterAllocator);
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 33b1f1fc9a..dea6181cb2 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -138,7 +138,8 @@ class LiveInterval : public ArenaObject {
                HInstruction* defined_by = nullptr,
                bool is_fixed = false,
                int reg = kNoRegister,
-               bool is_temp = false)
+               bool is_temp = false,
+               bool is_slow_path_safepoint = false)
       : allocator_(allocator),
         first_range_(nullptr),
         last_range_(nullptr),
@@ -150,8 +151,14 @@ class LiveInterval : public ArenaObject {
         spill_slot_(kNoSpillSlot),
         is_fixed_(is_fixed),
         is_temp_(is_temp),
+        is_slow_path_safepoint_(is_slow_path_safepoint),
         defined_by_(defined_by) {}
 
+  static LiveInterval* MakeSlowPathInterval(ArenaAllocator* allocator, HInstruction* instruction) {
+    return new (allocator) LiveInterval(
+        allocator, Primitive::kPrimVoid, instruction, false, kNoRegister, false, true);
+  }
+
   static LiveInterval* MakeFixedInterval(ArenaAllocator* allocator, int reg, Primitive::Type type) {
     return new (allocator) LiveInterval(allocator, type, nullptr, true, reg, false);
   }
@@ -163,6 +170,7 @@ class LiveInterval : public ArenaObject {
   }
 
   bool IsFixed() const { return is_fixed_; }
+  bool IsSlowPathSafepoint() const { return is_slow_path_safepoint_; }
 
   void AddUse(HInstruction* instruction, size_t input_index, bool is_environment) {
     // Set the use within the instruction.
@@ -480,6 +488,9 @@ class LiveInterval : public ArenaObject {
   // Whether the interval is for a temporary.
   const bool is_temp_;
 
+  // Whether the interval is for a safepoint that calls on slow path.
+  const bool is_slow_path_safepoint_;
+
   // The instruction represented by this interval.
   HInstruction* const defined_by_;
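Finally, the ssa_liveness_analysis.h change follows the file's existing pattern of defaulted constructor flags plus named factories (MakeFixedInterval, now MakeSlowPathInterval). A simplified echo of that pattern, with stand-in names (ToyInterval and its members are hypothetical, not the ART class):

```cpp
#include <cstdio>

// Defaulted boolean flags on one private constructor, exposed through named
// factories so call sites stay readable as the flag list grows.
class ToyInterval {
 public:
  static ToyInterval MakeFixed(int reg) {
    return ToyInterval(reg, /*is_slow_path_safepoint=*/false);
  }
  static ToyInterval MakeSlowPathSafepoint() {
    return ToyInterval(/*reg=*/-1, /*is_slow_path_safepoint=*/true);
  }

  int reg() const { return reg_; }
  bool IsSlowPathSafepoint() const { return is_slow_path_safepoint_; }

 private:
  ToyInterval(int reg, bool is_slow_path_safepoint)
      : reg_(reg), is_slow_path_safepoint_(is_slow_path_safepoint) {}

  int reg_;
  bool is_slow_path_safepoint_;
};

int main() {
  ToyInterval fixed = ToyInterval::MakeFixed(3);
  ToyInterval safepoint = ToyInterval::MakeSlowPathSafepoint();
  std::printf("fixed: reg=%d, safepoint=%d\n", fixed.reg(), fixed.IsSlowPathSafepoint());
  std::printf("slow path: reg=%d, safepoint=%d\n", safepoint.reg(), safepoint.IsSlowPathSafepoint());
  return 0;
}
```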