diff options
Diffstat (limited to 'compiler/optimizing')
| -rw-r--r-- | compiler/optimizing/code_generator.cc | 46 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator.h | 41 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_arm.cc | 17 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_arm64.cc | 34 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 11 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_x86_64.cc | 8 | ||||
| -rw-r--r-- | compiler/optimizing/nodes.h | 12 | ||||
| -rw-r--r-- | compiler/optimizing/register_allocator.cc | 9 | ||||
| -rw-r--r-- | compiler/optimizing/ssa_liveness_analysis.cc | 5 |
9 files changed, 136 insertions, 47 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index fd4e391470..d0739a6de2 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -41,8 +41,6 @@ size_t CodeGenerator::GetCacheOffset(uint32_t index) { } void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) { - DCHECK_EQ(frame_size_, kUninitializedFrameSize); - Initialize(); if (!is_leaf) { MarkNotLeaf(); @@ -59,7 +57,6 @@ void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) { } void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) { - HGraphVisitor* location_builder = GetLocationBuilder(); HGraphVisitor* instruction_visitor = GetInstructionVisitor(); DCHECK_EQ(current_block_index_, 0u); GenerateFrameEntry(); @@ -69,8 +66,7 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* current = it.Current(); if (is_baseline) { - current->Accept(location_builder); - InitLocations(current); + InitLocationsBaseline(current); } current->Accept(instruction_visitor); } @@ -88,7 +84,6 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) void CodeGenerator::CompileOptimized(CodeAllocator* allocator) { // The register allocator already called `InitializeCodeGeneration`, // where the frame size has been computed. - DCHECK_NE(frame_size_, kUninitializedFrameSize); DCHECK(block_order_ != nullptr); Initialize(); CompileInternal(allocator, /* is_baseline */ false); @@ -138,13 +133,22 @@ void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, ComputeSpillMask(); first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize; - SetFrameSize(RoundUp( - number_of_spill_slots * kVRegSize - + number_of_out_slots * kVRegSize - + maximum_number_of_live_core_registers * GetWordSize() - + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize() - + FrameEntrySpillSize(), - kStackAlignment)); + if (number_of_spill_slots == 0 + && !HasAllocatedCalleeSaveRegisters() + && IsLeafMethod() + && !RequiresCurrentMethod()) { + DCHECK_EQ(maximum_number_of_live_core_registers, 0u); + DCHECK_EQ(maximum_number_of_live_fp_registers, 0u); + SetFrameSize(CallPushesPC() ? GetWordSize() : 0); + } else { + SetFrameSize(RoundUp( + number_of_spill_slots * kVRegSize + + number_of_out_slots * kVRegSize + + maximum_number_of_live_core_registers * GetWordSize() + + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize() + + FrameEntrySpillSize(), + kStackAlignment)); + } } Location CodeGenerator::GetTemporaryLocation(HTemporary* temp) const { @@ -294,7 +298,8 @@ void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const { } } -void CodeGenerator::InitLocations(HInstruction* instruction) { +void CodeGenerator::InitLocationsBaseline(HInstruction* instruction) { + AllocateLocations(instruction); if (instruction->GetLocations() == nullptr) { if (instruction->IsTemporary()) { HInstruction* previous = instruction->GetPrevious(); @@ -320,6 +325,19 @@ void CodeGenerator::InitLocations(HInstruction* instruction) { } } +void CodeGenerator::AllocateLocations(HInstruction* instruction) { + instruction->Accept(GetLocationBuilder()); + LocationSummary* locations = instruction->GetLocations(); + if (!instruction->IsSuspendCheckEntry()) { + if (locations != nullptr && locations->CanCall()) { + MarkNotLeaf(); + } + if (instruction->NeedsCurrentMethod()) { + SetRequiresCurrentMethod(); + } + } +} + bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const { DCHECK_EQ(block_order_->Get(current_block_index_), current); return (current_block_index_ < block_order_->Size() - 1) diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index ab63b911b2..efd0c84797 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -30,7 +30,6 @@ namespace art { static size_t constexpr kVRegSize = 4; -static size_t constexpr kUninitializedFrameSize = 0; // Binary encoding of 2^32 for type double. static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000); @@ -107,8 +106,6 @@ class CodeGenerator { virtual void GenerateFrameExit() = 0; virtual void Bind(HBasicBlock* block) = 0; virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0; - virtual HGraphVisitor* GetLocationBuilder() = 0; - virtual HGraphVisitor* GetInstructionVisitor() = 0; virtual Assembler* GetAssembler() = 0; virtual size_t GetWordSize() const = 0; virtual size_t GetFloatingPointSpillSlotSize() const = 0; @@ -196,6 +193,15 @@ class CodeGenerator { void MarkNotLeaf() { is_leaf_ = false; + requires_current_method_ = true; + } + + void SetRequiresCurrentMethod() { + requires_current_method_ = true; + } + + bool RequiresCurrentMethod() const { + return requires_current_method_; } // Clears the spill slots taken by loop phis in the `LocationSummary` of the @@ -228,6 +234,8 @@ class CodeGenerator { allocated_registers_.Add(location); } + void AllocateLocations(HInstruction* instruction); + protected: CodeGenerator(HGraph* graph, size_t number_of_core_registers, @@ -236,7 +244,7 @@ class CodeGenerator { uint32_t core_callee_save_mask, uint32_t fpu_callee_save_mask, const CompilerOptions& compiler_options) - : frame_size_(kUninitializedFrameSize), + : frame_size_(0), core_spill_mask_(0), fpu_spill_mask_(0), first_register_slot_in_slow_path_(0), @@ -255,6 +263,7 @@ class CodeGenerator { block_order_(nullptr), current_block_index_(0), is_leaf_(true), + requires_current_method_(false), stack_map_stream_(graph->GetArena()) {} // Register allocation logic. @@ -269,11 +278,12 @@ class CodeGenerator { virtual Location GetStackLocation(HLoadLocal* load) const = 0; virtual ParallelMoveResolver* GetMoveResolver() = 0; + virtual HGraphVisitor* GetLocationBuilder() = 0; + virtual HGraphVisitor* GetInstructionVisitor() = 0; // Returns the location of the first spilled entry for floating point registers, // relative to the stack pointer. uint32_t GetFpuSpillStart() const { - DCHECK_NE(frame_size_, kUninitializedFrameSize); return GetFrameSize() - FrameEntrySpillSize(); } @@ -289,6 +299,21 @@ class CodeGenerator { return GetFpuSpillSize() + GetCoreSpillSize(); } + bool HasAllocatedCalleeSaveRegisters() const { + // We check the core registers against 1 because it always comprises the return PC. + return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1) + || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0); + } + + bool CallPushesPC() const { + InstructionSet instruction_set = GetInstructionSet(); + return instruction_set == kX86 || instruction_set == kX86_64; + } + + bool HasEmptyFrame() const { + return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0); + } + // Frame size required for this method. uint32_t frame_size_; uint32_t core_spill_mask_; @@ -311,7 +336,7 @@ class CodeGenerator { const uint32_t fpu_callee_save_mask_; private: - void InitLocations(HInstruction* instruction); + void InitLocationsBaseline(HInstruction* instruction); size_t GetStackOffsetOfSavedRegister(size_t index); void CompileInternal(CodeAllocator* allocator, bool is_baseline); @@ -328,8 +353,12 @@ class CodeGenerator { // we are generating code for. size_t current_block_index_; + // Whether the method is a leaf method. bool is_leaf_; + // Whether an instruction in the graph accesses the current method. + bool requires_current_method_; + StackMapStream stack_map_stream_; DISALLOW_COPY_AND_ASSIGN(CodeGenerator); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 78fd181dcf..1841f06226 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -396,10 +396,6 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, move_resolver_(graph->GetArena(), this), assembler_(true), isa_features_(isa_features) { - // Save one extra register for baseline. Note that on thumb2, there is no easy - // instruction to restore just the PC, so this actually helps both baseline - // and non-baseline to save and restore at least two registers at entry and exit. - AddAllocatedRegister(Location::RegisterLocation(kCoreSavedRegisterForBaseline)); // Save the PC register to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(PC)); } @@ -508,6 +504,10 @@ static uint32_t LeastSignificantBit(uint32_t mask) { void CodeGeneratorARM::ComputeSpillMask() { core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_; + // Save one extra register for baseline. Note that on thumb2, there is no easy + // instruction to restore just the PC, so this actually helps both baseline + // and non-baseline to save and restore at least two registers at entry and exit. + core_spill_mask_ |= (1 << kCoreSavedRegisterForBaseline); DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved"; fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_; // We use vpush and vpop for saving and restoring floating point registers, which take @@ -529,6 +529,10 @@ void CodeGeneratorARM::GenerateFrameEntry() { DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); __ Bind(&frame_entry_label_); + if (HasEmptyFrame()) { + return; + } + if (!skip_overflow_check) { __ AddConstant(IP, SP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm))); __ LoadFromOffset(kLoadWord, IP, IP, 0); @@ -547,6 +551,10 @@ void CodeGeneratorARM::GenerateFrameEntry() { } void CodeGeneratorARM::GenerateFrameExit() { + if (HasEmptyFrame()) { + __ bx(LR); + return; + } __ AddConstant(SP, GetFrameSize() - FrameEntrySpillSize()); if (fpu_spill_mask_ != 0) { SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_)); @@ -1172,6 +1180,7 @@ void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok } void CodeGeneratorARM::LoadCurrentMethod(Register reg) { + DCHECK(RequiresCurrentMethod()); __ LoadFromOffset(kLoadWord, reg, SP, kCurrentMethodStackOffset); } diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 7588a29524..46f1a9b51d 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -453,24 +453,27 @@ void CodeGeneratorARM64::GenerateFrameEntry() { RecordPcInfo(nullptr, 0); } - int frame_size = GetFrameSize(); - __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex)); - __ PokeCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); - __ PokeCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); - - // Stack layout: - // sp[frame_size - 8] : lr. - // ... : other preserved core registers. - // ... : other preserved fp registers. - // ... : reserved frame space. - // sp[0] : current method. + if (!HasEmptyFrame()) { + int frame_size = GetFrameSize(); + // Stack layout: + // sp[frame_size - 8] : lr. + // ... : other preserved core registers. + // ... : other preserved fp registers. + // ... : reserved frame space. + // sp[0] : current method. + __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex)); + __ PokeCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); + __ PokeCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); + } } void CodeGeneratorARM64::GenerateFrameExit() { - int frame_size = GetFrameSize(); - __ PeekCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); - __ PeekCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); - __ Drop(frame_size); + if (!HasEmptyFrame()) { + int frame_size = GetFrameSize(); + __ PeekCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); + __ PeekCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); + __ Drop(frame_size); + } } void CodeGeneratorARM64::Bind(HBasicBlock* block) { @@ -961,6 +964,7 @@ void CodeGeneratorARM64::StoreRelease(Primitive::Type type, } void CodeGeneratorARM64::LoadCurrentMethod(vixl::Register current_method) { + DCHECK(RequiresCurrentMethod()); DCHECK(current_method.IsW()); __ Ldr(current_method, MemOperand(sp, kCurrentMethodStackOffset)); } diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 98f93a418a..1a95f418bc 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -470,12 +470,16 @@ void CodeGeneratorX86::GenerateFrameEntry() { RecordPcInfo(nullptr, 0); } - __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); - __ movl(Address(ESP, kCurrentMethodStackOffset), EAX); + if (!HasEmptyFrame()) { + __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); + __ movl(Address(ESP, kCurrentMethodStackOffset), EAX); + } } void CodeGeneratorX86::GenerateFrameExit() { - __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); + if (!HasEmptyFrame()) { + __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); + } } void CodeGeneratorX86::Bind(HBasicBlock* block) { @@ -483,6 +487,7 @@ void CodeGeneratorX86::Bind(HBasicBlock* block) { } void CodeGeneratorX86::LoadCurrentMethod(Register reg) { + DCHECK(RequiresCurrentMethod()); __ movl(reg, Address(ESP, kCurrentMethodStackOffset)); } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index ed1371b64c..88f17533e8 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -487,6 +487,10 @@ void CodeGeneratorX86_64::GenerateFrameEntry() { RecordPcInfo(nullptr, 0); } + if (HasEmptyFrame()) { + return; + } + for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) { Register reg = kCoreCalleeSaves[i]; if (allocated_registers_.ContainsCoreRegister(reg)) { @@ -509,6 +513,9 @@ void CodeGeneratorX86_64::GenerateFrameEntry() { } void CodeGeneratorX86_64::GenerateFrameExit() { + if (HasEmptyFrame()) { + return; + } uint32_t xmm_spill_location = GetFpuSpillStart(); size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize(); for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { @@ -533,6 +540,7 @@ void CodeGeneratorX86_64::Bind(HBasicBlock* block) { } void CodeGeneratorX86_64::LoadCurrentMethod(CpuRegister reg) { + DCHECK(RequiresCurrentMethod()); __ movl(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset)); } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 30d869d026..9bb91d208b 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1015,6 +1015,18 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { void SetLiveInterval(LiveInterval* interval) { live_interval_ = interval; } bool HasLiveInterval() const { return live_interval_ != nullptr; } + bool IsSuspendCheckEntry() const { return IsSuspendCheck() && GetBlock()->IsEntryBlock(); } + + // Returns whether the code generation of the instruction will require to have access + // to the current method. Such instructions are: + // (1): Instructions that require an environment, as calling the runtime requires + // to walk the stack and have the current method stored at a specific stack address. + // (2): Object literals like classes and strings, that are loaded from the dex cache + // fields of the current method. + bool NeedsCurrentMethod() const { + return NeedsEnvironment() || IsLoadClass() || IsLoadString(); + } + private: HInstruction* previous_; HInstruction* next_; diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 3809720cb4..bfbe63f6ce 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -252,8 +252,13 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { && (instruction->GetType() != Primitive::kPrimFloat); if (locations->CanCall()) { - if (!instruction->IsSuspendCheck()) { - codegen_->MarkNotLeaf(); + if (codegen_->IsLeafMethod()) { + // TODO: We do this here because we do not want the suspend check to artificially + // create live registers. We should find another place, but this is currently the + // simplest. + DCHECK(instruction->IsSuspendCheckEntry()); + instruction->GetBlock()->RemoveInstruction(instruction); + return; } safepoints_.Add(instruction); if (locations->OnlyCallsOnSlowPath()) { diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 1b06315fce..2a8473524f 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -115,14 +115,13 @@ void SsaLivenessAnalysis::NumberInstructions() { // to differentiate between the start and end of an instruction. Adding 2 to // the lifetime position for each instruction ensures the start of an // instruction is different than the end of the previous instruction. - HGraphVisitor* location_builder = codegen_->GetLocationBuilder(); for (HLinearOrderIterator it(*this); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); block->SetLifetimeStart(lifetime_position); for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { HInstruction* current = inst_it.Current(); - current->Accept(location_builder); + codegen_->AllocateLocations(current); LocationSummary* locations = current->GetLocations(); if (locations != nullptr && locations->Out().IsValid()) { instructions_from_ssa_index_.Add(current); @@ -140,7 +139,7 @@ void SsaLivenessAnalysis::NumberInstructions() { for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done(); inst_it.Advance()) { HInstruction* current = inst_it.Current(); - current->Accept(codegen_->GetLocationBuilder()); + codegen_->AllocateLocations(current); LocationSummary* locations = current->GetLocations(); if (locations != nullptr && locations->Out().IsValid()) { instructions_from_ssa_index_.Add(current); |