Diffstat (limited to 'compiler/optimizing')
| -rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 22 |
| -rw-r--r-- | compiler/optimizing/code_generator_x86.h | 5 |
| -rw-r--r-- | compiler/optimizing/code_generator_x86_64.cc | 26 |
| -rw-r--r-- | compiler/optimizing/code_generator_x86_64.h | 4 |
| -rw-r--r-- | compiler/optimizing/intrinsics_mips.cc | 36 |
| -rw-r--r-- | compiler/optimizing/intrinsics_mips64.cc | 36 |
| -rw-r--r-- | compiler/optimizing/locations.h | 2 |
| -rw-r--r-- | compiler/optimizing/nodes.cc | 3 |
| -rw-r--r-- | compiler/optimizing/nodes.h | 9 |
| -rw-r--r-- | compiler/optimizing/register_allocation_resolver.cc | 20 |
| -rw-r--r-- | compiler/optimizing/register_allocator_graph_color.cc | 41 |
| -rw-r--r-- | compiler/optimizing/register_allocator_linear_scan.cc | 41 |
| -rw-r--r-- | compiler/optimizing/ssa_liveness_analysis.cc | 12 |
| -rw-r--r-- | compiler/optimizing/ssa_liveness_analysis.h | 6 |
14 files changed, 148 insertions, 115 deletions
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 0b50619a66..958c1a6fdb 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -183,10 +183,13 @@ class SuspendCheckSlowPathX86 : public SlowPathCode {
       : SlowPathCode(instruction), successor_(successor) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
     __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);  // only saves full width XMM for SIMD
     x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
+    RestoreLiveRegisters(codegen, locations);  // only restores full width XMM for SIMD
     if (successor_ == nullptr) {
       __ jmp(GetReturnLabel());
     } else {
@@ -963,12 +966,20 @@ size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id
 }
 
 size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
-  __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
+  if (GetGraph()->HasSIMD()) {
+    __ movupd(Address(ESP, stack_index), XmmRegister(reg_id));
+  } else {
+    __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
+  }
   return GetFloatingPointSpillSlotSize();
 }
 
 size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
-  __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
+  if (GetGraph()->HasSIMD()) {
+    __ movupd(XmmRegister(reg_id), Address(ESP, stack_index));
+  } else {
+    __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
+  }
   return GetFloatingPointSpillSlotSize();
 }
 
@@ -5699,7 +5710,12 @@ void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction)
 void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
-  locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  // In suspend check slow path, usually there are no caller-save registers at all.
+  // If SIMD instructions are present, however, we force spilling all live SIMD
+  // registers in full width (since the runtime only saves/restores lower part).
+  locations->SetCustomSlowPathCallerSaves(GetGraph()->HasSIMD()
+                                              ? RegisterSet::AllFpu()
+                                              : RegisterSet::Empty());
 }
 
 void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 65ee383b54..ca3a9eadd2 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -348,8 +348,9 @@ class CodeGeneratorX86 : public CodeGenerator {
   }
 
   size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
-    // 8 bytes == 2 words for each spill.
-    return 2 * kX86WordSize;
+    return GetGraph()->HasSIMD()
+        ? 4 * kX86WordSize   // 16 bytes == 4 words for each spill
+        : 2 * kX86WordSize;  // 8 bytes == 2 words for each spill
   }
 
   HGraphVisitor* GetLocationBuilder() OVERRIDE {
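Why the movsd/movupd switch above matters: movsd moves only the low 64 bits of an XMM register, so spilling a 128-bit vector with it silently truncates the upper lanes. The standalone C++ sketch below (plain SSE2 intrinsics, not part of this patch) demonstrates the difference with a save/restore round-trip:

    // Build: g++ -msse2 xmm_spill_demo.cc && ./a.out
    #include <immintrin.h>
    #include <cstdio>

    int main() {
      double spill[2] = {0.0, 0.0};      // stand-in for a stack spill slot
      __m128d v = _mm_set_pd(2.0, 1.0);  // lanes {low=1.0, high=2.0}

      _mm_store_sd(spill, v);               // movsd-style spill: low 64 bits only
      __m128d narrow = _mm_load_sd(spill);  // reload zeroes the upper lane

      _mm_storeu_pd(spill, v);              // movupd-style spill: full 128 bits
      __m128d wide = _mm_loadu_pd(spill);

      double n[2], w[2];
      _mm_storeu_pd(n, narrow);
      _mm_storeu_pd(w, wide);
      printf("movsd  round-trip: {%g, %g}\n", n[0], n[1]);  // {1, 0}: high lane lost
      printf("movupd round-trip: {%g, %g}\n", w[0], w[1]);  // {1, 2}: intact
      return 0;
    }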
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 08f1adfcff..c106d9b06e 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -140,10 +140,13 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode {
       : SlowPathCode(instruction), successor_(successor) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);  // only saves full width XMM for SIMD
     x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
+    RestoreLiveRegisters(codegen, locations);  // only restores full width XMM for SIMD
     if (successor_ == nullptr) {
       __ jmp(GetReturnLabel());
     } else {
@@ -1158,13 +1161,21 @@ size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg
 }
 
 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
-  __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
-  return kX86_64WordSize;
+  if (GetGraph()->HasSIMD()) {
+    __ movupd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
+  } else {
+    __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
+  }
+  return GetFloatingPointSpillSlotSize();
 }
 
 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
-  __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
-  return kX86_64WordSize;
+  if (GetGraph()->HasSIMD()) {
+    __ movupd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
+  } else {
+    __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
+  }
+  return GetFloatingPointSpillSlotSize();
 }
 
 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
@@ -5152,7 +5163,12 @@ void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instructio
 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
-  locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  // In suspend check slow path, usually there are no caller-save registers at all.
+  // If SIMD instructions are present, however, we force spilling all live SIMD
+  // registers in full width (since the runtime only saves/restores lower part).
+  locations->SetCustomSlowPathCallerSaves(GetGraph()->HasSIMD()
+                                              ? RegisterSet::AllFpu()
+                                              : RegisterSet::Empty());
 }
 
 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 376c3ce381..c8336dabd9 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -326,7 +326,9 @@ class CodeGeneratorX86_64 : public CodeGenerator {
   }
 
   size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
-    return kX86_64WordSize;
+    return GetGraph()->HasSIMD()
+        ? 2 * kX86_64WordSize   // 16 bytes == 2 x86_64 words for each spill
+        : 1 * kX86_64WordSize;  // 8 bytes == 1 x86_64 word for each spill
  }
 
   HGraphVisitor* GetLocationBuilder() OVERRIDE {
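With both headers reporting a wider per-slot size when SIMD is present, the shared save/restore loops advance 16 bytes per XMM register instead of 8. A hedged sketch of that bookkeeping (illustrative names, not ART's actual helpers):

    #include <cstddef>
    #include <cstdio>

    constexpr size_t kWordSize = 8;  // one x86-64 word

    // Mirrors the shape of GetFloatingPointSpillSlotSize() above.
    size_t FpSpillSlotSize(bool has_simd) {
      return has_simd ? 2 * kWordSize   // full 16-byte XMM width
                      : 1 * kWordSize;  // low double only
    }

    // Stack bytes reserved when `n` FP registers are live across a slow path.
    size_t FpSpillAreaSize(size_t n, bool has_simd) {
      return n * FpSpillSlotSize(has_simd);
    }

    int main() {
      printf("4 scalar spills: %zu bytes\n", FpSpillAreaSize(4, false));  // 32
      printf("4 SIMD spills:   %zu bytes\n", FpSpillAreaSize(4, true));   // 64
      return 0;
    }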
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index ba006edfa2..bf85b1989e 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2559,7 +2559,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) {
 // void java.lang.String.getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin)
 void IntrinsicLocationsBuilderMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnMainOnly,
+                                                            LocationSummary::kNoCall,
                                                             kIntrinsified);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
@@ -2567,17 +2567,9 @@ void IntrinsicLocationsBuilderMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke)
   locations->SetInAt(3, Location::RequiresRegister());
   locations->SetInAt(4, Location::RequiresRegister());
 
-  // We will call memcpy() to do the actual work. Allocate the temporary
-  // registers to use the correct input registers, and output register.
-  // memcpy() uses the normal MIPS calling convention.
-  InvokeRuntimeCallingConvention calling_convention;
-
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
-
-  Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt);
-  locations->AddTemp(Location::RegisterLocation(outLocation.AsRegister<Register>()));
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
@@ -2596,16 +2588,11 @@ void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
   Register dstBegin = locations->InAt(4).AsRegister<Register>();
 
   Register dstPtr = locations->GetTemp(0).AsRegister<Register>();
-  DCHECK_EQ(dstPtr, A0);
   Register srcPtr = locations->GetTemp(1).AsRegister<Register>();
-  DCHECK_EQ(srcPtr, A1);
   Register numChrs = locations->GetTemp(2).AsRegister<Register>();
-  DCHECK_EQ(numChrs, A2);
-
-  Register dstReturn = locations->GetTemp(3).AsRegister<Register>();
-  DCHECK_EQ(dstReturn, V0);
 
   MipsLabel done;
+  MipsLabel loop;
 
   // Location of data in char array buffer.
   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
@@ -2634,7 +2621,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
     __ LoadFromOffset(kLoadWord, TMP, srcObj, count_offset);
     __ Sll(TMP, TMP, 31);
 
-    // If string is uncompressed, use memcpy() path.
+    // If string is uncompressed, use uncompressed path.
     __ Bnez(TMP, &uncompressed_copy);
 
     // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
@@ -2660,10 +2647,13 @@ void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
       __ Addu(srcPtr, srcPtr, AT);
     }
 
-    // Calculate number of bytes to copy from number of characters.
-    __ Sll(numChrs, numChrs, char_shift);
-
-    codegen_->InvokeRuntime(kQuickMemcpy, invoke, invoke->GetDexPc(), nullptr);
+    __ Bind(&loop);
+    __ Lh(AT, srcPtr, 0);
+    __ Addiu(numChrs, numChrs, -1);
+    __ Addiu(srcPtr, srcPtr, char_size);
+    __ Sh(AT, dstPtr, 0);
+    __ Addiu(dstPtr, dstPtr, char_size);
+    __ Bnez(numChrs, &loop);
 
   __ Bind(&done);
 }
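The MIPS intrinsic no longer tail-calls kQuickMemcpy; it pins three plain temps and emits the halfword copy loop inline. In C++ terms, the emitted loop behaves like the following stand-in (not the generated assembly; like the intrinsic, it assumes the zero-length case was already branched around):

    #include <cstdint>
    #include <cstdio>

    void CopyChars(const uint16_t* srcPtr, uint16_t* dstPtr, int numChrs) {
      do {
        uint16_t ch = *srcPtr;   // Lh: load one UTF-16 unit
        srcPtr++;                // Addiu srcPtr, srcPtr, char_size
        *dstPtr = ch;            // Sh: store it
        dstPtr++;                // Addiu dstPtr, dstPtr, char_size
      } while (--numChrs != 0);  // Addiu numChrs, numChrs, -1; Bnez numChrs, &loop
    }

    int main() {
      uint16_t src[] = {'h', 'i', '!'};
      uint16_t dst[3] = {};
      CopyChars(src, dst, 3);
      printf("%c%c%c\n", dst[0], dst[1], dst[2]);  // hi!
      return 0;
    }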
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 21c5074a1c..1ee89cf127 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1895,7 +1895,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitDoubleIsInfinite(HInvoke* invoke) {
 // void java.lang.String.getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin)
 void IntrinsicLocationsBuilderMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnMainOnly,
+                                                            LocationSummary::kNoCall,
                                                             kIntrinsified);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
@@ -1903,17 +1903,9 @@ void IntrinsicLocationsBuilderMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke
   locations->SetInAt(3, Location::RequiresRegister());
   locations->SetInAt(4, Location::RequiresRegister());
 
-  // We will call memcpy() to do the actual work. Allocate the temporary
-  // registers to use the correct input registers, and output register.
-  // memcpy() uses the normal MIPS calling conventions.
-  InvokeRuntimeCallingConvention calling_convention;
-
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
-
-  Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimLong);
-  locations->AddTemp(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>()));
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
@@ -1932,16 +1924,11 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
   GpuRegister dstBegin = locations->InAt(4).AsRegister<GpuRegister>();
 
   GpuRegister dstPtr = locations->GetTemp(0).AsRegister<GpuRegister>();
-  DCHECK_EQ(dstPtr, A0);
   GpuRegister srcPtr = locations->GetTemp(1).AsRegister<GpuRegister>();
-  DCHECK_EQ(srcPtr, A1);
   GpuRegister numChrs = locations->GetTemp(2).AsRegister<GpuRegister>();
-  DCHECK_EQ(numChrs, A2);
-
-  GpuRegister dstReturn = locations->GetTemp(3).AsRegister<GpuRegister>();
-  DCHECK_EQ(dstReturn, V0);
 
   Mips64Label done;
+  Mips64Label loop;
 
   // Location of data in char array buffer.
   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
@@ -1965,7 +1952,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
     __ LoadFromOffset(kLoadWord, TMP, srcObj, count_offset);
     __ Dext(TMP, TMP, 0, 1);
 
-    // If string is uncompressed, use memcpy() path.
+    // If string is uncompressed, use uncompressed path.
     __ Bnezc(TMP, &uncompressed_copy);
 
     // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
@@ -1986,10 +1973,13 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
   __ Daddiu(srcPtr, srcObj, value_offset);
   __ Dlsa(srcPtr, srcBegin, srcPtr, char_shift);
 
-  // Calculate number of bytes to copy from number of characters.
-  __ Dsll(numChrs, numChrs, char_shift);
-
-  codegen_->InvokeRuntime(kQuickMemcpy, invoke, invoke->GetDexPc(), nullptr);
+  __ Bind(&loop);
+  __ Lh(AT, srcPtr, 0);
+  __ Daddiu(numChrs, numChrs, -1);
+  __ Daddiu(srcPtr, srcPtr, char_size);
+  __ Sh(AT, dstPtr, 0);
+  __ Daddiu(dstPtr, dstPtr, char_size);
+  __ Bnezc(numChrs, &loop);
 
   __ Bind(&done);
 }
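The compressed-string branch both intrinsics keep works the same way, except each source element is a byte that is zero-extended to a 16-bit char on the way out, which is also why that path could never be a single memcpy. A minimal C++ equivalent (again a stand-in, with the same nonzero-count precondition):

    #include <cstdint>

    void CopyCompressedChars(const uint8_t* src, uint16_t* dst, int numChrs) {
      do {
        *dst++ = *src++;  // Lbu + Sh: widen a Latin-1 byte to a UTF-16 unit
      } while (--numChrs != 0);
    }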
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 091b58a63d..d391f6913c 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -417,6 +417,7 @@ std::ostream& operator<<(std::ostream& os, const Location::Policy& rhs);
 class RegisterSet : public ValueObject {
  public:
   static RegisterSet Empty() { return RegisterSet(); }
+  static RegisterSet AllFpu() { return RegisterSet(0, -1); }
 
   void Add(Location loc) {
     if (loc.IsRegister()) {
@@ -462,6 +463,7 @@ class RegisterSet : public ValueObject {
 
  private:
   RegisterSet() : core_registers_(0), floating_point_registers_(0) {}
+  RegisterSet(uint32_t core, uint32_t fp) : core_registers_(core), floating_point_registers_(fp) {}
 
   uint32_t core_registers_;
   uint32_t floating_point_registers_;
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 020e4463d4..ec706e6694 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2046,6 +2046,9 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
   if (HasTryCatch()) {
     outer_graph->SetHasTryCatch(true);
   }
+  if (HasSIMD()) {
+    outer_graph->SetHasSIMD(true);
+  }
 
   HInstruction* return_value = nullptr;
   if (GetBlocks().size() == 3) {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 542b218cf8..6881d8f6ae 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -323,6 +323,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
         temporaries_vreg_slots_(0),
         has_bounds_checks_(false),
         has_try_catch_(false),
+        has_simd_(false),
         has_loops_(false),
         has_irreducible_loops_(false),
         debuggable_(debuggable),
@@ -560,6 +561,9 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
   bool HasTryCatch() const { return has_try_catch_; }
   void SetHasTryCatch(bool value) { has_try_catch_ = value; }
 
+  bool HasSIMD() const { return has_simd_; }
+  void SetHasSIMD(bool value) { has_simd_ = value; }
+
   bool HasLoops() const { return has_loops_; }
   void SetHasLoops(bool value) { has_loops_ = value; }
 
@@ -652,6 +656,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
   // false positives.
   bool has_try_catch_;
 
+  // Flag whether SIMD instructions appear in the graph. If true, the
+  // code generators may have to be more careful spilling the wider
+  // contents of SIMD registers.
+  bool has_simd_;
+
   // Flag whether there are any loops in the graph. We can skip loop
  // optimization if it's false.
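RegisterSet::AllFpu() works by handing the new private two-mask constructor a core mask of 0 and an FP mask of -1 (all bits set), which makes every floating-point register look caller-save to the suspend-check slow path. A simplified model of the class (not the full ART type):

    #include <cstdint>
    #include <cstdio>

    class RegisterSet {
     public:
      static RegisterSet Empty() { return RegisterSet(0, 0); }
      static RegisterSet AllFpu() { return RegisterSet(0, -1); }  // -1 wraps to 0xFFFFFFFF

      bool ContainsFloatingPointRegister(uint32_t id) const {
        return (floating_point_registers_ & (1u << id)) != 0;
      }

     private:
      RegisterSet(uint32_t core, uint32_t fp)
          : core_registers_(core), floating_point_registers_(fp) {}

      uint32_t core_registers_;
      uint32_t floating_point_registers_;
    };

    int main() {
      printf("%d\n", RegisterSet::AllFpu().ContainsFloatingPointRegister(15));  // 1
      printf("%d\n", RegisterSet::Empty().ContainsFloatingPointRegister(15));   // 0
      return 0;
    }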
 // optimization if it's false. It's only best effort to keep it up
   // to date in the presence of code elimination so there might be false
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
index 8a9c1ccaff..0d33b49fdb 100644
--- a/compiler/optimizing/register_allocation_resolver.cc
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -299,11 +299,13 @@ void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval) {
         // Currently, we spill unconditionnally the current method in the code generators.
         && !interval->GetDefinedBy()->IsCurrentMethod()) {
       // We spill eagerly, so move must be at definition.
-      InsertMoveAfter(interval->GetDefinedBy(),
-                      interval->ToLocation(),
-                      interval->NeedsTwoSpillSlots()
-                          ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot())
-                          : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
+      Location loc;
+      switch (interval->NumberOfSpillSlotsNeeded()) {
+        case 1: loc = Location::StackSlot(interval->GetParent()->GetSpillSlot()); break;
+        case 2: loc = Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot()); break;
+        default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE();
+      }
+      InsertMoveAfter(interval->GetDefinedBy(), interval->ToLocation(), loc);
     }
     UsePosition* use = current->GetFirstUse();
     EnvUsePosition* env_use = current->GetFirstEnvironmentUse();
@@ -459,9 +461,11 @@ void RegisterAllocationResolver::ConnectSplitSiblings(LiveInterval* interval,
       location_source = defined_by->GetLocations()->Out();
     } else {
       DCHECK(defined_by->IsCurrentMethod());
-      location_source = parent->NeedsTwoSpillSlots()
-          ? Location::DoubleStackSlot(parent->GetSpillSlot())
-          : Location::StackSlot(parent->GetSpillSlot());
+      switch (parent->NumberOfSpillSlotsNeeded()) {
+        case 1: location_source = Location::StackSlot(parent->GetSpillSlot()); break;
+        case 2: location_source = Location::DoubleStackSlot(parent->GetSpillSlot()); break;
+        default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE();
+      }
     }
   } else {
     DCHECK(source != nullptr);
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
index 9064f865c3..87f709f63d 100644
--- a/compiler/optimizing/register_allocator_graph_color.cc
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -1029,7 +1029,7 @@ void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* ins
       interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
     } else {
       interval->SetSpillSlot(catch_phi_spill_slot_counter_);
-      catch_phi_spill_slot_counter_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
+      catch_phi_spill_slot_counter_ += interval->NumberOfSpillSlotsNeeded();
     }
   }
 }
@@ -1996,43 +1996,48 @@ void RegisterAllocatorGraphColor::ColorSpillSlots(ArenaVector<LiveInterval*>* in
     bool is_interval_beginning;
     size_t position;
     std::tie(position, is_interval_beginning, parent_interval) = *it;
-
-    bool needs_two_slots = parent_interval->NeedsTwoSpillSlots();
+    size_t number_of_spill_slots_needed = parent_interval->NumberOfSpillSlotsNeeded();
 
     if (is_interval_beginning) {
       DCHECK(!parent_interval->HasSpillSlot());
       DCHECK_EQ(position, parent_interval->GetStart());
 
-      // Find a free stack slot.
+      // Find first available free stack slot(s).
       size_t slot = 0;
-      for (; taken.IsBitSet(slot) || (needs_two_slots && taken.IsBitSet(slot + 1)); ++slot) {
-        // Skip taken slots.
+      for (; ; ++slot) {
+        bool found = true;
+        for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
+          if (taken.IsBitSet(s)) {
+            found = false;
+            break;  // failure
+          }
+        }
+        if (found) {
+          break;  // success
+        }
       }
+
      parent_interval->SetSpillSlot(slot);
-      *num_stack_slots_used = std::max(*num_stack_slots_used,
-                                       needs_two_slots ? slot + 1 : slot + 2);
-      if (needs_two_slots && *num_stack_slots_used % 2 != 0) {
+      *num_stack_slots_used = std::max(*num_stack_slots_used, slot + number_of_spill_slots_needed);
+      if (number_of_spill_slots_needed > 1 && *num_stack_slots_used % 2 != 0) {
         // The parallel move resolver requires that there be an even number of spill slots
         // allocated for pair value types.
         ++(*num_stack_slots_used);
       }
 
-      taken.SetBit(slot);
-      if (needs_two_slots) {
-        taken.SetBit(slot + 1);
+      for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
+        taken.SetBit(s);
       }
     } else {
       DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd());
       DCHECK(parent_interval->HasSpillSlot());
 
-      // Free up the stack slot used by this interval.
+      // Free up the stack slot(s) used by this interval.
       size_t slot = parent_interval->GetSpillSlot();
-      DCHECK(taken.IsBitSet(slot));
-      DCHECK(!needs_two_slots || taken.IsBitSet(slot + 1));
-      taken.ClearBit(slot);
-      if (needs_two_slots) {
-        taken.ClearBit(slot + 1);
+      for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
+        DCHECK(taken.IsBitSet(s));
+        taken.ClearBit(s);
       }
     }
   }
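The graph-coloring allocator's slot search generalizes from "one or two slots" to "any run of N consecutive free slots". Extracted into a standalone function over a plain bit array (names assumed, not ART's), the first-fit scan looks like this; slots beyond the array are treated as free, so the scan always terminates:

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    size_t FindFreeSlots(const std::vector<bool>& taken, size_t needed) {
      for (size_t slot = 0; ; ++slot) {
        bool found = true;
        for (size_t s = slot; s < slot + needed; ++s) {
          if (s < taken.size() && taken[s]) {
            found = false;  // run blocked; restart from the next base slot
            break;
          }
        }
        if (found) {
          return slot;  // slots [slot, slot + needed) are all free
        }
      }
    }

    int main() {
      std::vector<bool> taken = {true, true, false, true, false, false};
      printf("%zu\n", FindFreeSlots(taken, 2));  // 4: first free pair
      return 0;
    }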
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc
index 6354e76ec8..ab8d540359 100644
--- a/compiler/optimizing/register_allocator_linear_scan.cc
+++ b/compiler/optimizing/register_allocator_linear_scan.cc
@@ -1125,36 +1125,31 @@ void RegisterAllocatorLinearScan::AllocateSpillSlotFor(LiveInterval* interval) {
       LOG(FATAL) << "Unexpected type for interval " << interval->GetType();
   }
 
-  // Find an available spill slot.
+  // Find first available spill slots.
+  size_t number_of_spill_slots_needed = parent->NumberOfSpillSlotsNeeded();
   size_t slot = 0;
   for (size_t e = spill_slots->size(); slot < e; ++slot) {
-    if ((*spill_slots)[slot] <= parent->GetStart()) {
-      if (!parent->NeedsTwoSpillSlots()) {
-        // One spill slot is sufficient.
-        break;
-      }
-      if (slot == e - 1 || (*spill_slots)[slot + 1] <= parent->GetStart()) {
-        // Two spill slots are available.
+    bool found = true;
+    for (size_t s = slot, u = std::min(slot + number_of_spill_slots_needed, e); s < u; s++) {
+      if ((*spill_slots)[s] > parent->GetStart()) {
+        found = false;  // failure
         break;
       }
     }
+    if (found) {
+      break;  // success
+    }
   }
 
+  // Need new spill slots?
+  size_t upper = slot + number_of_spill_slots_needed;
+  if (upper > spill_slots->size()) {
+    spill_slots->resize(upper);
+  }
+
+  // Set slots to end.
   size_t end = interval->GetLastSibling()->GetEnd();
-  if (parent->NeedsTwoSpillSlots()) {
-    if (slot + 2u > spill_slots->size()) {
-      // We need a new spill slot.
-      spill_slots->resize(slot + 2u, end);
-    }
-    (*spill_slots)[slot] = end;
-    (*spill_slots)[slot + 1] = end;
-  } else {
-    if (slot == spill_slots->size()) {
-      // We need a new spill slot.
-      spill_slots->push_back(end);
-    } else {
-      (*spill_slots)[slot] = end;
-    }
+  for (size_t s = slot; s < upper; s++) {
+    (*spill_slots)[s] = end;
   }
 
   // Note that the exact spill slot location will be computed when we resolve,
@@ -1180,7 +1175,7 @@ void RegisterAllocatorLinearScan::AllocateSpillSlotForCatchPhi(HPhi* phi) {
   // TODO: Reuse spill slots when intervals of phis from different catch
   // blocks do not overlap.
   interval->SetSpillSlot(catch_phi_spill_slots_);
-  catch_phi_spill_slots_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
+  catch_phi_spill_slots_ += interval->NumberOfSpillSlotsNeeded();
 }
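The linear-scan variant differs in that `spill_slots` stores, per slot, the end position of the interval that last used it; a slot is reusable once that position is at or before the new interval's start. A sketch of the generalized allocation (assumed names, mirroring the patched loop):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Returns the first slot index where `needed` consecutive slots are free at
    // `start`, growing the pool if necessary, and marks them busy until `end`.
    size_t AllocateSpillSlots(std::vector<size_t>* spill_slots,
                              size_t start, size_t end, size_t needed) {
      size_t slot = 0;
      for (size_t e = spill_slots->size(); slot < e; ++slot) {
        bool found = true;
        for (size_t s = slot, u = std::min(slot + needed, e); s < u; ++s) {
          if ((*spill_slots)[s] > start) {  // previous tenant still alive here
            found = false;
            break;
          }
        }
        if (found) break;
      }
      size_t upper = slot + needed;
      if (upper > spill_slots->size()) {
        spill_slots->resize(upper);  // need new spill slots
      }
      for (size_t s = slot; s < upper; ++s) {
        (*spill_slots)[s] = end;  // busy until the new interval dies
      }
      return slot;
    }

Note how the std::min bound lets a run extend past the current pool: trailing slots that do not exist yet are simply created by the resize.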
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index e8e12e1a55..c0a045c33e 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -469,8 +469,8 @@ bool LiveInterval::SameRegisterKind(Location other) const {
   }
 }
 
-bool LiveInterval::NeedsTwoSpillSlots() const {
-  return type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble;
+size_t LiveInterval::NumberOfSpillSlotsNeeded() const {
+  return (type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble) ? 2 : 1;
 }
 
 Location LiveInterval::ToLocation() const {
@@ -494,10 +494,10 @@ Location LiveInterval::ToLocation() const {
     if (defined_by->IsConstant()) {
       return defined_by->GetLocations()->Out();
     } else if (GetParent()->HasSpillSlot()) {
-      if (NeedsTwoSpillSlots()) {
-        return Location::DoubleStackSlot(GetParent()->GetSpillSlot());
-      } else {
-        return Location::StackSlot(GetParent()->GetSpillSlot());
+      switch (NumberOfSpillSlotsNeeded()) {
+        case 1: return Location::StackSlot(GetParent()->GetSpillSlot());
+        case 2: return Location::DoubleStackSlot(GetParent()->GetSpillSlot());
+        default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE();
       }
     } else {
       return Location();
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 340d0ccefe..e9dffc1fac 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -762,9 +762,9 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
   // Returns kNoRegister otherwise.
   int FindHintAtDefinition() const;
 
-  // Returns whether the interval needs two (Dex virtual register size `kVRegSize`)
-  // slots for spilling.
-  bool NeedsTwoSpillSlots() const;
+  // Returns the number of required spilling slots (measured as a multiple of the
+  // Dex virtual register size `kVRegSize`).
+  size_t NumberOfSpillSlotsNeeded() const;
 
   bool IsFloatingPoint() const {
     return type_ == Primitive::kPrimFloat || type_ == Primitive::kPrimDouble;
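NumberOfSpillSlotsNeeded() keeps the old policy — Java long and double occupy two kVRegSize (4-byte) slots, everything else one — but expresses it as a count, so callers like the allocators above can loop over N slots instead of special-casing a boolean. A toy rendering of the mapping (enum and names illustrative, not ART's):

    #include <cstddef>

    enum class PrimType { kInt, kFloat, kRef, kLong, kDouble };

    constexpr size_t NumberOfSpillSlotsNeeded(PrimType type) {
      // 64-bit values span two 4-byte Dex virtual-register slots.
      return (type == PrimType::kLong || type == PrimType::kDouble) ? 2 : 1;
    }

    static_assert(NumberOfSpillSlotsNeeded(PrimType::kDouble) == 2, "two slots");
    static_assert(NumberOfSpillSlotsNeeded(PrimType::kInt) == 1, "one slot");

    int main() { return 0; }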