diff options
author | 2024-01-22 09:43:55 +0000 | |
---|---|---|
committer | 2024-01-23 14:28:20 +0000 | |
commit | cb406b706134e0e5b444b8c705064de772391481 (patch) | |
tree | 63ede3bf02d33565cc6b6bcf89af140d85d93d05 | |
parent | 0c36cea9ffc17d0fbdb3797917b095818ca15973 (diff) |
Reduce memory used for blocked registers.
Represent groups of registers blocked for calls, catch
blocks and irreducible loop headers by a special interval
to reduce memory use and improve performance.
Building the boot image on host with
kArenaAllocatorCountAllocations = true
kArenaAllocatorPreciseTracking = false
kArenaAllocatorMemoryReportThreshold = 16 * MB
dex2oat reports only `IActivityManager$Stub.onTransact` and
the difference in arena memory usage is
- arm: 19481332 -> 18568516
- ArenaStack: 4913380 -> 4000564
- SsaLiveness 2435300 -> 1522700
- RegAllocator 102808 -> 102592
- arm64: 19500044 -> 17893052
- ArenaStack: 5601636 -> 3994644
- SsaLiveness 3122836 -> 1516444
- RegAllocator 103560 -> 102960
Building a mini boot image (ART module dex files only) for
arm64 with with the "speed" filter and `--dump-timings` and
taking three samples with and without `baseline` I measured
- "dex2oat took":
- before: 6.181s, 6.174s, 6.235s
- after: 6.056s, 6.081s, 6.069s
- baseline before: 4.189s, 4.075s, 3.876s
- baseline after: 3.766s, 3.769s, 3.757s
- sum of the five "Compile Dex File Quick":
- before: 3.720s, 3.732s, 3.756s
- after: 3.593s, 3.62s, 3.605s
- baseline before: 1.645s, 1.469s, 1.425s
- baseline after: 1.322s, 1.320s, 1.322s
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Bug: 181943478
Change-Id: I356e7cc5a42af9b5b6bfee3dbef538d9025ca398
-rw-r--r-- | compiler/optimizing/register_allocator.cc | 116 | ||||
-rw-r--r-- | compiler/optimizing/register_allocator.h | 27 | ||||
-rw-r--r-- | compiler/optimizing/register_allocator_linear_scan.cc | 225 | ||||
-rw-r--r-- | compiler/optimizing/register_allocator_linear_scan.h | 20 | ||||
-rw-r--r-- | compiler/optimizing/register_allocator_test.cc | 7 |
5 files changed, 287 insertions, 108 deletions
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index f8b057d4a8..54a80555dc 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -21,6 +21,7 @@ #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" +#include "base/bit_utils_iterator.h" #include "base/bit_vector-inl.h" #include "code_generator.h" #include "register_allocator_linear_scan.h" @@ -28,12 +29,39 @@ namespace art HIDDEN { +template <typename IsCalleeSave> +static uint32_t GetBlockedRegistersForCall(size_t num_registers, IsCalleeSave&& is_callee_save) { + DCHECK_LE(num_registers, BitSizeOf<uint32_t>()); + uint32_t mask = 0u; + for (size_t reg = 0; reg != num_registers; ++reg) { + if (!is_callee_save(reg)) { + mask |= 1u << reg; + } + } + return mask; +} + +static uint32_t GetBlockedCoreRegistersForCall(size_t num_registers, const CodeGenerator* codegen) { + return GetBlockedRegistersForCall( + num_registers, [&](size_t reg) { return codegen->IsCoreCalleeSaveRegister(reg); }); +} + +static uint32_t GetBlockedFpRegistersForCall(size_t num_registers, const CodeGenerator* codegen) { + return GetBlockedRegistersForCall( + num_registers, [&](size_t reg) { return codegen->IsFloatingPointCalleeSaveRegister(reg); }); +} + RegisterAllocator::RegisterAllocator(ScopedArenaAllocator* allocator, CodeGenerator* codegen, const SsaLivenessAnalysis& liveness) : allocator_(allocator), codegen_(codegen), - liveness_(liveness) {} + liveness_(liveness), + num_core_registers_(codegen_->GetNumberOfCoreRegisters()), + num_fp_registers_(codegen_->GetNumberOfFloatingPointRegisters()), + core_registers_blocked_for_call_( + GetBlockedCoreRegistersForCall(num_core_registers_, codegen)), + fp_registers_blocked_for_call_(GetBlockedFpRegistersForCall(num_fp_registers_, codegen)) {} std::unique_ptr<RegisterAllocator> RegisterAllocator::Create(ScopedArenaAllocator* allocator, CodeGenerator* codegen, @@ -94,15 +122,81 @@ class AllRangesIterator : public ValueObject { DISALLOW_COPY_AND_ASSIGN(AllRangesIterator); }; +void RegisterAllocator::DumpRegister(std::ostream& stream, + int reg, + RegisterType register_type, + const CodeGenerator* codegen) { + switch (register_type) { + case RegisterType::kCoreRegister: + codegen->DumpCoreRegister(stream, reg); + break; + case RegisterType::kFpRegister: + codegen->DumpFloatingPointRegister(stream, reg); + break; + } +} + +uint32_t RegisterAllocator::GetRegisterMask(LiveInterval* interval, + RegisterType register_type) const { + if (interval->HasRegister()) { + DCHECK_EQ(register_type == RegisterType::kFpRegister, + DataType::IsFloatingPointType(interval->GetType())); + DCHECK_LE(static_cast<size_t>(interval->GetRegister()), BitSizeOf<uint32_t>()); + return 1u << interval->GetRegister(); + } else if (interval->IsFixed()) { + DCHECK_EQ(interval->GetType(), DataType::Type::kVoid); + DCHECK(interval->GetFirstRange() != nullptr); + size_t start = interval->GetFirstRange()->GetStart(); + bool blocked_for_call = liveness_.GetInstructionFromPosition(start / 2u) != nullptr; + switch (register_type) { + case RegisterType::kCoreRegister: + return blocked_for_call ? core_registers_blocked_for_call_ + : MaxInt<uint32_t>(num_core_registers_); + case RegisterType::kFpRegister: + return blocked_for_call ? fp_registers_blocked_for_call_ + : MaxInt<uint32_t>(num_fp_registers_); + } + } else { + return 0u; + } +} + bool RegisterAllocator::ValidateIntervals(ArrayRef<LiveInterval* const> intervals, size_t number_of_spill_slots, size_t number_of_out_slots, const CodeGenerator& codegen, - bool processing_core_registers, + const SsaLivenessAnalysis* liveness, + RegisterType register_type, bool log_fatal_on_failure) { - size_t number_of_registers = processing_core_registers + size_t number_of_registers = (register_type == RegisterType::kCoreRegister) ? codegen.GetNumberOfCoreRegisters() : codegen.GetNumberOfFloatingPointRegisters(); + uint32_t registers_blocked_for_call = (register_type == RegisterType::kCoreRegister) + ? GetBlockedCoreRegistersForCall(number_of_registers, &codegen) + : GetBlockedFpRegistersForCall(number_of_registers, &codegen); + + // A copy of `GetRegisterMask()` using local `number_of_registers` and + // `registers_blocked_for_call` instead of the cached per-type members + // that we cannot use in this static member function. + auto get_register_mask = [&](LiveInterval* interval) { + if (interval->HasRegister()) { + DCHECK_EQ(register_type == RegisterType::kFpRegister, + DataType::IsFloatingPointType(interval->GetType())); + DCHECK_LE(static_cast<size_t>(interval->GetRegister()), BitSizeOf<uint32_t>()); + return 1u << interval->GetRegister(); + } else if (interval->IsFixed()) { + DCHECK_EQ(interval->GetType(), DataType::Type::kVoid); + DCHECK(interval->GetFirstRange() != nullptr); + size_t start = interval->GetFirstRange()->GetStart(); + CHECK(liveness != nullptr); + bool blocked_for_call = liveness->GetInstructionFromPosition(start / 2u) != nullptr; + return blocked_for_call ? registers_blocked_for_call + : MaxInt<uint32_t>(number_of_registers); + } else { + return 0u; + } + }; + ScopedArenaAllocator allocator(codegen.GetGraph()->GetArenaStack()); ScopedArenaVector<ArenaBitVector*> liveness_of_values( allocator.Adapter(kArenaAllocRegisterAllocatorValidate)); @@ -149,13 +243,13 @@ bool RegisterAllocator::ValidateIntervals(ArrayRef<LiveInterval* const> interval } } - if (current->HasRegister()) { + for (uint32_t reg : LowToHighBits(get_register_mask(current))) { if (kIsDebugBuild && log_fatal_on_failure && !current->IsFixed()) { // Only check when an error is fatal. Only tests code ask for non-fatal failures // and test code may not properly fill the right information to the code generator. - CHECK(codegen.HasAllocatedRegister(processing_core_registers, current->GetRegister())); + CHECK(codegen.HasAllocatedRegister(register_type == RegisterType::kCoreRegister, reg)); } - BitVector* liveness_of_register = liveness_of_values[current->GetRegister()]; + BitVector* liveness_of_register = liveness_of_values[reg]; for (size_t j = it.CurrentRange()->GetStart(); j < it.CurrentRange()->GetEnd(); ++j) { if (liveness_of_register->IsBitSet(j)) { if (current->IsUsingInputRegister() && current->CanUseInputRegister()) { @@ -168,15 +262,9 @@ bool RegisterAllocator::ValidateIntervals(ArrayRef<LiveInterval* const> interval message << "(" << defined_by->DebugName() << ")"; } message << "for "; - if (processing_core_registers) { - codegen.DumpCoreRegister(message, current->GetRegister()); - } else { - codegen.DumpFloatingPointRegister(message, current->GetRegister()); - } + DumpRegister(message, reg, register_type, &codegen); for (LiveInterval* interval : intervals) { - if (interval->HasRegister() - && interval->GetRegister() == current->GetRegister() - && interval->CoversSlow(j)) { + if ((get_register_mask(interval) & (1u << reg)) != 0u && interval->CoversSlow(j)) { message << std::endl; if (interval->GetDefinedBy() != nullptr) { message << interval->GetDefinedBy()->GetKind() << " "; diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index 453e339cba..bc192f29d7 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -43,6 +43,11 @@ class RegisterAllocator : public DeletableArenaObject<kArenaAllocRegisterAllocat kRegisterAllocatorGraphColor }; + enum class RegisterType { + kCoreRegister, + kFpRegister + }; + static constexpr Strategy kRegisterAllocatorDefault = kRegisterAllocatorLinearScan; static std::unique_ptr<RegisterAllocator> Create(ScopedArenaAllocator* allocator, @@ -65,7 +70,8 @@ class RegisterAllocator : public DeletableArenaObject<kArenaAllocRegisterAllocat size_t number_of_spill_slots, size_t number_of_out_slots, const CodeGenerator& codegen, - bool processing_core_registers, + const SsaLivenessAnalysis* liveness, // Can be null in tests. + RegisterType register_type, bool log_fatal_on_failure); static constexpr const char* kRegisterAllocatorPassName = "register"; @@ -84,9 +90,28 @@ class RegisterAllocator : public DeletableArenaObject<kArenaAllocRegisterAllocat // to find an optimal split position. LiveInterval* SplitBetween(LiveInterval* interval, size_t from, size_t to); + // Helper for calling the right typed codegen function for dumping a register. + void DumpRegister(std::ostream& stream, int reg, RegisterType register_type) const { + DumpRegister(stream, reg, register_type, codegen_); + } + static void DumpRegister( + std::ostream& stream, int reg, RegisterType register_type, const CodeGenerator* codegen); + + // Get a mask of all registers for an interval. + // Most intervals either have or do not have a register, but we're using special fixed + // intervals with type `Void` to mark large sets of blocked registers for calls, catch + // blocks and irreducible loop headers to save memory and improve performance. + uint32_t GetRegisterMask(LiveInterval* interval, RegisterType register_type) const; + ScopedArenaAllocator* const allocator_; CodeGenerator* const codegen_; const SsaLivenessAnalysis& liveness_; + + // Cached values calculated from codegen data. + const size_t num_core_registers_; + const size_t num_fp_registers_; + const uint32_t core_registers_blocked_for_call_; + const uint32_t fp_registers_blocked_for_call_; }; } // namespace art diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc index a3029f56c6..2b1864b52b 100644 --- a/compiler/optimizing/register_allocator_linear_scan.cc +++ b/compiler/optimizing/register_allocator_linear_scan.cc @@ -19,6 +19,7 @@ #include <iostream> #include <sstream> +#include "base/bit_utils_iterator.h" #include "base/bit_vector-inl.h" #include "base/enums.h" #include "code_generator.h" @@ -52,6 +53,10 @@ RegisterAllocatorLinearScan::RegisterAllocatorLinearScan(ScopedArenaAllocator* a inactive_(allocator->Adapter(kArenaAllocRegisterAllocator)), physical_core_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), physical_fp_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), + block_registers_for_call_interval_( + LiveInterval::MakeFixedInterval(allocator, kNoRegister, DataType::Type::kVoid)), + block_registers_special_interval_( + LiveInterval::MakeFixedInterval(allocator, kNoRegister, DataType::Type::kVoid)), temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), int_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)), long_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)), @@ -59,7 +64,7 @@ RegisterAllocatorLinearScan::RegisterAllocatorLinearScan(ScopedArenaAllocator* a double_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)), catch_phi_spill_slots_(0), safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)), - processing_core_registers_(false), + current_register_type_(RegisterType::kCoreRegister), number_of_registers_(-1), registers_array_(nullptr), blocked_core_registers_(codegen->GetBlockedCoreRegisters()), @@ -83,13 +88,6 @@ RegisterAllocatorLinearScan::RegisterAllocatorLinearScan(ScopedArenaAllocator* a RegisterAllocatorLinearScan::~RegisterAllocatorLinearScan() {} -static bool ShouldProcess(bool processing_core_registers, LiveInterval* interval) { - if (interval == nullptr) return false; - bool is_core_register = (interval->GetType() != DataType::Type::kFloat64) - && (interval->GetType() != DataType::Type::kFloat32); - return processing_core_registers == is_core_register; -} - void RegisterAllocatorLinearScan::AllocateRegisters() { AllocateRegistersInternal(); RegisterAllocationResolver(codegen_, liveness_) @@ -103,9 +101,9 @@ void RegisterAllocatorLinearScan::AllocateRegisters() { ArrayRef<LiveInterval* const>(temp_intervals_)); if (kIsDebugBuild) { - processing_core_registers_ = true; + current_register_type_ = RegisterType::kCoreRegister; ValidateInternal(true); - processing_core_registers_ = false; + current_register_type_ = RegisterType::kFpRegister; ValidateInternal(true); // Check that the linear order is still correct with regards to lifetime positions. // Since only parallel moves have been inserted during the register allocation, @@ -128,8 +126,19 @@ void RegisterAllocatorLinearScan::AllocateRegisters() { } } -void RegisterAllocatorLinearScan::BlockRegister(Location location, size_t start, size_t end) { +void RegisterAllocatorLinearScan::BlockRegister(Location location, + size_t position, + bool will_call) { + DCHECK(location.IsRegister() || location.IsFpuRegister()); int reg = location.reg(); + if (will_call) { + uint32_t registers_blocked_for_call = + location.IsRegister() ? core_registers_blocked_for_call_ : fp_registers_blocked_for_call_; + if ((registers_blocked_for_call & (1u << reg)) != 0u) { + // Register is already marked as blocked by the `block_registers_for_call_interval_`. + return; + } + } DCHECK(location.IsRegister() || location.IsFpuRegister()); LiveInterval* interval = location.IsRegister() ? physical_core_register_intervals_[reg] @@ -146,20 +155,7 @@ void RegisterAllocatorLinearScan::BlockRegister(Location location, size_t start, } } DCHECK(interval->GetRegister() == reg); - interval->AddRange(start, end); -} - -void RegisterAllocatorLinearScan::BlockRegisters(size_t start, size_t end, bool caller_save_only) { - for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) { - if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) { - BlockRegister(Location::RegisterLocation(i), start, end); - } - } - for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) { - if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) { - BlockRegister(Location::FpuRegisterLocation(i), start, end); - } - } + interval->AddRange(position, position + 1u); } void RegisterAllocatorLinearScan::AllocateRegistersInternal() { @@ -180,15 +176,25 @@ void RegisterAllocatorLinearScan::AllocateRegistersInternal() { // intervals belonging to the live-in set of the catch/header block to be spilled. // TODO(ngeoffray): Phis in this block could be allocated in register. size_t position = block->GetLifetimeStart(); - BlockRegisters(position, position + 1); + DCHECK_EQ(liveness_.GetInstructionFromPosition(position / 2u), nullptr); + block_registers_special_interval_->AddRange(position, position + 1u); } } number_of_registers_ = codegen_->GetNumberOfCoreRegisters(); registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_, kArenaAllocRegisterAllocator); - processing_core_registers_ = true; + current_register_type_ = RegisterType::kCoreRegister; unhandled_ = &unhandled_core_intervals_; + // Add intervals representing groups of physical registers blocked for calls, + // catch blocks and irreducible loop headers. + for (LiveInterval* block_registers_interval : { block_registers_for_call_interval_, + block_registers_special_interval_ }) { + if (block_registers_interval->GetFirstRange() != nullptr) { + block_registers_interval->ResetSearchCache(); + inactive_.push_back(block_registers_interval); + } + } for (LiveInterval* fixed : physical_core_register_intervals_) { if (fixed != nullptr) { // Fixed interval is added to inactive_ instead of unhandled_. @@ -207,8 +213,17 @@ void RegisterAllocatorLinearScan::AllocateRegistersInternal() { number_of_registers_ = codegen_->GetNumberOfFloatingPointRegisters(); registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_, kArenaAllocRegisterAllocator); - processing_core_registers_ = false; + current_register_type_ = RegisterType::kFpRegister; unhandled_ = &unhandled_fp_intervals_; + // Add intervals representing groups of physical registers blocked for calls, + // catch blocks and irreducible loop headers. + for (LiveInterval* block_registers_interval : { block_registers_for_call_interval_, + block_registers_special_interval_ }) { + if (block_registers_interval->GetFirstRange() != nullptr) { + block_registers_interval->ResetSearchCache(); + inactive_.push_back(block_registers_interval); + } + } for (LiveInterval* fixed : physical_fp_register_intervals_) { if (fixed != nullptr) { // Fixed interval is added to inactive_ instead of unhandled_. @@ -232,14 +247,17 @@ void RegisterAllocatorLinearScan::ProcessInstruction(HInstruction* instruction) return; } - CheckForTempLiveIntervals(instruction); - CheckForSafepoint(instruction); - if (locations->WillCall()) { - // If a call will happen, create fixed intervals for caller-save registers. + bool will_call = locations->WillCall(); + if (will_call) { + // If a call will happen, add the range to a fixed interval that represents all the + // caller-save registers blocked at call sites. const size_t position = instruction->GetLifetimePosition(); - BlockRegisters(position, position + 1, /* caller_save_only= */ true); + DCHECK_NE(liveness_.GetInstructionFromPosition(position / 2u), nullptr); + block_registers_for_call_interval_->AddRange(position, position + 1u); } - CheckForFixedInputs(instruction); + CheckForTempLiveIntervals(instruction, will_call); + CheckForSafepoint(instruction); + CheckForFixedInputs(instruction, will_call); LiveInterval* current = instruction->GetLiveInterval(); if (current == nullptr) @@ -257,7 +275,7 @@ void RegisterAllocatorLinearScan::ProcessInstruction(HInstruction* instruction) AddSafepointsFor(instruction); current->ResetSearchCache(); - CheckForFixedOutput(instruction); + CheckForFixedOutput(instruction, will_call); if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) { AllocateSpillSlotForCatchPhi(instruction->AsPhi()); @@ -296,7 +314,8 @@ bool RegisterAllocatorLinearScan::TryRemoveSuspendCheckEntry(HInstruction* instr return false; } -void RegisterAllocatorLinearScan::CheckForTempLiveIntervals(HInstruction* instruction) { +void RegisterAllocatorLinearScan::CheckForTempLiveIntervals(HInstruction* instruction, + bool will_call) { LocationSummary* locations = instruction->GetLocations(); size_t position = instruction->GetLifetimePosition(); @@ -304,7 +323,7 @@ void RegisterAllocatorLinearScan::CheckForTempLiveIntervals(HInstruction* instru for (size_t i = 0; i < locations->GetTempCount(); ++i) { Location temp = locations->GetTemp(i); if (temp.IsRegister() || temp.IsFpuRegister()) { - BlockRegister(temp, position, position + 1); + BlockRegister(temp, position, will_call); // Ensure that an explicit temporary register is marked as being allocated. codegen_->AddAllocatedRegister(temp); } else { @@ -348,18 +367,18 @@ void RegisterAllocatorLinearScan::CheckForSafepoint(HInstruction* instruction) { } } -void RegisterAllocatorLinearScan::CheckForFixedInputs(HInstruction* instruction) { +void RegisterAllocatorLinearScan::CheckForFixedInputs(HInstruction* instruction, bool will_call) { LocationSummary* locations = instruction->GetLocations(); size_t position = instruction->GetLifetimePosition(); for (size_t i = 0; i < locations->GetInputCount(); ++i) { Location input = locations->InAt(i); if (input.IsRegister() || input.IsFpuRegister()) { - BlockRegister(input, position, position + 1); + BlockRegister(input, position, will_call); // Ensure that an explicit input register is marked as being allocated. codegen_->AddAllocatedRegister(input); } else if (input.IsPair()) { - BlockRegister(input.ToLow(), position, position + 1); - BlockRegister(input.ToHigh(), position, position + 1); + BlockRegister(input.ToLow(), position, will_call); + BlockRegister(input.ToHigh(), position, will_call); // Ensure that an explicit input register pair is marked as being allocated. codegen_->AddAllocatedRegister(input.ToLow()); codegen_->AddAllocatedRegister(input.ToHigh()); @@ -393,7 +412,7 @@ void RegisterAllocatorLinearScan::AddSafepointsFor(HInstruction* instruction) { } } -void RegisterAllocatorLinearScan::CheckForFixedOutput(HInstruction* instruction) { +void RegisterAllocatorLinearScan::CheckForFixedOutput(HInstruction* instruction, bool will_call) { LocationSummary* locations = instruction->GetLocations(); size_t position = instruction->GetLifetimePosition(); LiveInterval* current = instruction->GetLiveInterval(); @@ -408,30 +427,30 @@ void RegisterAllocatorLinearScan::CheckForFixedOutput(HInstruction* instruction) if (output.IsUnallocated() && output.GetPolicy() == Location::kSameAsFirstInput) { Location first = locations->InAt(0); if (first.IsRegister() || first.IsFpuRegister()) { - current->SetFrom(position + 1); + current->SetFrom(position + 1u); current->SetRegister(first.reg()); } else if (first.IsPair()) { - current->SetFrom(position + 1); + current->SetFrom(position + 1u); current->SetRegister(first.low()); LiveInterval* high = current->GetHighInterval(); high->SetRegister(first.high()); - high->SetFrom(position + 1); + high->SetFrom(position + 1u); } } else if (output.IsRegister() || output.IsFpuRegister()) { // Shift the interval's start by one to account for the blocked register. - current->SetFrom(position + 1); + current->SetFrom(position + 1u); current->SetRegister(output.reg()); - BlockRegister(output, position, position + 1); + BlockRegister(output, position, will_call); // Ensure that an explicit output register is marked as being allocated. codegen_->AddAllocatedRegister(output); } else if (output.IsPair()) { - current->SetFrom(position + 1); + current->SetFrom(position + 1u); current->SetRegister(output.low()); LiveInterval* high = current->GetHighInterval(); high->SetRegister(output.high()); - high->SetFrom(position + 1); - BlockRegister(output.ToLow(), position, position + 1); - BlockRegister(output.ToHigh(), position, position + 1); + high->SetFrom(position + 1u); + BlockRegister(output.ToLow(), position, will_call); + BlockRegister(output.ToHigh(), position, will_call); // Ensure that an explicit output register pair is marked as being allocated. codegen_->AddAllocatedRegister(output.ToLow()); codegen_->AddAllocatedRegister(output.ToHigh()); @@ -470,6 +489,16 @@ class AllRangesIterator : public ValueObject { }; bool RegisterAllocatorLinearScan::ValidateInternal(bool log_fatal_on_failure) const { + auto should_process = [](RegisterType current_register_type, LiveInterval* interval) { + if (interval == nullptr) { + return false; + } + RegisterType register_type = DataType::IsFloatingPointType(interval->GetType()) + ? RegisterType::kFpRegister + : RegisterType::kCoreRegister; + return register_type == current_register_type; + }; + // To simplify unit testing, we eagerly create the array of intervals, and // call the helper method. ScopedArenaAllocator allocator(allocator_->GetArenaStack()); @@ -477,14 +506,21 @@ bool RegisterAllocatorLinearScan::ValidateInternal(bool log_fatal_on_failure) co allocator.Adapter(kArenaAllocRegisterAllocatorValidate)); for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) { HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i); - if (ShouldProcess(processing_core_registers_, instruction->GetLiveInterval())) { + if (should_process(current_register_type_, instruction->GetLiveInterval())) { intervals.push_back(instruction->GetLiveInterval()); } } - const ScopedArenaVector<LiveInterval*>* physical_register_intervals = processing_core_registers_ - ? &physical_core_register_intervals_ - : &physical_fp_register_intervals_; + for (LiveInterval* block_registers_interval : { block_registers_for_call_interval_, + block_registers_special_interval_ }) { + if (block_registers_interval->GetFirstRange() != nullptr) { + intervals.push_back(block_registers_interval); + } + } + const ScopedArenaVector<LiveInterval*>* physical_register_intervals = + (current_register_type_ == RegisterType::kCoreRegister) + ? &physical_core_register_intervals_ + : &physical_fp_register_intervals_; for (LiveInterval* fixed : *physical_register_intervals) { if (fixed != nullptr) { intervals.push_back(fixed); @@ -492,7 +528,7 @@ bool RegisterAllocatorLinearScan::ValidateInternal(bool log_fatal_on_failure) co } for (LiveInterval* temp : temp_intervals_) { - if (ShouldProcess(processing_core_registers_, temp)) { + if (should_process(current_register_type_, temp)) { intervals.push_back(temp); } } @@ -501,7 +537,8 @@ bool RegisterAllocatorLinearScan::ValidateInternal(bool log_fatal_on_failure) co GetNumberOfSpillSlots(), reserved_out_slots_, *codegen_, - processing_core_registers_, + &liveness_, + current_register_type_, log_fatal_on_failure); } @@ -514,6 +551,11 @@ void RegisterAllocatorLinearScan::DumpInterval(std::ostream& stream, LiveInterva } else { codegen_->DumpCoreRegister(stream, interval->GetRegister()); } + } else if (interval->IsFixed()) { + DCHECK_EQ(interval->GetType(), DataType::Type::kVoid); + DCHECK(interval == block_registers_for_call_interval_ || + interval == block_registers_special_interval_); + stream << (interval == block_registers_for_call_interval_ ? "block-for-call" : "block-special"); } else { stream << "spilled"; } @@ -622,7 +664,7 @@ void RegisterAllocatorLinearScan::LinearScan() { // (6) If the interval had a register allocated, add it to the list of active // intervals. if (success) { - codegen_->AddAllocatedRegister(processing_core_registers_ + codegen_->AddAllocatedRegister((current_register_type_ == RegisterType::kCoreRegister) ? Location::RegisterLocation(current->GetRegister()) : Location::FpuRegisterLocation(current->GetRegister())); active_.push_back(current); @@ -667,10 +709,14 @@ bool RegisterAllocatorLinearScan::TryAllocateFreeReg(LiveInterval* current) { free_until[i] = kMaxLifetimePosition; } - // For each active interval, set its register to not free. + // For each active interval, set its register(s) to not free. for (LiveInterval* interval : active_) { - DCHECK(interval->HasRegister()); - free_until[interval->GetRegister()] = 0; + DCHECK(interval->HasRegister() || interval->IsFixed()); + uint32_t register_mask = GetRegisterMask(interval, current_register_type_); + DCHECK_NE(register_mask, 0u); + for (uint32_t reg : LowToHighBits(register_mask)) { + free_until[reg] = 0; + } } // An interval that starts an instruction (that is, it is not split), may @@ -716,15 +762,22 @@ bool RegisterAllocatorLinearScan::TryAllocateFreeReg(LiveInterval* current) { continue; } - DCHECK(inactive->HasRegister()); - if (free_until[inactive->GetRegister()] == 0) { - // Already used by some active interval. No need to intersect. - continue; + DCHECK(inactive->HasRegister() || inactive->IsFixed()); + uint32_t register_mask = GetRegisterMask(inactive, current_register_type_); + DCHECK_NE(register_mask, 0u); + for (uint32_t reg : LowToHighBits(register_mask)) { + if (free_until[reg] == 0) { + // Already used by some active interval. Clear the register bit. + register_mask &= ~(1u << reg); + } } - size_t next_intersection = inactive->FirstIntersectionWith(current); - if (next_intersection != kNoLifetime) { - free_until[inactive->GetRegister()] = - std::min(free_until[inactive->GetRegister()], next_intersection); + if (register_mask != 0u) { + size_t next_intersection = inactive->FirstIntersectionWith(current); + if (next_intersection != kNoLifetime) { + for (uint32_t reg : LowToHighBits(register_mask)) { + free_until[reg] = std::min(free_until[reg], next_intersection); + } + } } } @@ -783,7 +836,7 @@ bool RegisterAllocatorLinearScan::TryAllocateFreeReg(LiveInterval* current) { } bool RegisterAllocatorLinearScan::IsBlocked(int reg) const { - return processing_core_registers_ + return (current_register_type_ == RegisterType::kCoreRegister) ? blocked_core_registers_[reg] : blocked_fp_registers_[reg]; } @@ -814,9 +867,11 @@ int RegisterAllocatorLinearScan::FindAvailableRegisterPair(size_t* next_use, siz } bool RegisterAllocatorLinearScan::IsCallerSaveRegister(int reg) const { - return processing_core_registers_ - ? !codegen_->IsCoreCalleeSaveRegister(reg) - : !codegen_->IsFloatingPointCalleeSaveRegister(reg); + uint32_t registers_blocked_for_call = (current_register_type_ == RegisterType::kCoreRegister) + ? core_registers_blocked_for_call_ + : fp_registers_blocked_for_call_; + DCHECK_LT(static_cast<size_t>(reg), BitSizeOf<uint32_t>()); + return (registers_blocked_for_call & (1u << reg)) != 0u; } int RegisterAllocatorLinearScan::FindAvailableRegister(size_t* next_use, LiveInterval* current) const { @@ -887,8 +942,9 @@ bool RegisterAllocatorLinearScan::TrySplitNonPairOrUnalignedPairIntervalAt(size_ size_t* next_use) { for (auto it = active_.begin(), end = active_.end(); it != end; ++it) { LiveInterval* active = *it; - DCHECK(active->HasRegister()); + // Special fixed intervals that represent multiple registers do not report having a register. if (active->IsFixed()) continue; + DCHECK(active->HasRegister()); if (active->IsHighInterval()) continue; if (first_register_use > next_use[active->GetRegister()]) continue; @@ -939,10 +995,14 @@ bool RegisterAllocatorLinearScan::AllocateBlockedReg(LiveInterval* current) { // For each active interval, find the next use of its register after the // start of current. for (LiveInterval* active : active_) { - DCHECK(active->HasRegister()); if (active->IsFixed()) { - next_use[active->GetRegister()] = current->GetStart(); + uint32_t register_mask = GetRegisterMask(active, current_register_type_); + DCHECK_NE(register_mask, 0u); + for (uint32_t reg : LowToHighBits(register_mask)) { + next_use[reg] = current->GetStart(); + } } else { + DCHECK(active->HasRegister()); size_t use = active->FirstRegisterUseAfter(current->GetStart()); if (use != kNoLifetime) { next_use[active->GetRegister()] = use; @@ -963,12 +1023,15 @@ bool RegisterAllocatorLinearScan::AllocateBlockedReg(LiveInterval* current) { DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime); continue; } - DCHECK(inactive->HasRegister()); + DCHECK(inactive->HasRegister() || inactive->IsFixed()); size_t next_intersection = inactive->FirstIntersectionWith(current); if (next_intersection != kNoLifetime) { if (inactive->IsFixed()) { - next_use[inactive->GetRegister()] = - std::min(next_intersection, next_use[inactive->GetRegister()]); + uint32_t register_mask = GetRegisterMask(inactive, current_register_type_); + DCHECK_NE(register_mask, 0u); + for (uint32_t reg : LowToHighBits(register_mask)) { + next_use[reg] = std::min(next_intersection, next_use[reg]); + } } else { size_t use = inactive->FirstUseAfter(current->GetStart()); if (use != kNoLifetime) { @@ -1042,6 +1105,8 @@ bool RegisterAllocatorLinearScan::AllocateBlockedReg(LiveInterval* current) { for (auto it = active_.begin(), end = active_.end(); it != end; ++it) { LiveInterval* active = *it; + DCHECK_IMPLIES(active->IsFixed(), + (GetRegisterMask(active, current_register_type_) & (1u << reg)) == 0u); if (active->GetRegister() == reg) { DCHECK(!active->IsFixed()); LiveInterval* split = Split(active, current->GetStart()); @@ -1058,7 +1123,7 @@ bool RegisterAllocatorLinearScan::AllocateBlockedReg(LiveInterval* current) { for (auto it = inactive_.begin(); it != inactive_.end(); ) { LiveInterval* inactive = *it; bool erased = false; - if (inactive->GetRegister() == reg) { + if ((GetRegisterMask(inactive, current_register_type_) & (1u << reg)) != 0u) { if (!current->IsSplit() && !inactive->IsFixed()) { // Neither current nor inactive are fixed. // Thanks to SSA, a non-split interval starting in a hole of an diff --git a/compiler/optimizing/register_allocator_linear_scan.h b/compiler/optimizing/register_allocator_linear_scan.h index c71a9e9ff1..296c41d073 100644 --- a/compiler/optimizing/register_allocator_linear_scan.h +++ b/compiler/optimizing/register_allocator_linear_scan.h @@ -47,11 +47,11 @@ class RegisterAllocatorLinearScan : public RegisterAllocator { void AllocateRegisters() override; bool Validate(bool log_fatal_on_failure) override { - processing_core_registers_ = true; + current_register_type_ = RegisterType::kCoreRegister; if (!ValidateInternal(log_fatal_on_failure)) { return false; } - processing_core_registers_ = false; + current_register_type_ = RegisterType::kFpRegister; return ValidateInternal(log_fatal_on_failure); } @@ -76,8 +76,7 @@ class RegisterAllocatorLinearScan : public RegisterAllocator { bool IsBlocked(int reg) const; // Update the interval for the register in `location` to cover [start, end). - void BlockRegister(Location location, size_t start, size_t end); - void BlockRegisters(size_t start, size_t end, bool caller_save_only = false); + void BlockRegister(Location location, size_t position, bool will_call); // Allocate a spill slot for the given interval. Should be called in linear // order of interval starting positions. @@ -100,11 +99,11 @@ class RegisterAllocatorLinearScan : public RegisterAllocator { // If any inputs require specific registers, block those registers // at the position of this instruction. - void CheckForFixedInputs(HInstruction* instruction); + void CheckForFixedInputs(HInstruction* instruction, bool will_call); // If the output of an instruction requires a specific register, split // the interval and assign the register to the first part. - void CheckForFixedOutput(HInstruction* instruction); + void CheckForFixedOutput(HInstruction* instruction, bool will_call); // Add all applicable safepoints to a live interval. // Currently depends on instruction processing order. @@ -112,7 +111,7 @@ class RegisterAllocatorLinearScan : public RegisterAllocator { // Collect all live intervals associated with the temporary locations // needed by an instruction. - void CheckForTempLiveIntervals(HInstruction* instruction); + void CheckForTempLiveIntervals(HInstruction* instruction, bool will_call); // If a safe point is needed, add a synthesized interval to later record // the number of live registers at this point. @@ -154,6 +153,8 @@ class RegisterAllocatorLinearScan : public RegisterAllocator { // where an instruction requires a specific register. ScopedArenaVector<LiveInterval*> physical_core_register_intervals_; ScopedArenaVector<LiveInterval*> physical_fp_register_intervals_; + LiveInterval* block_registers_for_call_interval_; + LiveInterval* block_registers_special_interval_; // For catch block or irreducible loop header. // Intervals for temporaries. Such intervals cover the positions // where an instruction requires a temporary. @@ -176,9 +177,8 @@ class RegisterAllocatorLinearScan : public RegisterAllocator { // Instructions that need a safepoint. ScopedArenaVector<HInstruction*> safepoints_; - // True if processing core registers. False if processing floating - // point registers. - bool processing_core_registers_; + // The register type we're currently processing. + RegisterType current_register_type_; // Number of registers for the current register kind (core or floating point). size_t number_of_registers_; diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 0d2d20682d..ab5e5de859 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -72,7 +72,8 @@ class RegisterAllocatorTest : public CommonCompilerTest, public OptimizingUnitTe /* number_of_spill_slots= */ 0u, /* number_of_out_slots= */ 0u, codegen, - /* processing_core_registers= */ true, + /*liveness=*/ nullptr, + RegisterAllocator::RegisterType::kCoreRegister, /* log_fatal_on_failure= */ false); } @@ -475,7 +476,7 @@ TEST_F(RegisterAllocatorTest, FreeUntil) { register_allocator.number_of_registers_ = 1; register_allocator.registers_array_ = GetAllocator()->AllocArray<size_t>(1); - register_allocator.processing_core_registers_ = true; + register_allocator.current_register_type_ = RegisterAllocator::RegisterType::kCoreRegister; register_allocator.unhandled_ = ®ister_allocator.unhandled_core_intervals_; ASSERT_TRUE(register_allocator.TryAllocateFreeReg(unhandled)); @@ -930,7 +931,7 @@ TEST_F(RegisterAllocatorTest, SpillInactive) { // Set just one register available to make all intervals compete for the same. register_allocator.number_of_registers_ = 1; register_allocator.registers_array_ = GetAllocator()->AllocArray<size_t>(1); - register_allocator.processing_core_registers_ = true; + register_allocator.current_register_type_ = RegisterAllocator::RegisterType::kCoreRegister; register_allocator.unhandled_ = ®ister_allocator.unhandled_core_intervals_; register_allocator.LinearScan(); |