author Vladimir Marko <vmarko@google.com> 2024-01-22 09:43:55 +0000
committer Vladimír Marko <vmarko@google.com> 2024-01-23 14:28:20 +0000
commit cb406b706134e0e5b444b8c705064de772391481 (patch)
tree 63ede3bf02d33565cc6b6bcf89af140d85d93d05
parent 0c36cea9ffc17d0fbdb3797917b095818ca15973 (diff)
Reduce memory used for blocked registers.
Represent groups of registers blocked for calls, catch blocks and
irreducible loop headers by a special interval to reduce memory use
and improve performance.

Building the boot image on host with
    kArenaAllocatorCountAllocations = true
    kArenaAllocatorPreciseTracking = false
    kArenaAllocatorMemoryReportThreshold = 16 * MB
dex2oat reports only `IActivityManager$Stub.onTransact` and the
difference in arena memory usage is
  - arm: 19481332 -> 18568516
    - ArenaStack: 4913380 -> 4000564
      - SsaLiveness: 2435300 -> 1522700
      - RegAllocator: 102808 -> 102592
  - arm64: 19500044 -> 17893052
    - ArenaStack: 5601636 -> 3994644
      - SsaLiveness: 3122836 -> 1516444
      - RegAllocator: 103560 -> 102960

Building a mini boot image (ART module dex files only) for arm64 with
the "speed" filter and `--dump-timings`, and taking three samples with
and without `baseline`, I measured
  - "dex2oat took":
    - before: 6.181s, 6.174s, 6.235s
    - after: 6.056s, 6.081s, 6.069s
    - baseline before: 4.189s, 4.075s, 3.876s
    - baseline after: 3.766s, 3.769s, 3.757s
  - sum of the five "Compile Dex File Quick" timings:
    - before: 3.720s, 3.732s, 3.756s
    - after: 3.593s, 3.620s, 3.605s
    - baseline before: 1.645s, 1.469s, 1.425s
    - baseline after: 1.322s, 1.320s, 1.322s

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Bug: 181943478
Change-Id: I356e7cc5a42af9b5b6bfee3dbef538d9025ca398
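For intuition, here is a minimal, self-contained C++ sketch of the space
argument behind this change (not ART code: the register count, the call-site
count, and the `Range` layout are made-up stand-ins). Before, every call site
added one live range to each caller-save fixed register interval; after, all
call sites share a single blocked-for-call interval plus one precomputed
32-bit caller-save mask.

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Hypothetical live-range record; stands in for ART's arena-allocated ranges.
struct Range { uint32_t start; uint32_t end; };

int main() {
  constexpr std::size_t kCallerSaveRegs = 16;  // made-up ABI
  constexpr std::size_t kCallSites = 1000;     // made-up method size

  // Before: one range per caller-save register per call site.
  std::size_t before = kCallerSaveRegs * kCallSites * sizeof(Range);
  // After: one shared range per call site, plus one 32-bit register mask.
  std::size_t after = kCallSites * sizeof(Range) + sizeof(uint32_t);

  std::printf("before: %zu bytes, after: %zu bytes\n", before, after);
  return 0;
}

Under these made-up numbers the shared interval needs roughly 1/16 of the
range storage, which mirrors the direction of the ArenaStack/SsaLiveness
savings reported above.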
-rw-r--r--  compiler/optimizing/register_allocator.cc              116
-rw-r--r--  compiler/optimizing/register_allocator.h                27
-rw-r--r--  compiler/optimizing/register_allocator_linear_scan.cc  225
-rw-r--r--  compiler/optimizing/register_allocator_linear_scan.h    20
-rw-r--r--  compiler/optimizing/register_allocator_test.cc           7
5 files changed, 287 insertions(+), 108 deletions(-)
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index f8b057d4a8..54a80555dc 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -21,6 +21,7 @@
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
+#include "base/bit_utils_iterator.h"
#include "base/bit_vector-inl.h"
#include "code_generator.h"
#include "register_allocator_linear_scan.h"
@@ -28,12 +29,39 @@
namespace art HIDDEN {
+template <typename IsCalleeSave>
+static uint32_t GetBlockedRegistersForCall(size_t num_registers, IsCalleeSave&& is_callee_save) {
+ DCHECK_LE(num_registers, BitSizeOf<uint32_t>());
+ uint32_t mask = 0u;
+ for (size_t reg = 0; reg != num_registers; ++reg) {
+ if (!is_callee_save(reg)) {
+ mask |= 1u << reg;
+ }
+ }
+ return mask;
+}
+
+static uint32_t GetBlockedCoreRegistersForCall(size_t num_registers, const CodeGenerator* codegen) {
+ return GetBlockedRegistersForCall(
+ num_registers, [&](size_t reg) { return codegen->IsCoreCalleeSaveRegister(reg); });
+}
+
+static uint32_t GetBlockedFpRegistersForCall(size_t num_registers, const CodeGenerator* codegen) {
+ return GetBlockedRegistersForCall(
+ num_registers, [&](size_t reg) { return codegen->IsFloatingPointCalleeSaveRegister(reg); });
+}
+
RegisterAllocator::RegisterAllocator(ScopedArenaAllocator* allocator,
CodeGenerator* codegen,
const SsaLivenessAnalysis& liveness)
: allocator_(allocator),
codegen_(codegen),
- liveness_(liveness) {}
+ liveness_(liveness),
+ num_core_registers_(codegen_->GetNumberOfCoreRegisters()),
+ num_fp_registers_(codegen_->GetNumberOfFloatingPointRegisters()),
+ core_registers_blocked_for_call_(
+ GetBlockedCoreRegistersForCall(num_core_registers_, codegen)),
+ fp_registers_blocked_for_call_(GetBlockedFpRegistersForCall(num_fp_registers_, codegen)) {}
std::unique_ptr<RegisterAllocator> RegisterAllocator::Create(ScopedArenaAllocator* allocator,
CodeGenerator* codegen,
@@ -94,15 +122,81 @@ class AllRangesIterator : public ValueObject {
DISALLOW_COPY_AND_ASSIGN(AllRangesIterator);
};
+void RegisterAllocator::DumpRegister(std::ostream& stream,
+ int reg,
+ RegisterType register_type,
+ const CodeGenerator* codegen) {
+ switch (register_type) {
+ case RegisterType::kCoreRegister:
+ codegen->DumpCoreRegister(stream, reg);
+ break;
+ case RegisterType::kFpRegister:
+ codegen->DumpFloatingPointRegister(stream, reg);
+ break;
+ }
+}
+
+uint32_t RegisterAllocator::GetRegisterMask(LiveInterval* interval,
+ RegisterType register_type) const {
+ if (interval->HasRegister()) {
+ DCHECK_EQ(register_type == RegisterType::kFpRegister,
+ DataType::IsFloatingPointType(interval->GetType()));
+ DCHECK_LT(static_cast<size_t>(interval->GetRegister()), BitSizeOf<uint32_t>());
+ return 1u << interval->GetRegister();
+ } else if (interval->IsFixed()) {
+ DCHECK_EQ(interval->GetType(), DataType::Type::kVoid);
+ DCHECK(interval->GetFirstRange() != nullptr);
+ size_t start = interval->GetFirstRange()->GetStart();
+ bool blocked_for_call = liveness_.GetInstructionFromPosition(start / 2u) != nullptr;
+ switch (register_type) {
+ case RegisterType::kCoreRegister:
+ return blocked_for_call ? core_registers_blocked_for_call_
+ : MaxInt<uint32_t>(num_core_registers_);
+ case RegisterType::kFpRegister:
+ return blocked_for_call ? fp_registers_blocked_for_call_
+ : MaxInt<uint32_t>(num_fp_registers_);
+ }
+ } else {
+ return 0u;
+ }
+}
+
bool RegisterAllocator::ValidateIntervals(ArrayRef<LiveInterval* const> intervals,
size_t number_of_spill_slots,
size_t number_of_out_slots,
const CodeGenerator& codegen,
- bool processing_core_registers,
+ const SsaLivenessAnalysis* liveness,
+ RegisterType register_type,
bool log_fatal_on_failure) {
- size_t number_of_registers = processing_core_registers
+ size_t number_of_registers = (register_type == RegisterType::kCoreRegister)
? codegen.GetNumberOfCoreRegisters()
: codegen.GetNumberOfFloatingPointRegisters();
+ uint32_t registers_blocked_for_call = (register_type == RegisterType::kCoreRegister)
+ ? GetBlockedCoreRegistersForCall(number_of_registers, &codegen)
+ : GetBlockedFpRegistersForCall(number_of_registers, &codegen);
+
+ // A copy of `GetRegisterMask()` using local `number_of_registers` and
+ // `registers_blocked_for_call` instead of the cached per-type members
+ // that we cannot use in this static member function.
+ auto get_register_mask = [&](LiveInterval* interval) {
+ if (interval->HasRegister()) {
+ DCHECK_EQ(register_type == RegisterType::kFpRegister,
+ DataType::IsFloatingPointType(interval->GetType()));
+ DCHECK_LT(static_cast<size_t>(interval->GetRegister()), BitSizeOf<uint32_t>());
+ return 1u << interval->GetRegister();
+ } else if (interval->IsFixed()) {
+ DCHECK_EQ(interval->GetType(), DataType::Type::kVoid);
+ DCHECK(interval->GetFirstRange() != nullptr);
+ size_t start = interval->GetFirstRange()->GetStart();
+ CHECK(liveness != nullptr);
+ bool blocked_for_call = liveness->GetInstructionFromPosition(start / 2u) != nullptr;
+ return blocked_for_call ? registers_blocked_for_call
+ : MaxInt<uint32_t>(number_of_registers);
+ } else {
+ return 0u;
+ }
+ };
+
ScopedArenaAllocator allocator(codegen.GetGraph()->GetArenaStack());
ScopedArenaVector<ArenaBitVector*> liveness_of_values(
allocator.Adapter(kArenaAllocRegisterAllocatorValidate));
@@ -149,13 +243,13 @@ bool RegisterAllocator::ValidateIntervals(ArrayRef<LiveInterval* const> interval
}
}
- if (current->HasRegister()) {
+ for (uint32_t reg : LowToHighBits(get_register_mask(current))) {
if (kIsDebugBuild && log_fatal_on_failure && !current->IsFixed()) {
// Only check when an error is fatal. Only test code asks for non-fatal failures,
// and test code may not properly fill in the right information for the code generator.
- CHECK(codegen.HasAllocatedRegister(processing_core_registers, current->GetRegister()));
+ CHECK(codegen.HasAllocatedRegister(register_type == RegisterType::kCoreRegister, reg));
}
- BitVector* liveness_of_register = liveness_of_values[current->GetRegister()];
+ BitVector* liveness_of_register = liveness_of_values[reg];
for (size_t j = it.CurrentRange()->GetStart(); j < it.CurrentRange()->GetEnd(); ++j) {
if (liveness_of_register->IsBitSet(j)) {
if (current->IsUsingInputRegister() && current->CanUseInputRegister()) {
@@ -168,15 +262,9 @@ bool RegisterAllocator::ValidateIntervals(ArrayRef<LiveInterval* const> interval
message << "(" << defined_by->DebugName() << ")";
}
message << "for ";
- if (processing_core_registers) {
- codegen.DumpCoreRegister(message, current->GetRegister());
- } else {
- codegen.DumpFloatingPointRegister(message, current->GetRegister());
- }
+ DumpRegister(message, reg, register_type, &codegen);
for (LiveInterval* interval : intervals) {
- if (interval->HasRegister()
- && interval->GetRegister() == current->GetRegister()
- && interval->CoversSlow(j)) {
+ if ((get_register_mask(interval) & (1u << reg)) != 0u && interval->CoversSlow(j)) {
message << std::endl;
if (interval->GetDefinedBy() != nullptr) {
message << interval->GetDefinedBy()->GetKind() << " ";
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index 453e339cba..bc192f29d7 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -43,6 +43,11 @@ class RegisterAllocator : public DeletableArenaObject<kArenaAllocRegisterAllocat
kRegisterAllocatorGraphColor
};
+ enum class RegisterType {
+ kCoreRegister,
+ kFpRegister
+ };
+
static constexpr Strategy kRegisterAllocatorDefault = kRegisterAllocatorLinearScan;
static std::unique_ptr<RegisterAllocator> Create(ScopedArenaAllocator* allocator,
@@ -65,7 +70,8 @@ class RegisterAllocator : public DeletableArenaObject<kArenaAllocRegisterAllocat
size_t number_of_spill_slots,
size_t number_of_out_slots,
const CodeGenerator& codegen,
- bool processing_core_registers,
+ const SsaLivenessAnalysis* liveness, // Can be null in tests.
+ RegisterType register_type,
bool log_fatal_on_failure);
static constexpr const char* kRegisterAllocatorPassName = "register";
@@ -84,9 +90,28 @@ class RegisterAllocator : public DeletableArenaObject<kArenaAllocRegisterAllocat
// to find an optimal split position.
LiveInterval* SplitBetween(LiveInterval* interval, size_t from, size_t to);
+ // Helper for calling the right typed codegen function for dumping a register.
+ void DumpRegister(std::ostream& stream, int reg, RegisterType register_type) const {
+ DumpRegister(stream, reg, register_type, codegen_);
+ }
+ static void DumpRegister(
+ std::ostream& stream, int reg, RegisterType register_type, const CodeGenerator* codegen);
+
+ // Get a mask of all registers for an interval.
+ // Most intervals either have or do not have a register, but we're using special fixed
+ // intervals with type `Void` to mark large sets of blocked registers for calls, catch
+ // blocks and irreducible loop headers to save memory and improve performance.
+ uint32_t GetRegisterMask(LiveInterval* interval, RegisterType register_type) const;
+
ScopedArenaAllocator* const allocator_;
CodeGenerator* const codegen_;
const SsaLivenessAnalysis& liveness_;
+
+ // Cached values calculated from codegen data.
+ const size_t num_core_registers_;
+ const size_t num_fp_registers_;
+ const uint32_t core_registers_blocked_for_call_;
+ const uint32_t fp_registers_blocked_for_call_;
};
} // namespace art
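The header comment above describes `GetRegisterMask()` returning a whole set
of registers as one 32-bit mask for the special fixed `Void` intervals. The
allocator walks such masks with ART's `LowToHighBits()` adapter from
`base/bit_utils_iterator.h`; below is a self-contained approximation of that
iteration using C++20 <bit> (a sketch, not the ART helper, and the mask value
is a made-up example).

#include <bit>
#include <cstdint>
#include <cstdio>

int main() {
  // Made-up mask: caller-save registers r0-r3 and r12-r15 blocked at a call.
  uint32_t mask = 0xF00Fu;

  // Visit set bits from lowest to highest, like `for (uint32_t reg :
  // LowToHighBits(mask))` in the diff above.
  while (mask != 0u) {
    int reg = std::countr_zero(mask);  // index of the lowest set bit
    std::printf("blocked: r%d\n", reg);
    mask &= mask - 1u;                 // clear the lowest set bit
  }
  return 0;
}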
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc
index a3029f56c6..2b1864b52b 100644
--- a/compiler/optimizing/register_allocator_linear_scan.cc
+++ b/compiler/optimizing/register_allocator_linear_scan.cc
@@ -19,6 +19,7 @@
#include <iostream>
#include <sstream>
+#include "base/bit_utils_iterator.h"
#include "base/bit_vector-inl.h"
#include "base/enums.h"
#include "code_generator.h"
@@ -52,6 +53,10 @@ RegisterAllocatorLinearScan::RegisterAllocatorLinearScan(ScopedArenaAllocator* a
inactive_(allocator->Adapter(kArenaAllocRegisterAllocator)),
physical_core_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
physical_fp_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+ block_registers_for_call_interval_(
+ LiveInterval::MakeFixedInterval(allocator, kNoRegister, DataType::Type::kVoid)),
+ block_registers_special_interval_(
+ LiveInterval::MakeFixedInterval(allocator, kNoRegister, DataType::Type::kVoid)),
temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
int_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
long_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
@@ -59,7 +64,7 @@ RegisterAllocatorLinearScan::RegisterAllocatorLinearScan(ScopedArenaAllocator* a
double_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
catch_phi_spill_slots_(0),
safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- processing_core_registers_(false),
+ current_register_type_(RegisterType::kCoreRegister),
number_of_registers_(-1),
registers_array_(nullptr),
blocked_core_registers_(codegen->GetBlockedCoreRegisters()),
@@ -83,13 +88,6 @@ RegisterAllocatorLinearScan::RegisterAllocatorLinearScan(ScopedArenaAllocator* a
RegisterAllocatorLinearScan::~RegisterAllocatorLinearScan() {}
-static bool ShouldProcess(bool processing_core_registers, LiveInterval* interval) {
- if (interval == nullptr) return false;
- bool is_core_register = (interval->GetType() != DataType::Type::kFloat64)
- && (interval->GetType() != DataType::Type::kFloat32);
- return processing_core_registers == is_core_register;
-}
-
void RegisterAllocatorLinearScan::AllocateRegisters() {
AllocateRegistersInternal();
RegisterAllocationResolver(codegen_, liveness_)
@@ -103,9 +101,9 @@ void RegisterAllocatorLinearScan::AllocateRegisters() {
ArrayRef<LiveInterval* const>(temp_intervals_));
if (kIsDebugBuild) {
- processing_core_registers_ = true;
+ current_register_type_ = RegisterType::kCoreRegister;
ValidateInternal(true);
- processing_core_registers_ = false;
+ current_register_type_ = RegisterType::kFpRegister;
ValidateInternal(true);
// Check that the linear order is still correct with regards to lifetime positions.
// Since only parallel moves have been inserted during the register allocation,
@@ -128,8 +126,19 @@ void RegisterAllocatorLinearScan::AllocateRegisters() {
}
}
-void RegisterAllocatorLinearScan::BlockRegister(Location location, size_t start, size_t end) {
+void RegisterAllocatorLinearScan::BlockRegister(Location location,
+ size_t position,
+ bool will_call) {
+ DCHECK(location.IsRegister() || location.IsFpuRegister());
int reg = location.reg();
+ if (will_call) {
+ uint32_t registers_blocked_for_call =
+ location.IsRegister() ? core_registers_blocked_for_call_ : fp_registers_blocked_for_call_;
+ if ((registers_blocked_for_call & (1u << reg)) != 0u) {
+ // Register is already marked as blocked by the `block_registers_for_call_interval_`.
+ return;
+ }
+ }
DCHECK(location.IsRegister() || location.IsFpuRegister());
LiveInterval* interval = location.IsRegister()
? physical_core_register_intervals_[reg]
@@ -146,20 +155,7 @@ void RegisterAllocatorLinearScan::BlockRegister(Location location, size_t start,
}
}
DCHECK(interval->GetRegister() == reg);
- interval->AddRange(start, end);
-}
-
-void RegisterAllocatorLinearScan::BlockRegisters(size_t start, size_t end, bool caller_save_only) {
- for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
- if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) {
- BlockRegister(Location::RegisterLocation(i), start, end);
- }
- }
- for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
- if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) {
- BlockRegister(Location::FpuRegisterLocation(i), start, end);
- }
- }
+ interval->AddRange(position, position + 1u);
}
void RegisterAllocatorLinearScan::AllocateRegistersInternal() {
@@ -180,15 +176,25 @@ void RegisterAllocatorLinearScan::AllocateRegistersInternal() {
// intervals belonging to the live-in set of the catch/header block to be spilled.
// TODO(ngeoffray): Phis in this block could be allocated in register.
size_t position = block->GetLifetimeStart();
- BlockRegisters(position, position + 1);
+ DCHECK_EQ(liveness_.GetInstructionFromPosition(position / 2u), nullptr);
+ block_registers_special_interval_->AddRange(position, position + 1u);
}
}
number_of_registers_ = codegen_->GetNumberOfCoreRegisters();
registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_,
kArenaAllocRegisterAllocator);
- processing_core_registers_ = true;
+ current_register_type_ = RegisterType::kCoreRegister;
unhandled_ = &unhandled_core_intervals_;
+ // Add intervals representing groups of physical registers blocked for calls,
+ // catch blocks and irreducible loop headers.
+ for (LiveInterval* block_registers_interval : { block_registers_for_call_interval_,
+ block_registers_special_interval_ }) {
+ if (block_registers_interval->GetFirstRange() != nullptr) {
+ block_registers_interval->ResetSearchCache();
+ inactive_.push_back(block_registers_interval);
+ }
+ }
for (LiveInterval* fixed : physical_core_register_intervals_) {
if (fixed != nullptr) {
// Fixed interval is added to inactive_ instead of unhandled_.
@@ -207,8 +213,17 @@ void RegisterAllocatorLinearScan::AllocateRegistersInternal() {
number_of_registers_ = codegen_->GetNumberOfFloatingPointRegisters();
registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_,
kArenaAllocRegisterAllocator);
- processing_core_registers_ = false;
+ current_register_type_ = RegisterType::kFpRegister;
unhandled_ = &unhandled_fp_intervals_;
+ // Add intervals representing groups of physical registers blocked for calls,
+ // catch blocks and irreducible loop headers.
+ for (LiveInterval* block_registers_interval : { block_registers_for_call_interval_,
+ block_registers_special_interval_ }) {
+ if (block_registers_interval->GetFirstRange() != nullptr) {
+ block_registers_interval->ResetSearchCache();
+ inactive_.push_back(block_registers_interval);
+ }
+ }
for (LiveInterval* fixed : physical_fp_register_intervals_) {
if (fixed != nullptr) {
// Fixed interval is added to inactive_ instead of unhandled_.
@@ -232,14 +247,17 @@ void RegisterAllocatorLinearScan::ProcessInstruction(HInstruction* instruction)
return;
}
- CheckForTempLiveIntervals(instruction);
- CheckForSafepoint(instruction);
- if (locations->WillCall()) {
- // If a call will happen, create fixed intervals for caller-save registers.
+ bool will_call = locations->WillCall();
+ if (will_call) {
+ // If a call will happen, add the range to a fixed interval that represents all the
+ // caller-save registers blocked at call sites.
const size_t position = instruction->GetLifetimePosition();
- BlockRegisters(position, position + 1, /* caller_save_only= */ true);
+ DCHECK_NE(liveness_.GetInstructionFromPosition(position / 2u), nullptr);
+ block_registers_for_call_interval_->AddRange(position, position + 1u);
}
- CheckForFixedInputs(instruction);
+ CheckForTempLiveIntervals(instruction, will_call);
+ CheckForSafepoint(instruction);
+ CheckForFixedInputs(instruction, will_call);
LiveInterval* current = instruction->GetLiveInterval();
if (current == nullptr)
@@ -257,7 +275,7 @@ void RegisterAllocatorLinearScan::ProcessInstruction(HInstruction* instruction)
AddSafepointsFor(instruction);
current->ResetSearchCache();
- CheckForFixedOutput(instruction);
+ CheckForFixedOutput(instruction, will_call);
if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
AllocateSpillSlotForCatchPhi(instruction->AsPhi());
@@ -296,7 +314,8 @@ bool RegisterAllocatorLinearScan::TryRemoveSuspendCheckEntry(HInstruction* instr
return false;
}
-void RegisterAllocatorLinearScan::CheckForTempLiveIntervals(HInstruction* instruction) {
+void RegisterAllocatorLinearScan::CheckForTempLiveIntervals(HInstruction* instruction,
+ bool will_call) {
LocationSummary* locations = instruction->GetLocations();
size_t position = instruction->GetLifetimePosition();
@@ -304,7 +323,7 @@ void RegisterAllocatorLinearScan::CheckForTempLiveIntervals(HInstruction* instru
for (size_t i = 0; i < locations->GetTempCount(); ++i) {
Location temp = locations->GetTemp(i);
if (temp.IsRegister() || temp.IsFpuRegister()) {
- BlockRegister(temp, position, position + 1);
+ BlockRegister(temp, position, will_call);
// Ensure that an explicit temporary register is marked as being allocated.
codegen_->AddAllocatedRegister(temp);
} else {
@@ -348,18 +367,18 @@ void RegisterAllocatorLinearScan::CheckForSafepoint(HInstruction* instruction) {
}
}
-void RegisterAllocatorLinearScan::CheckForFixedInputs(HInstruction* instruction) {
+void RegisterAllocatorLinearScan::CheckForFixedInputs(HInstruction* instruction, bool will_call) {
LocationSummary* locations = instruction->GetLocations();
size_t position = instruction->GetLifetimePosition();
for (size_t i = 0; i < locations->GetInputCount(); ++i) {
Location input = locations->InAt(i);
if (input.IsRegister() || input.IsFpuRegister()) {
- BlockRegister(input, position, position + 1);
+ BlockRegister(input, position, will_call);
// Ensure that an explicit input register is marked as being allocated.
codegen_->AddAllocatedRegister(input);
} else if (input.IsPair()) {
- BlockRegister(input.ToLow(), position, position + 1);
- BlockRegister(input.ToHigh(), position, position + 1);
+ BlockRegister(input.ToLow(), position, will_call);
+ BlockRegister(input.ToHigh(), position, will_call);
// Ensure that an explicit input register pair is marked as being allocated.
codegen_->AddAllocatedRegister(input.ToLow());
codegen_->AddAllocatedRegister(input.ToHigh());
@@ -393,7 +412,7 @@ void RegisterAllocatorLinearScan::AddSafepointsFor(HInstruction* instruction) {
}
}
-void RegisterAllocatorLinearScan::CheckForFixedOutput(HInstruction* instruction) {
+void RegisterAllocatorLinearScan::CheckForFixedOutput(HInstruction* instruction, bool will_call) {
LocationSummary* locations = instruction->GetLocations();
size_t position = instruction->GetLifetimePosition();
LiveInterval* current = instruction->GetLiveInterval();
@@ -408,30 +427,30 @@ void RegisterAllocatorLinearScan::CheckForFixedOutput(HInstruction* instruction)
if (output.IsUnallocated() && output.GetPolicy() == Location::kSameAsFirstInput) {
Location first = locations->InAt(0);
if (first.IsRegister() || first.IsFpuRegister()) {
- current->SetFrom(position + 1);
+ current->SetFrom(position + 1u);
current->SetRegister(first.reg());
} else if (first.IsPair()) {
- current->SetFrom(position + 1);
+ current->SetFrom(position + 1u);
current->SetRegister(first.low());
LiveInterval* high = current->GetHighInterval();
high->SetRegister(first.high());
- high->SetFrom(position + 1);
+ high->SetFrom(position + 1u);
}
} else if (output.IsRegister() || output.IsFpuRegister()) {
// Shift the interval's start by one to account for the blocked register.
- current->SetFrom(position + 1);
+ current->SetFrom(position + 1u);
current->SetRegister(output.reg());
- BlockRegister(output, position, position + 1);
+ BlockRegister(output, position, will_call);
// Ensure that an explicit output register is marked as being allocated.
codegen_->AddAllocatedRegister(output);
} else if (output.IsPair()) {
- current->SetFrom(position + 1);
+ current->SetFrom(position + 1u);
current->SetRegister(output.low());
LiveInterval* high = current->GetHighInterval();
high->SetRegister(output.high());
- high->SetFrom(position + 1);
- BlockRegister(output.ToLow(), position, position + 1);
- BlockRegister(output.ToHigh(), position, position + 1);
+ high->SetFrom(position + 1u);
+ BlockRegister(output.ToLow(), position, will_call);
+ BlockRegister(output.ToHigh(), position, will_call);
// Ensure that an explicit output register pair is marked as being allocated.
codegen_->AddAllocatedRegister(output.ToLow());
codegen_->AddAllocatedRegister(output.ToHigh());
@@ -470,6 +489,16 @@ class AllRangesIterator : public ValueObject {
};
bool RegisterAllocatorLinearScan::ValidateInternal(bool log_fatal_on_failure) const {
+ auto should_process = [](RegisterType current_register_type, LiveInterval* interval) {
+ if (interval == nullptr) {
+ return false;
+ }
+ RegisterType register_type = DataType::IsFloatingPointType(interval->GetType())
+ ? RegisterType::kFpRegister
+ : RegisterType::kCoreRegister;
+ return register_type == current_register_type;
+ };
+
// To simplify unit testing, we eagerly create the array of intervals, and
// call the helper method.
ScopedArenaAllocator allocator(allocator_->GetArenaStack());
@@ -477,14 +506,21 @@ bool RegisterAllocatorLinearScan::ValidateInternal(bool log_fatal_on_failure) co
allocator.Adapter(kArenaAllocRegisterAllocatorValidate));
for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
- if (ShouldProcess(processing_core_registers_, instruction->GetLiveInterval())) {
+ if (should_process(current_register_type_, instruction->GetLiveInterval())) {
intervals.push_back(instruction->GetLiveInterval());
}
}
- const ScopedArenaVector<LiveInterval*>* physical_register_intervals = processing_core_registers_
- ? &physical_core_register_intervals_
- : &physical_fp_register_intervals_;
+ for (LiveInterval* block_registers_interval : { block_registers_for_call_interval_,
+ block_registers_special_interval_ }) {
+ if (block_registers_interval->GetFirstRange() != nullptr) {
+ intervals.push_back(block_registers_interval);
+ }
+ }
+ const ScopedArenaVector<LiveInterval*>* physical_register_intervals =
+ (current_register_type_ == RegisterType::kCoreRegister)
+ ? &physical_core_register_intervals_
+ : &physical_fp_register_intervals_;
for (LiveInterval* fixed : *physical_register_intervals) {
if (fixed != nullptr) {
intervals.push_back(fixed);
@@ -492,7 +528,7 @@ bool RegisterAllocatorLinearScan::ValidateInternal(bool log_fatal_on_failure) co
}
for (LiveInterval* temp : temp_intervals_) {
- if (ShouldProcess(processing_core_registers_, temp)) {
+ if (should_process(current_register_type_, temp)) {
intervals.push_back(temp);
}
}
@@ -501,7 +537,8 @@ bool RegisterAllocatorLinearScan::ValidateInternal(bool log_fatal_on_failure) co
GetNumberOfSpillSlots(),
reserved_out_slots_,
*codegen_,
- processing_core_registers_,
+ &liveness_,
+ current_register_type_,
log_fatal_on_failure);
}
@@ -514,6 +551,11 @@ void RegisterAllocatorLinearScan::DumpInterval(std::ostream& stream, LiveInterva
} else {
codegen_->DumpCoreRegister(stream, interval->GetRegister());
}
+ } else if (interval->IsFixed()) {
+ DCHECK_EQ(interval->GetType(), DataType::Type::kVoid);
+ DCHECK(interval == block_registers_for_call_interval_ ||
+ interval == block_registers_special_interval_);
+ stream << (interval == block_registers_for_call_interval_ ? "block-for-call" : "block-special");
} else {
stream << "spilled";
}
@@ -622,7 +664,7 @@ void RegisterAllocatorLinearScan::LinearScan() {
// (6) If the interval had a register allocated, add it to the list of active
// intervals.
if (success) {
- codegen_->AddAllocatedRegister(processing_core_registers_
+ codegen_->AddAllocatedRegister((current_register_type_ == RegisterType::kCoreRegister)
? Location::RegisterLocation(current->GetRegister())
: Location::FpuRegisterLocation(current->GetRegister()));
active_.push_back(current);
@@ -667,10 +709,14 @@ bool RegisterAllocatorLinearScan::TryAllocateFreeReg(LiveInterval* current) {
free_until[i] = kMaxLifetimePosition;
}
- // For each active interval, set its register to not free.
+ // For each active interval, set its register(s) to not free.
for (LiveInterval* interval : active_) {
- DCHECK(interval->HasRegister());
- free_until[interval->GetRegister()] = 0;
+ DCHECK(interval->HasRegister() || interval->IsFixed());
+ uint32_t register_mask = GetRegisterMask(interval, current_register_type_);
+ DCHECK_NE(register_mask, 0u);
+ for (uint32_t reg : LowToHighBits(register_mask)) {
+ free_until[reg] = 0;
+ }
}
// An interval that starts an instruction (that is, it is not split), may
@@ -716,15 +762,22 @@ bool RegisterAllocatorLinearScan::TryAllocateFreeReg(LiveInterval* current) {
continue;
}
- DCHECK(inactive->HasRegister());
- if (free_until[inactive->GetRegister()] == 0) {
- // Already used by some active interval. No need to intersect.
- continue;
+ DCHECK(inactive->HasRegister() || inactive->IsFixed());
+ uint32_t register_mask = GetRegisterMask(inactive, current_register_type_);
+ DCHECK_NE(register_mask, 0u);
+ for (uint32_t reg : LowToHighBits(register_mask)) {
+ if (free_until[reg] == 0) {
+ // Already used by some active interval. Clear the register bit.
+ register_mask &= ~(1u << reg);
+ }
}
- size_t next_intersection = inactive->FirstIntersectionWith(current);
- if (next_intersection != kNoLifetime) {
- free_until[inactive->GetRegister()] =
- std::min(free_until[inactive->GetRegister()], next_intersection);
+ if (register_mask != 0u) {
+ size_t next_intersection = inactive->FirstIntersectionWith(current);
+ if (next_intersection != kNoLifetime) {
+ for (uint32_t reg : LowToHighBits(register_mask)) {
+ free_until[reg] = std::min(free_until[reg], next_intersection);
+ }
+ }
}
}
@@ -783,7 +836,7 @@ bool RegisterAllocatorLinearScan::TryAllocateFreeReg(LiveInterval* current) {
}
bool RegisterAllocatorLinearScan::IsBlocked(int reg) const {
- return processing_core_registers_
+ return (current_register_type_ == RegisterType::kCoreRegister)
? blocked_core_registers_[reg]
: blocked_fp_registers_[reg];
}
@@ -814,9 +867,11 @@ int RegisterAllocatorLinearScan::FindAvailableRegisterPair(size_t* next_use, siz
}
bool RegisterAllocatorLinearScan::IsCallerSaveRegister(int reg) const {
- return processing_core_registers_
- ? !codegen_->IsCoreCalleeSaveRegister(reg)
- : !codegen_->IsFloatingPointCalleeSaveRegister(reg);
+ uint32_t registers_blocked_for_call = (current_register_type_ == RegisterType::kCoreRegister)
+ ? core_registers_blocked_for_call_
+ : fp_registers_blocked_for_call_;
+ DCHECK_LT(static_cast<size_t>(reg), BitSizeOf<uint32_t>());
+ return (registers_blocked_for_call & (1u << reg)) != 0u;
}
int RegisterAllocatorLinearScan::FindAvailableRegister(size_t* next_use, LiveInterval* current) const {
@@ -887,8 +942,9 @@ bool RegisterAllocatorLinearScan::TrySplitNonPairOrUnalignedPairIntervalAt(size_
size_t* next_use) {
for (auto it = active_.begin(), end = active_.end(); it != end; ++it) {
LiveInterval* active = *it;
- DCHECK(active->HasRegister());
+ // Special fixed intervals that represent multiple registers do not report having a register.
if (active->IsFixed()) continue;
+ DCHECK(active->HasRegister());
if (active->IsHighInterval()) continue;
if (first_register_use > next_use[active->GetRegister()]) continue;
@@ -939,10 +995,14 @@ bool RegisterAllocatorLinearScan::AllocateBlockedReg(LiveInterval* current) {
// For each active interval, find the next use of its register after the
// start of current.
for (LiveInterval* active : active_) {
- DCHECK(active->HasRegister());
if (active->IsFixed()) {
- next_use[active->GetRegister()] = current->GetStart();
+ uint32_t register_mask = GetRegisterMask(active, current_register_type_);
+ DCHECK_NE(register_mask, 0u);
+ for (uint32_t reg : LowToHighBits(register_mask)) {
+ next_use[reg] = current->GetStart();
+ }
} else {
+ DCHECK(active->HasRegister());
size_t use = active->FirstRegisterUseAfter(current->GetStart());
if (use != kNoLifetime) {
next_use[active->GetRegister()] = use;
@@ -963,12 +1023,15 @@ bool RegisterAllocatorLinearScan::AllocateBlockedReg(LiveInterval* current) {
DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
continue;
}
- DCHECK(inactive->HasRegister());
+ DCHECK(inactive->HasRegister() || inactive->IsFixed());
size_t next_intersection = inactive->FirstIntersectionWith(current);
if (next_intersection != kNoLifetime) {
if (inactive->IsFixed()) {
- next_use[inactive->GetRegister()] =
- std::min(next_intersection, next_use[inactive->GetRegister()]);
+ uint32_t register_mask = GetRegisterMask(inactive, current_register_type_);
+ DCHECK_NE(register_mask, 0u);
+ for (uint32_t reg : LowToHighBits(register_mask)) {
+ next_use[reg] = std::min(next_intersection, next_use[reg]);
+ }
} else {
size_t use = inactive->FirstUseAfter(current->GetStart());
if (use != kNoLifetime) {
@@ -1042,6 +1105,8 @@ bool RegisterAllocatorLinearScan::AllocateBlockedReg(LiveInterval* current) {
for (auto it = active_.begin(), end = active_.end(); it != end; ++it) {
LiveInterval* active = *it;
+ DCHECK_IMPLIES(active->IsFixed(),
+ (GetRegisterMask(active, current_register_type_) & (1u << reg)) == 0u);
if (active->GetRegister() == reg) {
DCHECK(!active->IsFixed());
LiveInterval* split = Split(active, current->GetStart());
@@ -1058,7 +1123,7 @@ bool RegisterAllocatorLinearScan::AllocateBlockedReg(LiveInterval* current) {
for (auto it = inactive_.begin(); it != inactive_.end(); ) {
LiveInterval* inactive = *it;
bool erased = false;
- if (inactive->GetRegister() == reg) {
+ if ((GetRegisterMask(inactive, current_register_type_) & (1u << reg)) != 0u) {
if (!current->IsSplit() && !inactive->IsFixed()) {
// Neither current nor inactive are fixed.
// Thanks to SSA, a non-split interval starting in a hole of an
diff --git a/compiler/optimizing/register_allocator_linear_scan.h b/compiler/optimizing/register_allocator_linear_scan.h
index c71a9e9ff1..296c41d073 100644
--- a/compiler/optimizing/register_allocator_linear_scan.h
+++ b/compiler/optimizing/register_allocator_linear_scan.h
@@ -47,11 +47,11 @@ class RegisterAllocatorLinearScan : public RegisterAllocator {
void AllocateRegisters() override;
bool Validate(bool log_fatal_on_failure) override {
- processing_core_registers_ = true;
+ current_register_type_ = RegisterType::kCoreRegister;
if (!ValidateInternal(log_fatal_on_failure)) {
return false;
}
- processing_core_registers_ = false;
+ current_register_type_ = RegisterType::kFpRegister;
return ValidateInternal(log_fatal_on_failure);
}
@@ -76,8 +76,7 @@ class RegisterAllocatorLinearScan : public RegisterAllocator {
bool IsBlocked(int reg) const;
// Update the interval for the register in `location` to cover [start, end).
- void BlockRegister(Location location, size_t start, size_t end);
- void BlockRegisters(size_t start, size_t end, bool caller_save_only = false);
+ void BlockRegister(Location location, size_t position, bool will_call);
// Allocate a spill slot for the given interval. Should be called in linear
// order of interval starting positions.
@@ -100,11 +99,11 @@ class RegisterAllocatorLinearScan : public RegisterAllocator {
// If any inputs require specific registers, block those registers
// at the position of this instruction.
- void CheckForFixedInputs(HInstruction* instruction);
+ void CheckForFixedInputs(HInstruction* instruction, bool will_call);
// If the output of an instruction requires a specific register, split
// the interval and assign the register to the first part.
- void CheckForFixedOutput(HInstruction* instruction);
+ void CheckForFixedOutput(HInstruction* instruction, bool will_call);
// Add all applicable safepoints to a live interval.
// Currently depends on instruction processing order.
@@ -112,7 +111,7 @@ class RegisterAllocatorLinearScan : public RegisterAllocator {
// Collect all live intervals associated with the temporary locations
// needed by an instruction.
- void CheckForTempLiveIntervals(HInstruction* instruction);
+ void CheckForTempLiveIntervals(HInstruction* instruction, bool will_call);
// If a safe point is needed, add a synthesized interval to later record
// the number of live registers at this point.
@@ -154,6 +153,8 @@ class RegisterAllocatorLinearScan : public RegisterAllocator {
// where an instruction requires a specific register.
ScopedArenaVector<LiveInterval*> physical_core_register_intervals_;
ScopedArenaVector<LiveInterval*> physical_fp_register_intervals_;
+ LiveInterval* block_registers_for_call_interval_;
+ LiveInterval* block_registers_special_interval_; // For catch block or irreducible loop header.
// Intervals for temporaries. Such intervals cover the positions
// where an instruction requires a temporary.
@@ -176,9 +177,8 @@ class RegisterAllocatorLinearScan : public RegisterAllocator {
// Instructions that need a safepoint.
ScopedArenaVector<HInstruction*> safepoints_;
- // True if processing core registers. False if processing floating
- // point registers.
- bool processing_core_registers_;
+ // The register type we're currently processing.
+ RegisterType current_register_type_;
// Number of registers for the current register kind (core or floating point).
size_t number_of_registers_;
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 0d2d20682d..ab5e5de859 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -72,7 +72,8 @@ class RegisterAllocatorTest : public CommonCompilerTest, public OptimizingUnitTe
/* number_of_spill_slots= */ 0u,
/* number_of_out_slots= */ 0u,
codegen,
- /* processing_core_registers= */ true,
+ /*liveness=*/ nullptr,
+ RegisterAllocator::RegisterType::kCoreRegister,
/* log_fatal_on_failure= */ false);
}
@@ -475,7 +476,7 @@ TEST_F(RegisterAllocatorTest, FreeUntil) {
register_allocator.number_of_registers_ = 1;
register_allocator.registers_array_ = GetAllocator()->AllocArray<size_t>(1);
- register_allocator.processing_core_registers_ = true;
+ register_allocator.current_register_type_ = RegisterAllocator::RegisterType::kCoreRegister;
register_allocator.unhandled_ = &register_allocator.unhandled_core_intervals_;
ASSERT_TRUE(register_allocator.TryAllocateFreeReg(unhandled));
@@ -930,7 +931,7 @@ TEST_F(RegisterAllocatorTest, SpillInactive) {
// Set just one register available to make all intervals compete for the same.
register_allocator.number_of_registers_ = 1;
register_allocator.registers_array_ = GetAllocator()->AllocArray<size_t>(1);
- register_allocator.processing_core_registers_ = true;
+ register_allocator.current_register_type_ = RegisterAllocator::RegisterType::kCoreRegister;
register_allocator.unhandled_ = &register_allocator.unhandled_core_intervals_;
register_allocator.LinearScan();