Revert "Revert "Fully support pairs in the register allocator.""
This reverts commit c399fdc442db82dfda66e6c25518872ab0f1d24f.
Change-Id: I19f8215c4b98f2f0827e04bf7806c3ca439794e5
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 8d9a7b7..9665b0e 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -632,6 +632,14 @@
break;
}
+ case Location::kRegisterPair : {
+ stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, location.low());
+ stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInRegister, location.high());
+ ++i;
+ DCHECK_LT(i, environment_size);
+ break;
+ }
+
default:
LOG(FATAL) << "Unexpected kind " << location.GetKind();
}
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 980de04..8c07b46 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -37,6 +37,11 @@
return static_cast<DRegister>(reg / 2);
}
+static bool ExpectedPairLayout(Location location) {
+  // We expect this for both core and fpu register pairs.
+ return ((location.low() & 1) == 0) && (location.low() + 1 == location.high());
+}
+
static constexpr int kNumberOfPushedRegistersAtEntry = 1 + 2; // LR, R6, R7
static constexpr int kCurrentMethodStackOffset = 0;
@@ -625,12 +630,11 @@
if (double_index_ + 1 < calling_convention.GetNumberOfFpuRegisters()) {
uint32_t index = double_index_;
double_index_ += 2;
- DCHECK_EQ(calling_convention.GetFpuRegisterAt(index) + 1,
- calling_convention.GetFpuRegisterAt(index + 1));
- DCHECK_EQ(calling_convention.GetFpuRegisterAt(index) & 1, 0);
- return Location::FpuRegisterPairLocation(
+ Location result = Location::FpuRegisterPairLocation(
calling_convention.GetFpuRegisterAt(index),
calling_convention.GetFpuRegisterAt(index + 1));
+ DCHECK(ExpectedPairLayout(result));
+ return result;
} else {
return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index));
}
@@ -721,16 +725,10 @@
} else if (source.IsFpuRegister()) {
UNIMPLEMENTED(FATAL);
} else {
- // No conflict possible, so just do the moves.
DCHECK(source.IsDoubleStackSlot());
- if (destination.AsRegisterPairLow<Register>() == R1) {
- DCHECK_EQ(destination.AsRegisterPairHigh<Register>(), R2);
- __ LoadFromOffset(kLoadWord, R1, SP, source.GetStackIndex());
- __ LoadFromOffset(kLoadWord, R2, SP, source.GetHighStackIndex(kArmWordSize));
- } else {
- __ LoadFromOffset(kLoadWordPair, destination.AsRegisterPairLow<Register>(),
- SP, source.GetStackIndex());
- }
+ DCHECK(ExpectedPairLayout(destination));
+ __ LoadFromOffset(kLoadWordPair, destination.AsRegisterPairLow<Register>(),
+ SP, source.GetStackIndex());
}
} else if (destination.IsFpuRegisterPair()) {
if (source.IsDoubleStackSlot()) {
@@ -937,6 +935,7 @@
// Condition has not been materialized, use its inputs as the
// comparison and its condition as the branch condition.
LocationSummary* locations = cond->GetLocations();
+ DCHECK(locations->InAt(0).IsRegister()) << locations->InAt(0);
Register left = locations->InAt(0).AsRegister<Register>();
if (locations->InAt(1).IsRegister()) {
__ cmp(left, ShifterOperand(locations->InAt(1).AsRegister<Register>()));
@@ -1284,7 +1283,9 @@
switch (neg->GetResultType()) {
case Primitive::kPrimInt:
case Primitive::kPrimLong: {
- bool output_overlaps = (neg->GetResultType() == Primitive::kPrimLong);
+ Location::OutputOverlap output_overlaps = (neg->GetResultType() == Primitive::kPrimLong)
+ ? Location::kOutputOverlap
+ : Location::kNoOutputOverlap;
locations->SetInAt(0, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister(), output_overlaps);
break;
@@ -1811,12 +1812,17 @@
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall);
switch (add->GetResultType()) {
- case Primitive::kPrimInt:
- case Primitive::kPrimLong: {
- bool output_overlaps = (add->GetResultType() == Primitive::kPrimLong);
+ case Primitive::kPrimInt: {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
- locations->SetOut(Location::RequiresRegister(), output_overlaps);
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ }
+
+ case Primitive::kPrimLong: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
break;
}
@@ -1851,7 +1857,8 @@
}
break;
- case Primitive::kPrimLong:
+ case Primitive::kPrimLong: {
+ DCHECK(second.IsRegisterPair());
__ adds(out.AsRegisterPairLow<Register>(),
first.AsRegisterPairLow<Register>(),
ShifterOperand(second.AsRegisterPairLow<Register>()));
@@ -1859,6 +1866,7 @@
first.AsRegisterPairHigh<Register>(),
ShifterOperand(second.AsRegisterPairHigh<Register>()));
break;
+ }
case Primitive::kPrimFloat:
__ vadds(out.AsFpuRegister<SRegister>(),
@@ -1881,12 +1889,17 @@
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(sub, LocationSummary::kNoCall);
switch (sub->GetResultType()) {
- case Primitive::kPrimInt:
- case Primitive::kPrimLong: {
- bool output_overlaps = (sub->GetResultType() == Primitive::kPrimLong);
+ case Primitive::kPrimInt: {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(sub->InputAt(1)));
- locations->SetOut(Location::RequiresRegister(), output_overlaps);
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ }
+
+ case Primitive::kPrimLong: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
break;
}
case Primitive::kPrimFloat:
@@ -1921,6 +1934,7 @@
}
case Primitive::kPrimLong: {
+ DCHECK(second.IsRegisterPair());
__ subs(out.AsRegisterPairLow<Register>(),
first.AsRegisterPairLow<Register>(),
ShifterOperand(second.AsRegisterPairLow<Register>()));
@@ -2056,8 +2070,7 @@
calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
locations->SetInAt(1, Location::RegisterPairLocation(
calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
- // The runtime helper puts the output in R0,R2.
- locations->SetOut(Location::RegisterPairLocation(R0, R2));
+ locations->SetOut(Location::RegisterPairLocation(R0, R1));
break;
}
case Primitive::kPrimFloat:
@@ -2094,7 +2107,7 @@
DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
DCHECK_EQ(R0, out.AsRegisterPairLow<Register>());
- DCHECK_EQ(R2, out.AsRegisterPairHigh<Register>());
+ DCHECK_EQ(R1, out.AsRegisterPairHigh<Register>());
codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), div, div->GetDexPc());
break;
@@ -2277,8 +2290,8 @@
locations->SetInAt(0, Location::RegisterPairLocation(
calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
- // The runtime helper puts the output in R0,R2.
- locations->SetOut(Location::RegisterPairLocation(R0, R2));
+ // The runtime helper puts the output in R0,R1.
+ locations->SetOut(Location::RegisterPairLocation(R0, R1));
break;
}
default:
@@ -2332,7 +2345,7 @@
DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegister<Register>());
DCHECK_EQ(R0, out.AsRegisterPairLow<Register>());
- DCHECK_EQ(R2, out.AsRegisterPairHigh<Register>());
+ DCHECK_EQ(R1, out.AsRegisterPairHigh<Register>());
int32_t entry_point_offset;
if (op->IsShl()) {
@@ -3336,16 +3349,11 @@
__ StoreSToOffset(source.AsFpuRegister<SRegister>(), SP, destination.GetStackIndex());
}
} else if (source.IsDoubleStackSlot()) {
- if (destination.IsFpuRegisterPair()) {
- __ LoadDFromOffset(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()),
- SP, source.GetStackIndex());
- } else {
- DCHECK(destination.IsDoubleStackSlot()) << destination;
- __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex());
- __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
- __ LoadFromOffset(kLoadWord, IP, SP, source.GetHighStackIndex(kArmWordSize));
- __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize));
- }
+ DCHECK(destination.IsDoubleStackSlot()) << destination;
+ __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex());
+ __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
+ __ LoadFromOffset(kLoadWord, IP, SP, source.GetHighStackIndex(kArmWordSize));
+ __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize));
} else {
DCHECK(source.IsConstant()) << source;
HInstruction* constant = source.GetConstant();
@@ -3358,8 +3366,47 @@
__ LoadImmediate(IP, value);
__ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
}
+ } else if (constant->IsLongConstant()) {
+ int64_t value = constant->AsLongConstant()->GetValue();
+ if (destination.IsRegister()) {
+ // In the presence of long or double constants, the parallel move resolver will
+ // split the move into two, but keeps the same constant for both moves. Here,
+ // we use the low or high part depending on which register this move goes to.
+ if (destination.reg() % 2 == 0) {
+ __ LoadImmediate(destination.AsRegister<Register>(), Low32Bits(value));
+ } else {
+ __ LoadImmediate(destination.AsRegister<Register>(), High32Bits(value));
+ }
+ } else {
+ DCHECK(destination.IsDoubleStackSlot());
+ __ LoadImmediate(IP, Low32Bits(value));
+ __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
+ __ LoadImmediate(IP, High32Bits(value));
+ __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize));
+ }
+ } else if (constant->IsDoubleConstant()) {
+ double value = constant->AsDoubleConstant()->GetValue();
+ uint64_t int_value = bit_cast<uint64_t, double>(value);
+ if (destination.IsFpuRegister()) {
+ // In the presence of long or double constants, the parallel move resolver will
+ // split the move into two, but keeps the same constant for both moves. Here,
+ // we use the low or high part depending on which register this move goes to.
+ if (destination.reg() % 2 == 0) {
+ __ LoadSImmediate(destination.AsFpuRegister<SRegister>(),
+ bit_cast<float, uint32_t>(Low32Bits(int_value)));
+ } else {
+ __ LoadSImmediate(destination.AsFpuRegister<SRegister>(),
+ bit_cast<float, uint32_t>(High32Bits(int_value)));
+ }
+ } else {
+ DCHECK(destination.IsDoubleStackSlot());
+ __ LoadImmediate(IP, Low32Bits(int_value));
+ __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
+ __ LoadImmediate(IP, High32Bits(int_value));
+ __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize));
+ }
} else {
- DCHECK(constant->IsFloatConstant());
+ DCHECK(constant->IsFloatConstant()) << constant->DebugName();
float value = constant->AsFloatConstant()->GetValue();
if (destination.IsFpuRegister()) {
__ LoadSImmediate(destination.AsFpuRegister<SRegister>(), value);
@@ -3650,7 +3697,9 @@
|| instruction->GetResultType() == Primitive::kPrimLong);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
- bool output_overlaps = (instruction->GetResultType() == Primitive::kPrimLong);
+ Location::OutputOverlap output_overlaps = (instruction->GetResultType() == Primitive::kPrimLong)
+ ? Location::kOutputOverlap
+ : Location::kNoOutputOverlap;
locations->SetOut(Location::RequiresRegister(), output_overlaps);
}
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 0109363..271eb82 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1879,7 +1879,8 @@
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), true); // The output does overlap inputs.
+ // The output does overlap inputs.
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}
void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 9e0a5b8..df21c8e 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -142,6 +142,10 @@
codegen_.DumpFloatingPointRegister(output_, location.low());
output_ << " and ";
codegen_.DumpFloatingPointRegister(output_, location.high());
+ } else if (location.IsRegisterPair()) {
+ codegen_.DumpCoreRegister(output_, location.low());
+ output_ << " and ";
+ codegen_.DumpCoreRegister(output_, location.high());
} else {
DCHECK(location.IsDoubleStackSlot());
output_ << "2x" << location.GetStackIndex() << "(sp)";
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index 9f2f9ec..990d662 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -27,7 +27,7 @@
temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0),
environment_(instruction->GetBlock()->GetGraph()->GetArena(),
instruction->EnvironmentSize()),
- output_overlaps_(true),
+ output_overlaps_(Location::kOutputOverlap),
call_kind_(call_kind),
stack_mask_(nullptr),
register_mask_(0),
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 68d6059..dda6c94 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -37,7 +37,10 @@
*/
class Location : public ValueObject {
public:
- static constexpr bool kNoOutputOverlap = false;
+ enum OutputOverlap {
+ kOutputOverlap,
+ kNoOutputOverlap
+ };
enum Kind {
kInvalid = 0,
@@ -468,7 +471,7 @@
return inputs_.Size();
}
- void SetOut(Location location, bool overlaps = true) {
+ void SetOut(Location location, Location::OutputOverlap overlaps = Location::kOutputOverlap) {
DCHECK(output_.IsUnallocated() || output_.IsInvalid());
output_overlaps_ = overlaps;
output_ = location;
@@ -561,7 +564,7 @@
}
bool OutputOverlapsWithInputs() const {
- return output_overlaps_;
+ return output_overlaps_ == Location::kOutputOverlap;
}
bool Intrinsified() const {
@@ -574,7 +577,7 @@
GrowableArray<Location> environment_;
// Whether the output overlaps with any of the inputs. If it overlaps, then it cannot
// share the same register as the inputs.
- bool output_overlaps_;
+ Location::OutputOverlap output_overlaps_;
Location output_;
const CallKind call_kind_;
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 39ec22b..db89e68 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -663,7 +663,7 @@
if (GetResultType() == Primitive::kPrimLong) {
return new(GetBlock()->GetGraph()->GetArena()) HLongConstant(value);
} else {
- DCHECK(GetResultType() == Primitive::kPrimInt);
+ DCHECK_EQ(GetResultType(), Primitive::kPrimInt);
return new(GetBlock()->GetGraph()->GetArena()) HIntConstant(value);
}
}
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 0bd571a..926420d 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -2845,18 +2845,25 @@
AddMove(source.ToLow(), destination.ToLow(), instruction);
AddMove(source.ToHigh(), destination.ToHigh(), nullptr);
} else if (source.IsPair()) {
- DCHECK(destination.IsDoubleStackSlot());
+ DCHECK(destination.IsDoubleStackSlot()) << destination;
AddMove(source.ToLow(), Location::StackSlot(destination.GetStackIndex()), instruction);
AddMove(source.ToHigh(), Location::StackSlot(destination.GetHighStackIndex(4)), nullptr);
} else if (destination.IsPair()) {
- DCHECK(source.IsDoubleStackSlot());
- AddMove(Location::StackSlot(source.GetStackIndex()), destination.ToLow(), instruction);
- // TODO: rewrite GetHighStackIndex to not require a word size. It's supposed to
- // always be 4.
- static constexpr int kHighOffset = 4;
- AddMove(Location::StackSlot(source.GetHighStackIndex(kHighOffset)),
- destination.ToHigh(),
- nullptr);
+ if (source.IsConstant()) {
+ // We put the same constant in the move. The code generator will handle which
+ // low or high part to use.
+ AddMove(source, destination.ToLow(), instruction);
+ AddMove(source, destination.ToHigh(), nullptr);
+ } else {
+ DCHECK(source.IsDoubleStackSlot());
+ AddMove(Location::StackSlot(source.GetStackIndex()), destination.ToLow(), instruction);
+ // TODO: rewrite GetHighStackIndex to not require a word size. It's supposed to
+ // always be 4.
+ static constexpr int kHighOffset = 4;
+ AddMove(Location::StackSlot(source.GetHighStackIndex(kHighOffset)),
+ destination.ToHigh(),
+ nullptr);
+ }
} else {
if (kIsDebugBuild) {
if (instruction != nullptr) {
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 1b42e94..e120bc6 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -71,7 +71,10 @@
if (!Supports(instruction_set)) {
return false;
}
- if (instruction_set == kArm64 || instruction_set == kX86_64) {
+ if (instruction_set == kArm64
+ || instruction_set == kX86_64
+ || instruction_set == kArm
+ || instruction_set == kThumb2) {
return true;
}
for (size_t i = 0, e = graph.GetBlocks().Size(); i < e; ++i) {
@@ -85,10 +88,6 @@
current->GetType() == Primitive::kPrimDouble) {
return false;
}
- } else if (instruction_set == kArm || instruction_set == kThumb2) {
- if (current->GetType() == Primitive::kPrimLong) {
- return false;
- }
}
}
}
@@ -680,7 +679,7 @@
}
}
- int reg = -1;
+ int reg = kNoRegister;
if (current->HasRegister()) {
// Some instructions have a fixed register output.
reg = current->GetRegister();
@@ -696,13 +695,13 @@
DCHECK(!IsBlocked(hint));
reg = hint;
} else if (current->IsLowInterval()) {
- reg = FindAvailableRegisterPair(free_until);
+ reg = FindAvailableRegisterPair(free_until, current->GetStart());
} else {
reg = FindAvailableRegister(free_until);
}
}
- DCHECK_NE(reg, -1);
+ DCHECK_NE(reg, kNoRegister);
// If we could not find a register, we need to spill.
if (free_until[reg] == 0) {
return false;
@@ -730,8 +729,8 @@
: blocked_fp_registers_[reg];
}
-int RegisterAllocator::FindAvailableRegisterPair(size_t* next_use) const {
- int reg = -1;
+int RegisterAllocator::FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const {
+ int reg = kNoRegister;
// Pick the register pair that is used the last.
for (size_t i = 0; i < number_of_registers_; ++i) {
if (IsBlocked(i)) continue;
@@ -739,24 +738,28 @@
int high_register = GetHighForLowRegister(i);
if (IsBlocked(high_register)) continue;
int existing_high_register = GetHighForLowRegister(reg);
- if ((reg == -1) || (next_use[i] >= next_use[reg]
+ if ((reg == kNoRegister) || (next_use[i] >= next_use[reg]
&& next_use[high_register] >= next_use[existing_high_register])) {
reg = i;
if (next_use[i] == kMaxLifetimePosition
&& next_use[high_register] == kMaxLifetimePosition) {
break;
}
+ } else if (next_use[reg] <= starting_at || next_use[existing_high_register] <= starting_at) {
+ // If one of the current register is known to be unavailable, just unconditionally
+ // try a new one.
+ reg = i;
}
}
return reg;
}
int RegisterAllocator::FindAvailableRegister(size_t* next_use) const {
- int reg = -1;
+ int reg = kNoRegister;
// Pick the register that is used the last.
for (size_t i = 0; i < number_of_registers_; ++i) {
if (IsBlocked(i)) continue;
- if (reg == -1 || next_use[i] > next_use[reg]) {
+ if (reg == kNoRegister || next_use[i] > next_use[reg]) {
reg = i;
if (next_use[i] == kMaxLifetimePosition) break;
}
@@ -764,6 +767,28 @@
return reg;
}
+bool RegisterAllocator::TrySplitNonPairIntervalAt(size_t position,
+ size_t first_register_use,
+ size_t* next_use) {
+ for (size_t i = 0, e = active_.Size(); i < e; ++i) {
+ LiveInterval* active = active_.Get(i);
+ DCHECK(active->HasRegister());
+ // Split the first interval found.
+ if (first_register_use <= next_use[active->GetRegister()]
+ && !active->IsLowInterval()
+ && !active->IsHighInterval()) {
+ LiveInterval* split = Split(active, position);
+ active_.DeleteAt(i);
+ if (split != active) {
+ handled_.Add(active);
+ }
+ AddSorted(unhandled_, split);
+ return true;
+ }
+ }
+ return false;
+}
+
// Find the register that is used the last, and spill the interval
// that holds it. If the first use of `current` is after that register
// we spill `current` instead.
@@ -824,27 +849,50 @@
}
}
- int reg = -1;
+ int reg = kNoRegister;
+ bool should_spill = false;
if (current->HasRegister()) {
DCHECK(current->IsHighInterval());
reg = current->GetRegister();
+ // When allocating the low part, we made sure the high register was available.
+ DCHECK_LT(first_register_use, next_use[reg]);
} else if (current->IsLowInterval()) {
- reg = FindAvailableRegisterPair(next_use);
+ reg = FindAvailableRegisterPair(next_use, current->GetStart());
+ // We should spill if both registers are not available.
+ should_spill = (first_register_use >= next_use[reg])
+ || (first_register_use >= next_use[GetHighForLowRegister(reg)]);
} else {
DCHECK(!current->IsHighInterval());
reg = FindAvailableRegister(next_use);
+ should_spill = (first_register_use >= next_use[reg]);
}
- if ((first_register_use >= next_use[reg])
- || (current->IsLowInterval() && first_register_use >= next_use[GetHighForLowRegister(reg)])) {
+ DCHECK_NE(reg, kNoRegister);
+ if (should_spill) {
DCHECK(!current->IsHighInterval());
- // If the first use of that instruction is after the last use of the found
- // register, we split this interval just before its first register use.
- AllocateSpillSlotFor(current);
- LiveInterval* split = Split(current, first_register_use - 1);
- DCHECK_NE(current, split) << "There is not enough registers available for "
- << split->GetParent()->GetDefinedBy()->DebugName();
- AddSorted(unhandled_, split);
+ bool is_allocation_at_use_site = (current->GetStart() == (first_register_use - 1));
+ if (current->IsLowInterval()
+ && is_allocation_at_use_site
+ && TrySplitNonPairIntervalAt(current->GetStart(), first_register_use, next_use)) {
+ // If we're allocating a register for `current` because the instruction at
+ // that position requires it, but we think we should spill, then there are
+ // non-pair intervals blocking the allocation. We split the first
+ // interval found, and put ourselves first in the `unhandled_` list.
+ LiveInterval* existing = unhandled_->Peek();
+ DCHECK(existing->IsHighInterval());
+ DCHECK_EQ(existing->GetLowInterval(), current);
+ unhandled_->Add(current);
+ } else {
+ // If the first use of that instruction is after the last use of the found
+ // register, we split this interval just before its first register use.
+ AllocateSpillSlotFor(current);
+ LiveInterval* split = Split(current, first_register_use - 1);
+ DCHECK_NE(current, split) << "There is not enough registers available for "
+ << split->GetParent()->GetDefinedBy()->DebugName() << " "
+ << split->GetParent()->GetDefinedBy()->GetId()
+ << " at " << first_register_use - 1;
+ AddSorted(unhandled_, split);
+ }
return false;
} else {
// Use this register and spill the active and inactives interval that
@@ -861,6 +909,23 @@
handled_.Add(active);
}
AddSorted(unhandled_, split);
+
+ if (active->IsLowInterval() || active->IsHighInterval()) {
+ LiveInterval* other_half = active->IsLowInterval()
+ ? active->GetHighInterval()
+ : active->GetLowInterval();
+ // We also need to remove the other half from the list of actives.
+ bool found = false;
+ for (size_t j = 0; j < active_.Size(); ++j) {
+ if (active_.Get(j) == other_half) {
+ found = true;
+ active_.DeleteAt(j);
+ handled_.Add(other_half);
+ break;
+ }
+ }
+ DCHECK(found);
+ }
break;
}
}
@@ -893,6 +958,25 @@
--e;
handled_.Add(inactive);
AddSorted(unhandled_, split);
+
+ if (inactive->IsLowInterval() || inactive->IsHighInterval()) {
+ LiveInterval* other_half = inactive->IsLowInterval()
+ ? inactive->GetHighInterval()
+ : inactive->GetLowInterval();
+
+ // We also need to remove the other half from the list of inactives.
+ bool found = false;
+ for (size_t j = 0; j < inactive_.Size(); ++j) {
+ if (inactive_.Get(j) == other_half) {
+ found = true;
+ inactive_.DeleteAt(j);
+ --e;
+ handled_.Add(other_half);
+ break;
+ }
+ }
+ DCHECK(found);
+ }
}
}
}
@@ -907,7 +991,8 @@
size_t insert_at = 0;
for (size_t i = array->Size(); i > 0; --i) {
LiveInterval* current = array->Get(i - 1);
- if (current->StartsAfter(interval)) {
+ // High intervals must be processed right after their low equivalent.
+ if (current->StartsAfter(interval) && !current->IsHighInterval()) {
insert_at = i;
break;
} else if ((current->GetStart() == interval->GetStart()) && current->IsSlowPathSafepoint()) {
@@ -1026,6 +1111,7 @@
static bool IsValidDestination(Location destination) {
return destination.IsRegister()
+ || destination.IsRegisterPair()
|| destination.IsFpuRegister()
|| destination.IsFpuRegisterPair()
|| destination.IsStackSlot()
@@ -1066,7 +1152,7 @@
HInstruction* instruction,
Location source,
Location destination) const {
- DCHECK(IsValidDestination(destination));
+ DCHECK(IsValidDestination(destination)) << destination;
if (source.Equals(destination)) return;
HInstruction* at = liveness_.GetInstructionFromPosition(position / 2);
@@ -1130,7 +1216,7 @@
HInstruction* instruction,
Location source,
Location destination) const {
- DCHECK(IsValidDestination(destination));
+ DCHECK(IsValidDestination(destination)) << destination;
if (source.Equals(destination)) return;
DCHECK_EQ(block->GetSuccessors().Size(), 1u);
@@ -1160,7 +1246,7 @@
HInstruction* instruction,
Location source,
Location destination) const {
- DCHECK(IsValidDestination(destination));
+ DCHECK(IsValidDestination(destination)) << destination;
if (source.Equals(destination)) return;
HInstruction* first = block->GetFirstInstruction();
@@ -1178,7 +1264,7 @@
void RegisterAllocator::InsertMoveAfter(HInstruction* instruction,
Location source,
Location destination) const {
- DCHECK(IsValidDestination(destination));
+ DCHECK(IsValidDestination(destination)) << destination;
if (source.Equals(destination)) return;
if (instruction->IsPhi()) {
@@ -1283,6 +1369,8 @@
locations->AddLiveRegister(source);
break;
}
+
+ case Location::kRegisterPair:
case Location::kFpuRegisterPair: {
locations->AddLiveRegister(source.ToLow());
locations->AddLiveRegister(source.ToHigh());
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index ec46a77..b8f70bd 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -128,9 +128,13 @@
bool ValidateInternal(bool log_fatal_on_failure) const;
void DumpInterval(std::ostream& stream, LiveInterval* interval) const;
void DumpAllIntervals(std::ostream& stream) const;
- int FindAvailableRegisterPair(size_t* next_use) const;
+ int FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const;
int FindAvailableRegister(size_t* next_use) const;
+ // Try splitting an active non-pair interval at the given `position`.
+ // Returns whether it was successful at finding such an interval.
+ bool TrySplitNonPairIntervalAt(size_t position, size_t first_register_use, size_t* next_use);
+
ArenaAllocator* const allocator_;
CodeGenerator* const codegen_;
const SsaLivenessAnalysis& liveness_;