Implement double and float support for arm in register allocator.
The basic approach is:
- An instruction that needs two registers gets two intervals.
- When allocating the low part, we also allocate the high part.
- When splitting a low (or high) interval, we also split the high
(or low) equivalent.
- Allocation follows the (S/D register) requirement that low
registers are always even and the high equivalent is low + 1.
Change-Id: I06a5148e05a2ffc7e7555d08e871ed007b4c2797
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 4d8154e..ada0fb7 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -620,6 +620,14 @@
break;
}
+ case Location::kFpuRegisterPair : {
+ stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, location.low());
+ stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, location.high());
+ ++i;
+ DCHECK_LT(i, environment_size);
+ break;
+ }
+
default:
LOG(FATAL) << "Unexpected kind " << location.GetKind();
}
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 4205ebe..9880239 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -142,6 +142,7 @@
UNIMPLEMENTED(FATAL);
UNREACHABLE();
}
+ virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc);
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 3b3fb64..63f5f94 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -373,6 +373,16 @@
return kArmWordSize;
}
+size_t CodeGeneratorARM::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+ __ StoreSToOffset(static_cast<SRegister>(reg_id), SP, stack_index);
+ return kArmWordSize;
+}
+
+size_t CodeGeneratorARM::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+ __ LoadSFromOffset(static_cast<SRegister>(reg_id), SP, stack_index);
+ return kArmWordSize;
+}
+
CodeGeneratorARM::CodeGeneratorARM(HGraph* graph,
const ArmInstructionSetFeatures* isa_features)
: CodeGenerator(graph, kNumberOfCoreRegisters, kNumberOfSRegisters, kNumberOfRegisterPairs),
@@ -802,7 +812,8 @@
__ LoadImmediate(IP, value);
__ StoreToOffset(kStoreWord, IP, SP, location.GetStackIndex());
}
- } else if (const_to_move->IsLongConstant()) {
+ } else {
+ DCHECK(const_to_move->IsLongConstant()) << const_to_move;
int64_t value = const_to_move->AsLongConstant()->GetValue();
if (location.IsRegisterPair()) {
__ LoadImmediate(location.AsRegisterPairLow<Register>(), Low32Bits(value));
@@ -2994,10 +3005,34 @@
break;
}
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- LOG(FATAL) << "Unimplemented register type " << instruction->GetType();
- UNREACHABLE();
+ case Primitive::kPrimFloat: {
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
+ Location out = locations->Out();
+ DCHECK(out.IsFpuRegister());
+ if (index.IsConstant()) {
+ size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+ __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), obj, offset);
+ } else {
+ __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
+ __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), IP, data_offset);
+ }
+ break;
+ }
+
+ case Primitive::kPrimDouble: {
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
+ Location out = locations->Out();
+ DCHECK(out.IsFpuRegisterPair());
+ if (index.IsConstant()) {
+ size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+ __ LoadDFromOffset(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), obj, offset);
+ } else {
+ __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8));
+ __ LoadDFromOffset(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), IP, data_offset);
+ }
+ break;
+ }
+
case Primitive::kPrimVoid:
LOG(FATAL) << "Unreachable type " << instruction->GetType();
UNREACHABLE();
@@ -3114,12 +3149,36 @@
break;
}
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- LOG(FATAL) << "Unimplemented register type " << instruction->GetType();
- UNREACHABLE();
+ case Primitive::kPrimFloat: {
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
+ Location value = locations->InAt(2);
+ DCHECK(value.IsFpuRegister());
+ if (index.IsConstant()) {
+ size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+ __ StoreSToOffset(value.AsFpuRegister<SRegister>(), obj, offset);
+ } else {
+ __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
+ __ StoreSToOffset(value.AsFpuRegister<SRegister>(), IP, data_offset);
+ }
+ break;
+ }
+
+ case Primitive::kPrimDouble: {
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
+ Location value = locations->InAt(2);
+ DCHECK(value.IsFpuRegisterPair());
+ if (index.IsConstant()) {
+ size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+ __ StoreDToOffset(FromLowSToD(value.AsFpuRegisterPairLow<SRegister>()), obj, offset);
+ } else {
+ __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8));
+ __ StoreDToOffset(FromLowSToD(value.AsFpuRegisterPairLow<SRegister>()), IP, data_offset);
+ }
+ break;
+ }
+
case Primitive::kPrimVoid:
- LOG(FATAL) << "Unreachable type " << instruction->GetType();
+ LOG(FATAL) << "Unreachable type " << value_type;
UNREACHABLE();
}
}
@@ -3247,21 +3306,62 @@
if (destination.IsRegister()) {
__ LoadFromOffset(kLoadWord, destination.AsRegister<Register>(),
SP, source.GetStackIndex());
+ } else if (destination.IsFpuRegister()) {
+ __ LoadSFromOffset(destination.AsFpuRegister<SRegister>(), SP, source.GetStackIndex());
} else {
DCHECK(destination.IsStackSlot());
__ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex());
__ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
}
- } else {
- DCHECK(source.IsConstant());
- DCHECK(source.GetConstant()->IsIntConstant());
- int32_t value = source.GetConstant()->AsIntConstant()->GetValue();
- if (destination.IsRegister()) {
- __ LoadImmediate(destination.AsRegister<Register>(), value);
+ } else if (source.IsFpuRegister()) {
+ if (destination.IsFpuRegister()) {
+ __ vmovs(destination.AsFpuRegister<SRegister>(), source.AsFpuRegister<SRegister>());
} else {
DCHECK(destination.IsStackSlot());
- __ LoadImmediate(IP, value);
+ __ StoreSToOffset(source.AsFpuRegister<SRegister>(), SP, destination.GetStackIndex());
+ }
+ } else if (source.IsFpuRegisterPair()) {
+ if (destination.IsFpuRegisterPair()) {
+ __ vmovd(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()),
+ FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()));
+ } else {
+ DCHECK(destination.IsDoubleStackSlot()) << destination;
+ __ StoreDToOffset(FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()),
+ SP, destination.GetStackIndex());
+ }
+ } else if (source.IsDoubleStackSlot()) {
+ if (destination.IsFpuRegisterPair()) {
+ __ LoadDFromOffset(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()),
+ SP, source.GetStackIndex());
+ } else {
+ DCHECK(destination.IsDoubleStackSlot()) << destination;
+ __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex());
__ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
+ __ LoadFromOffset(kLoadWord, IP, SP, source.GetHighStackIndex(kArmWordSize));
+ __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize));
+ }
+ } else {
+ DCHECK(source.IsConstant()) << source;
+ HInstruction* constant = source.GetConstant();
+ if (constant->IsIntConstant()) {
+ int32_t value = constant->AsIntConstant()->GetValue();
+ if (destination.IsRegister()) {
+ __ LoadImmediate(destination.AsRegister<Register>(), value);
+ } else {
+ DCHECK(destination.IsStackSlot());
+ __ LoadImmediate(IP, value);
+ __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
+ }
+ } else {
+ DCHECK(constant->IsFloatConstant());
+ float value = constant->AsFloatConstant()->GetValue();
+ if (destination.IsFpuRegister()) {
+ __ LoadSImmediate(destination.AsFpuRegister<SRegister>(), value);
+ } else {
+ DCHECK(destination.IsStackSlot());
+ __ LoadImmediate(IP, bit_cast<int32_t, float>(value));
+ __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
+ }
}
}
}
@@ -3300,6 +3400,20 @@
Exchange(destination.AsRegister<Register>(), source.GetStackIndex());
} else if (source.IsStackSlot() && destination.IsStackSlot()) {
Exchange(source.GetStackIndex(), destination.GetStackIndex());
+ } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
+ __ vmovrs(IP, source.AsFpuRegister<SRegister>());
+ __ vmovs(source.AsFpuRegister<SRegister>(), destination.AsFpuRegister<SRegister>());
+ __ vmovsr(destination.AsFpuRegister<SRegister>(), IP);
+ } else if (source.IsFpuRegister() || destination.IsFpuRegister()) {
+ SRegister reg = source.IsFpuRegister() ? source.AsFpuRegister<SRegister>()
+ : destination.AsFpuRegister<SRegister>();
+ int mem = source.IsFpuRegister()
+ ? destination.GetStackIndex()
+ : source.GetStackIndex();
+
+ __ vmovrs(IP, reg);
+ __ LoadSFromOffset(reg, SP, mem);
+ __ StoreToOffset(kStoreWord, IP, SP, mem);
} else {
LOG(FATAL) << "Unimplemented";
}
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 40f4edc..c1b4eda 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -168,6 +168,8 @@
void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+ size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+ size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
size_t GetWordSize() const OVERRIDE {
return kArmWordSize;
@@ -237,6 +239,10 @@
return isa_features_;
}
+ bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE {
+ return type == Primitive::kPrimDouble || type == Primitive::kPrimLong;
+ }
+
private:
// Labels for each block that will be compiled.
GrowableArray<Label> block_labels_;
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 19488a4..e4da07b 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -267,6 +267,10 @@
ParallelMoveResolverARM64* GetMoveResolver() { return &move_resolver_; }
+ bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
+ return false;
+ }
+
private:
// Labels for each block that will be compiled.
vixl::Label* block_labels_;
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 636f884..acde122 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -222,6 +222,10 @@
block_labels_.SetSize(GetGraph()->GetBlocks().Size());
}
+ bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE {
+ return type == Primitive::kPrimLong;
+ }
+
private:
// Labels for each block that will be compiled.
GrowableArray<Label> block_labels_;
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 0708864..87f6b0f 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -218,6 +218,10 @@
block_labels_.SetSize(GetGraph()->GetBlocks().Size());
}
+ bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
+ return false;
+ }
+
private:
// Labels for each block that will be compiled.
GrowableArray<Label> block_labels_;
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 9ed1e45..8a7bfd6 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -30,10 +30,12 @@
HGraphVisualizerPrinter(HGraph* graph,
std::ostream& output,
const char* pass_name,
+ bool is_after_pass,
const CodeGenerator& codegen)
: HGraphVisitor(graph),
output_(output),
pass_name_(pass_name),
+ is_after_pass_(is_after_pass),
codegen_(codegen),
indent_(0) {}
@@ -136,6 +138,10 @@
output_ << "invalid";
} else if (location.IsStackSlot()) {
output_ << location.GetStackIndex() << "(sp)";
+ } else if (location.IsFpuRegisterPair()) {
+ codegen_.DumpFloatingPointRegister(output_, location.low());
+ output_ << " and ";
+ codegen_.DumpFloatingPointRegister(output_, location.high());
} else {
DCHECK(location.IsDoubleStackSlot());
output_ << "2x" << location.GetStackIndex() << "(sp)";
@@ -224,7 +230,8 @@
void Run() {
StartTag("cfg");
- PrintProperty("name", pass_name_);
+ std::string pass_desc = std::string(pass_name_) + (is_after_pass_ ? " (after)" : " (before)");
+ PrintProperty("name", pass_desc.c_str());
VisitInsertionOrder();
EndTag("cfg");
}
@@ -275,6 +282,7 @@
private:
std::ostream& output_;
const char* pass_name_;
+ const bool is_after_pass_;
const CodeGenerator& codegen_;
size_t indent_;
@@ -295,7 +303,7 @@
}
is_enabled_ = true;
- HGraphVisualizerPrinter printer(graph_, *output_, "", codegen_);
+ HGraphVisualizerPrinter printer(graph_, *output_, "", true, codegen_);
printer.StartTag("compilation");
printer.PrintProperty("name", method_name);
printer.PrintProperty("method", method_name);
@@ -305,8 +313,7 @@
void HGraphVisualizer::DumpGraph(const char* pass_name, bool is_after_pass) const {
if (is_enabled_) {
- std::string pass_desc = std::string(pass_name) + (is_after_pass ? " (after)" : " (before)");
- HGraphVisualizerPrinter printer(graph_, *output_, pass_desc.c_str(), codegen_);
+ HGraphVisualizerPrinter printer(graph_, *output_, pass_name, is_after_pass, codegen_);
printer.Run();
}
}
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 1ff26d9..7df99d4 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -160,6 +160,16 @@
return GetPayload();
}
+ int low() const {
+ DCHECK(IsPair());
+ return GetPayload() >> 16;
+ }
+
+ int high() const {
+ DCHECK(IsPair());
+ return GetPayload() & 0xFFFF;
+ }
+
template <typename T>
T AsRegister() const {
DCHECK(IsRegister());
@@ -175,25 +185,41 @@
template <typename T>
T AsRegisterPairLow() const {
DCHECK(IsRegisterPair());
- return static_cast<T>(GetPayload() >> 16);
+ return static_cast<T>(low());
}
template <typename T>
T AsRegisterPairHigh() const {
DCHECK(IsRegisterPair());
- return static_cast<T>(GetPayload() & 0xFFFF);
+ return static_cast<T>(high());
}
template <typename T>
T AsFpuRegisterPairLow() const {
DCHECK(IsFpuRegisterPair());
- return static_cast<T>(GetPayload() >> 16);
+ return static_cast<T>(low());
}
template <typename T>
T AsFpuRegisterPairHigh() const {
DCHECK(IsFpuRegisterPair());
- return static_cast<T>(GetPayload() & 0xFFFF);
+ return static_cast<T>(high());
+ }
+
+ bool IsPair() const {
+ return IsRegisterPair() || IsFpuRegisterPair();
+ }
+
+ Location ToLow() const {
+ return IsRegisterPair()
+ ? Location::RegisterLocation(low())
+ : Location::FpuRegisterLocation(low());
+ }
+
+ Location ToHigh() const {
+ return IsRegisterPair()
+ ? Location::RegisterLocation(high())
+ : Location::FpuRegisterLocation(high());
}
static uintptr_t EncodeStackIndex(intptr_t stack_index) {
@@ -264,6 +290,18 @@
return value_ == other.value_;
}
+ // Returns whether this location contains `other`.
+ bool Contains(Location other) const {
+ if (Equals(other)) return true;
+ if (IsRegisterPair() && other.IsRegister()) {
+ return low() == other.reg() || high() == other.reg();
+ }
+ if (IsFpuRegisterPair() && other.IsFpuRegister()) {
+ return low() == other.reg() || high() == other.reg();
+ }
+ return false;
+ }
+
const char* DebugString() const {
switch (GetKind()) {
case kInvalid: return "I";
@@ -525,7 +563,8 @@
&& (output_.GetPolicy() == Location::kSameAsFirstInput)) {
return false;
}
- if (inputs_.Get(input_index).IsRegister() || inputs_.Get(input_index).IsFpuRegister()) {
+ Location input = inputs_.Get(input_index);
+ if (input.IsRegister() || input.IsFpuRegister() || input.IsPair()) {
return false;
}
return true;
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 0fc1fd8..b98bc70 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -2734,7 +2734,7 @@
// True if this blocks a move from the given location.
bool Blocks(Location loc) const {
- return !IsEliminated() && source_.Equals(loc);
+ return !IsEliminated() && source_.Contains(loc);
}
// A move is redundant if it's been eliminated, if its source and
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index c1c805d..1efc52b 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -27,6 +27,12 @@
static constexpr size_t kMaxLifetimePosition = -1;
static constexpr size_t kDefaultNumberOfSpillSlots = 4;
+// For simplicity, we implement register pairs as (reg, reg + 1).
+// Note that this is a requirement for double registers on ARM, since we
+// allocate SRegister.
+static int GetHighForLowRegister(int reg) { return reg + 1; }
+static bool IsLowRegister(int reg) { return (reg & 1) == 0; }
+
RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator,
CodeGenerator* codegen,
const SsaLivenessAnalysis& liveness)
@@ -72,10 +78,16 @@
!it.Done();
it.Advance()) {
HInstruction* current = it.Current();
- if (current->GetType() == Primitive::kPrimLong ||
- current->GetType() == Primitive::kPrimFloat ||
- current->GetType() == Primitive::kPrimDouble) {
- return false;
+ if (instruction_set == kX86) {
+ if (current->GetType() == Primitive::kPrimLong ||
+ current->GetType() == Primitive::kPrimFloat ||
+ current->GetType() == Primitive::kPrimDouble) {
+ return false;
+ }
+ } else if (instruction_set == kArm || instruction_set == kThumb2) {
+ if (current->GetType() == Primitive::kPrimLong) {
+ return false;
+ }
}
}
}
@@ -130,7 +142,7 @@
: physical_fp_register_intervals_.Get(reg);
Primitive::Type type = location.IsRegister()
? Primitive::kPrimInt
- : Primitive::kPrimDouble;
+ : Primitive::kPrimFloat;
if (interval == nullptr) {
interval = LiveInterval::MakeFixedInterval(allocator_, reg, type);
if (location.IsRegister()) {
@@ -226,6 +238,12 @@
LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble);
temp_intervals_.Add(interval);
interval->AddRange(position, position + 1);
+ if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
+ interval->AddHighInterval(true);
+ LiveInterval* high = interval->GetHighInterval();
+ temp_intervals_.Add(high);
+ unhandled_fp_intervals_.Add(high);
+ }
unhandled_fp_intervals_.Add(interval);
break;
}
@@ -279,6 +297,9 @@
Location input = locations->InAt(i);
if (input.IsRegister() || input.IsFpuRegister()) {
BlockRegister(input, position, position + 1);
+ } else if (input.IsPair()) {
+ BlockRegister(input.ToLow(), position, position + 1);
+ BlockRegister(input.ToHigh(), position, position + 1);
}
}
@@ -291,6 +312,10 @@
DCHECK(unhandled.IsEmpty() || current->StartsBeforeOrAt(unhandled.Peek()));
+ if (codegen_->NeedsTwoRegisters(current->GetType())) {
+ current->AddHighInterval();
+ }
+
// Some instructions define their output in fixed register/stack slot. We need
// to ensure we know these locations before doing register allocation. For a
// given register, we create an interval that covers these locations. The register
@@ -304,14 +329,30 @@
if (first.IsRegister() || first.IsFpuRegister()) {
current->SetFrom(position + 1);
current->SetRegister(first.reg());
+ } else if (first.IsPair()) {
+ current->SetFrom(position + 1);
+ current->SetRegister(first.low());
+ LiveInterval* high = current->GetHighInterval();
+ high->SetRegister(first.high());
+ high->SetFrom(position + 1);
}
} else if (output.IsRegister() || output.IsFpuRegister()) {
// Shift the interval's start by one to account for the blocked register.
current->SetFrom(position + 1);
current->SetRegister(output.reg());
BlockRegister(output, position, position + 1);
+ } else if (output.IsPair()) {
+ current->SetFrom(position + 1);
+ current->SetRegister(output.low());
+ LiveInterval* high = current->GetHighInterval();
+ high->SetRegister(output.high());
+ high->SetFrom(position + 1);
+ BlockRegister(output.ToLow(), position, position + 1);
+ BlockRegister(output.ToHigh(), position, position + 1);
} else if (output.IsStackSlot() || output.IsDoubleStackSlot()) {
current->SetSpillSlot(output.GetStackIndex());
+ } else {
+ DCHECK(output.IsUnallocated() || output.IsConstant());
}
// If needed, add interval to the list of unhandled intervals.
@@ -516,6 +557,7 @@
LiveInterval* current = unhandled_->Pop();
DCHECK(!current->IsFixed() && !current->HasSpillSlot());
DCHECK(unhandled_->IsEmpty() || unhandled_->Peek()->GetStart() >= current->GetStart());
+ DCHECK(!current->IsLowInterval() || unhandled_->Peek()->IsHighInterval());
size_t position = current->GetStart();
@@ -566,6 +608,13 @@
continue;
}
+ if (current->IsHighInterval() && !current->GetLowInterval()->HasRegister()) {
+ DCHECK(!current->HasRegister());
+ // Allocating the low part was unsucessful. The splitted interval for the high part
+ // will be handled next (it is in the `unhandled_` list).
+ continue;
+ }
+
// (4) Try to find an available register.
bool success = TryAllocateFreeReg(current);
@@ -578,6 +627,9 @@
// intervals.
if (success) {
active_.Add(current);
+ if (current->HasHighInterval() && !current->GetHighInterval()->HasRegister()) {
+ current->GetHighInterval()->SetRegister(GetHighForLowRegister(current->GetRegister()));
+ }
}
}
}
@@ -630,26 +682,31 @@
if (current->HasRegister()) {
// Some instructions have a fixed register output.
reg = current->GetRegister();
- DCHECK_NE(free_until[reg], 0u);
+ if (free_until[reg] == 0) {
+ DCHECK(current->IsHighInterval());
+ // AllocateBlockedReg will spill the holder of the register.
+ return false;
+ }
} else {
+ DCHECK(!current->IsHighInterval());
int hint = current->FindFirstRegisterHint(free_until);
if (hint != kNoRegister) {
DCHECK(!IsBlocked(hint));
reg = hint;
+ } else if (current->IsLowInterval()) {
+ reg = FindAvailableRegisterPair(free_until);
} else {
- // Pick the register that is free the longest.
- for (size_t i = 0; i < number_of_registers_; ++i) {
- if (IsBlocked(i)) continue;
- if (reg == -1 || free_until[i] > free_until[reg]) {
- reg = i;
- if (free_until[i] == kMaxLifetimePosition) break;
- }
- }
+ reg = FindAvailableRegister(free_until);
}
}
+ DCHECK_NE(reg, -1);
// If we could not find a register, we need to spill.
- if (reg == -1 || free_until[reg] == 0) {
+ if (free_until[reg] == 0) {
+ return false;
+ }
+
+ if (current->IsLowInterval() && free_until[GetHighForLowRegister(reg)] == 0) {
return false;
}
@@ -671,6 +728,40 @@
: blocked_fp_registers_[reg];
}
+int RegisterAllocator::FindAvailableRegisterPair(size_t* next_use) const {
+ int reg = -1;
+ // Pick the register pair that is used the last.
+ for (size_t i = 0; i < number_of_registers_; ++i) {
+ if (IsBlocked(i)) continue;
+ if (!IsLowRegister(i)) continue;
+ int high_register = GetHighForLowRegister(i);
+ if (IsBlocked(high_register)) continue;
+ int existing_high_register = GetHighForLowRegister(reg);
+ if ((reg == -1) || (next_use[i] >= next_use[reg]
+ && next_use[high_register] >= next_use[existing_high_register])) {
+ reg = i;
+ if (next_use[i] == kMaxLifetimePosition
+ && next_use[high_register] == kMaxLifetimePosition) {
+ break;
+ }
+ }
+ }
+ return reg;
+}
+
+int RegisterAllocator::FindAvailableRegister(size_t* next_use) const {
+ int reg = -1;
+ // Pick the register that is used the last.
+ for (size_t i = 0; i < number_of_registers_; ++i) {
+ if (IsBlocked(i)) continue;
+ if (reg == -1 || next_use[i] > next_use[reg]) {
+ reg = i;
+ if (next_use[i] == kMaxLifetimePosition) break;
+ }
+ }
+ return reg;
+}
+
// Find the register that is used the last, and spill the interval
// that holds it. If the first use of `current` is after that register
// we spill `current` instead.
@@ -731,17 +822,20 @@
}
}
- // Pick the register that is used the last.
int reg = -1;
- for (size_t i = 0; i < number_of_registers_; ++i) {
- if (IsBlocked(i)) continue;
- if (reg == -1 || next_use[i] > next_use[reg]) {
- reg = i;
- if (next_use[i] == kMaxLifetimePosition) break;
- }
+ if (current->HasRegister()) {
+ DCHECK(current->IsHighInterval());
+ reg = current->GetRegister();
+ } else if (current->IsLowInterval()) {
+ reg = FindAvailableRegisterPair(next_use);
+ } else {
+ DCHECK(!current->IsHighInterval());
+ reg = FindAvailableRegister(next_use);
}
- if (first_register_use >= next_use[reg]) {
+ if ((first_register_use >= next_use[reg])
+ || (current->IsLowInterval() && first_register_use >= next_use[GetHighForLowRegister(reg)])) {
+ DCHECK(!current->IsHighInterval());
// If the first use of that instruction is after the last use of the found
// register, we split this interval just before its first register use.
AllocateSpillSlotFor(current);
@@ -815,23 +909,49 @@
break;
}
}
+
array->InsertAt(insert_at, interval);
+ // Insert the high interval before the low, to ensure the low is processed before.
+ if (interval->HasHighInterval()) {
+ array->InsertAt(insert_at, interval->GetHighInterval());
+ } else if (interval->HasLowInterval()) {
+ array->InsertAt(insert_at + 1, interval->GetLowInterval());
+ }
}
LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) {
- DCHECK(position >= interval->GetStart());
+ DCHECK_GE(position, interval->GetStart());
DCHECK(!interval->IsDeadAt(position));
if (position == interval->GetStart()) {
// Spill slot will be allocated when handling `interval` again.
interval->ClearRegister();
+ if (interval->HasHighInterval()) {
+ interval->GetHighInterval()->ClearRegister();
+ } else if (interval->HasLowInterval()) {
+ interval->GetLowInterval()->ClearRegister();
+ }
return interval;
} else {
LiveInterval* new_interval = interval->SplitAt(position);
+ if (interval->HasHighInterval()) {
+ LiveInterval* high = interval->GetHighInterval()->SplitAt(position);
+ new_interval->SetHighInterval(high);
+ high->SetLowInterval(new_interval);
+ } else if (interval->HasLowInterval()) {
+ LiveInterval* low = interval->GetLowInterval()->SplitAt(position);
+ new_interval->SetLowInterval(low);
+ low->SetHighInterval(new_interval);
+ }
return new_interval;
}
}
void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) {
+ if (interval->IsHighInterval()) {
+ // The low interval will contain the spill slot.
+ return;
+ }
+
LiveInterval* parent = interval->GetParent();
// An instruction gets a spill slot for its entire lifetime. If the parent
@@ -898,6 +1018,7 @@
static bool IsValidDestination(Location destination) {
return destination.IsRegister()
|| destination.IsFpuRegister()
+ || destination.IsFpuRegisterPair()
|| destination.IsStackSlot()
|| destination.IsDoubleStackSlot();
}
@@ -905,7 +1026,6 @@
void RegisterAllocator::AddInputMoveFor(HInstruction* user,
Location source,
Location destination) const {
- DCHECK(IsValidDestination(destination));
if (source.Equals(destination)) return;
DCHECK(!user->IsPhi());
@@ -1075,9 +1195,7 @@
if (current->HasSpillSlot() && current->HasRegister()) {
// We spill eagerly, so move must be at definition.
InsertMoveAfter(interval->GetDefinedBy(),
- interval->IsFloatingPoint()
- ? Location::FpuRegisterLocation(interval->GetRegister())
- : Location::RegisterLocation(interval->GetRegister()),
+ interval->ToLocation(),
interval->NeedsTwoSpillSlots()
? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot())
: Location::StackSlot(interval->GetParent()->GetSpillSlot()));
@@ -1148,6 +1266,11 @@
locations->AddLiveRegister(source);
break;
}
+ case Location::kFpuRegisterPair: {
+ locations->AddLiveRegister(source.ToLow());
+ locations->AddLiveRegister(source.ToHigh());
+ break;
+ }
case Location::kStackSlot: // Fall-through
case Location::kDoubleStackSlot: // Fall-through
case Location::kConstant: {
@@ -1307,6 +1430,10 @@
size_t temp_index = 0;
for (size_t i = 0; i < temp_intervals_.Size(); ++i) {
LiveInterval* temp = temp_intervals_.Get(i);
+ if (temp->IsHighInterval()) {
+ // High intervals can be skipped, they are already handled by the low interval.
+ continue;
+ }
HInstruction* at = liveness_.GetTempUser(temp);
if (at != current) {
temp_index = 0;
@@ -1320,14 +1447,14 @@
break;
case Primitive::kPrimDouble:
- // TODO: Support the case of ARM, where a double value
- // requires an FPU register pair (note that the ARM back end
- // does not yet use this register allocator when a method uses
- // floats or doubles).
- DCHECK(codegen_->GetInstructionSet() != kArm
- && codegen_->GetInstructionSet() != kThumb2);
- locations->SetTempAt(
- temp_index++, Location::FpuRegisterLocation(temp->GetRegister()));
+ if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
+ Location location = Location::FpuRegisterPairLocation(
+ temp->GetRegister(), temp->GetHighInterval()->GetRegister());
+ locations->SetTempAt(temp_index++, location);
+ } else {
+ locations->SetTempAt(
+ temp_index++, Location::FpuRegisterLocation(temp->GetRegister()));
+ }
break;
default:
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index cbe741c..c152a8b 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -128,6 +128,8 @@
bool ValidateInternal(bool log_fatal_on_failure) const;
void DumpInterval(std::ostream& stream, LiveInterval* interval) const;
void DumpAllIntervals(std::ostream& stream) const;
+ int FindAvailableRegisterPair(size_t* next_use) const;
+ int FindAvailableRegister(size_t* next_use) const;
ArenaAllocator* const allocator_;
CodeGenerator* const codegen_;
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 660a5c5..d41157b 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -419,10 +419,21 @@
}
Location LiveInterval::ToLocation() const {
+ DCHECK(!IsHighInterval());
if (HasRegister()) {
- return IsFloatingPoint()
- ? Location::FpuRegisterLocation(GetRegister())
- : Location::RegisterLocation(GetRegister());
+ if (IsFloatingPoint()) {
+ if (HasHighInterval()) {
+ return Location::FpuRegisterPairLocation(GetRegister(), GetHighInterval()->GetRegister());
+ } else {
+ return Location::FpuRegisterLocation(GetRegister());
+ }
+ } else {
+ if (HasHighInterval()) {
+ return Location::RegisterPairLocation(GetRegister(), GetHighInterval()->GetRegister());
+ } else {
+ return Location::RegisterLocation(GetRegister());
+ }
+ }
} else {
HInstruction* defined_by = GetParent()->GetDefinedBy();
if (defined_by->IsConstant()) {
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 2312389..74611e1 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -77,6 +77,15 @@
stream << "[" << start_ << ", " << end_ << ")";
}
+ LiveRange* Dup(ArenaAllocator* allocator) const {
+ return new (allocator) LiveRange(
+ start_, end_, next_ == nullptr ? nullptr : next_->Dup(allocator));
+ }
+
+ LiveRange* GetLastRange() {
+ return next_ == nullptr ? this : next_->GetLastRange();
+ }
+
private:
size_t start_;
size_t end_;
@@ -123,6 +132,12 @@
stream << position_;
}
+ UsePosition* Dup(ArenaAllocator* allocator) const {
+ return new (allocator) UsePosition(
+ user_, input_index_, is_environment_, position_,
+ next_ == nullptr ? nullptr : next_->Dup(allocator));
+ }
+
private:
HInstruction* const user_;
const size_t input_index_;
@@ -478,6 +493,8 @@
}
stream << "}";
stream << " is_fixed: " << is_fixed_ << ", is_split: " << IsSplit();
+ stream << " is_high: " << IsHighInterval();
+ stream << " is_low: " << IsLowInterval();
}
LiveInterval* GetNextSibling() const { return next_sibling_; }
@@ -512,6 +529,58 @@
// Returns whether `other` and `this` share the same kind of register.
bool SameRegisterKind(Location other) const;
+ bool HasHighInterval() const {
+ return !IsHighInterval() && (GetParent()->high_or_low_interval_ != nullptr);
+ }
+
+ bool HasLowInterval() const {
+ return IsHighInterval();
+ }
+
+ LiveInterval* GetLowInterval() const {
+ DCHECK(HasLowInterval());
+ return high_or_low_interval_;
+ }
+
+ LiveInterval* GetHighInterval() const {
+ DCHECK(HasHighInterval());
+ return high_or_low_interval_;
+ }
+
+ bool IsHighInterval() const {
+ return GetParent()->is_high_interval_;
+ }
+
+ bool IsLowInterval() const {
+ return !IsHighInterval() && (GetParent()->high_or_low_interval_ != nullptr);
+ }
+
+ void SetLowInterval(LiveInterval* low) {
+ DCHECK(IsHighInterval());
+ high_or_low_interval_ = low;
+ }
+
+ void SetHighInterval(LiveInterval* high) {
+ DCHECK(IsLowInterval());
+ high_or_low_interval_ = high;
+ }
+
+ void AddHighInterval(bool is_temp = false) {
+ DCHECK_EQ(GetParent(), this);
+ DCHECK(!HasHighInterval());
+ DCHECK(!HasLowInterval());
+ high_or_low_interval_ = new (allocator_) LiveInterval(
+ allocator_, type_, defined_by_, false, kNoRegister, is_temp, false, true);
+ high_or_low_interval_->high_or_low_interval_ = this;
+ if (first_range_ != nullptr) {
+ high_or_low_interval_->first_range_ = first_range_->Dup(allocator_);
+ high_or_low_interval_->last_range_ = first_range_->GetLastRange();
+ }
+ if (first_use_ != nullptr) {
+ high_or_low_interval_->first_use_ = first_use_->Dup(allocator_);
+ }
+ }
+
private:
LiveInterval(ArenaAllocator* allocator,
Primitive::Type type,
@@ -519,7 +588,8 @@
bool is_fixed = false,
int reg = kNoRegister,
bool is_temp = false,
- bool is_slow_path_safepoint = false)
+ bool is_slow_path_safepoint = false,
+ bool is_high_interval = false)
: allocator_(allocator),
first_range_(nullptr),
last_range_(nullptr),
@@ -532,6 +602,8 @@
is_fixed_(is_fixed),
is_temp_(is_temp),
is_slow_path_safepoint_(is_slow_path_safepoint),
+ is_high_interval_(is_high_interval),
+ high_or_low_interval_(nullptr),
defined_by_(defined_by) {}
ArenaAllocator* const allocator_;
@@ -568,6 +640,13 @@
// Whether the interval is for a safepoint that calls on slow path.
const bool is_slow_path_safepoint_;
+ // Whether this interval is a synthesized interval for register pair.
+ const bool is_high_interval_;
+
+ // If this interval needs a register pair, the high or low equivalent.
+ // `is_high_interval_` tells whether this holds the low or the high.
+ LiveInterval* high_or_low_interval_;
+
// The instruction represented by this interval.
HInstruction* const defined_by_;