diff options
| author | 2015-01-07 16:01:24 +0000 | |
|---|---|---|
| committer | 2015-01-08 13:57:51 +0000 | |
| commit | 840e5461a85f8908f51e7f6cd562a9129ff0e7ce (patch) | |
| tree | ea8b4cbc5a0e3dea96fefcd9247e6c06b17ac518 /compiler | |
| parent | 893e8881e31180721512c1b9e5ffacb03aad2e45 (diff) | |
Implement double and float support for arm in register allocator.
The basic approach is:
- An instruction that needs two registers gets two intervals.
- When allocating the low part, we also allocate the high part.
- When splitting a low (or high) interval, we also split the high
(or low) equivalent.
- Allocation follows the (S/D register) requirement that low
registers are always even and the high equivalent is low + 1.
Change-Id: I06a5148e05a2ffc7e7555d08e871ed007b4c2797
Diffstat (limited to 'compiler')
| -rw-r--r-- | compiler/optimizing/code_generator.cc | 8 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator.h | 1 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_arm.cc | 148 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_arm.h | 6 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_arm64.h | 4 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_x86.h | 4 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_x86_64.h | 4 | ||||
| -rw-r--r-- | compiler/optimizing/graph_visualizer.cc | 15 | ||||
| -rw-r--r-- | compiler/optimizing/locations.h | 49 | ||||
| -rw-r--r-- | compiler/optimizing/nodes.h | 2 | ||||
| -rw-r--r-- | compiler/optimizing/register_allocator.cc | 199 | ||||
| -rw-r--r-- | compiler/optimizing/register_allocator.h | 2 | ||||
| -rw-r--r-- | compiler/optimizing/ssa_liveness_analysis.cc | 17 | ||||
| -rw-r--r-- | compiler/optimizing/ssa_liveness_analysis.h | 81 | ||||
| -rw-r--r-- | compiler/utils/arm/assembler_arm.h | 7 |
15 files changed, 480 insertions, 67 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 4d8154e6a0..ada0fb75d7 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -620,6 +620,14 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) { break; } + case Location::kFpuRegisterPair : { + stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, location.low()); + stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kInFpuRegister, location.high()); + ++i; + DCHECK_LT(i, environment_size); + break; + } + default: LOG(FATAL) << "Unexpected kind " << location.GetKind(); } diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 4205ebebf9..9880239c88 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -142,6 +142,7 @@ class CodeGenerator : public ArenaObject<kArenaAllocMisc> { UNIMPLEMENTED(FATAL); UNREACHABLE(); } + virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0; void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 3b3fb64763..63f5f94e7e 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -373,6 +373,16 @@ size_t CodeGeneratorARM::RestoreCoreRegister(size_t stack_index, uint32_t reg_id return kArmWordSize; } +size_t CodeGeneratorARM::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { + __ StoreSToOffset(static_cast<SRegister>(reg_id), SP, stack_index); + return kArmWordSize; +} + +size_t CodeGeneratorARM::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { + __ LoadSFromOffset(static_cast<SRegister>(reg_id), SP, stack_index); + return kArmWordSize; +} + CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, const ArmInstructionSetFeatures* isa_features) : CodeGenerator(graph, 
kNumberOfCoreRegisters, kNumberOfSRegisters, kNumberOfRegisterPairs), @@ -802,7 +812,8 @@ void CodeGeneratorARM::Move(HInstruction* instruction, Location location, HInstr __ LoadImmediate(IP, value); __ StoreToOffset(kStoreWord, IP, SP, location.GetStackIndex()); } - } else if (const_to_move->IsLongConstant()) { + } else { + DCHECK(const_to_move->IsLongConstant()) << const_to_move; int64_t value = const_to_move->AsLongConstant()->GetValue(); if (location.IsRegisterPair()) { __ LoadImmediate(location.AsRegisterPairLow<Register>(), Low32Bits(value)); @@ -2994,10 +3005,34 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - LOG(FATAL) << "Unimplemented register type " << instruction->GetType(); - UNREACHABLE(); + case Primitive::kPrimFloat: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); + Location out = locations->Out(); + DCHECK(out.IsFpuRegister()); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), obj, offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); + __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), IP, data_offset); + } + break; + } + + case Primitive::kPrimDouble: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); + Location out = locations->Out(); + DCHECK(out.IsFpuRegisterPair()); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; + __ LoadDFromOffset(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), obj, offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8)); + __ LoadDFromOffset(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), IP, data_offset); + } + break; + } + case Primitive::kPrimVoid: LOG(FATAL) << 
"Unreachable type " << instruction->GetType(); UNREACHABLE(); @@ -3114,12 +3149,36 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - LOG(FATAL) << "Unimplemented register type " << instruction->GetType(); - UNREACHABLE(); + case Primitive::kPrimFloat: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); + Location value = locations->InAt(2); + DCHECK(value.IsFpuRegister()); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + __ StoreSToOffset(value.AsFpuRegister<SRegister>(), obj, offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); + __ StoreSToOffset(value.AsFpuRegister<SRegister>(), IP, data_offset); + } + break; + } + + case Primitive::kPrimDouble: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); + Location value = locations->InAt(2); + DCHECK(value.IsFpuRegisterPair()); + if (index.IsConstant()) { + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; + __ StoreDToOffset(FromLowSToD(value.AsFpuRegisterPairLow<SRegister>()), obj, offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8)); + __ StoreDToOffset(FromLowSToD(value.AsFpuRegisterPairLow<SRegister>()), IP, data_offset); + } + break; + } + case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << instruction->GetType(); + LOG(FATAL) << "Unreachable type " << value_type; UNREACHABLE(); } } @@ -3247,21 +3306,62 @@ void ParallelMoveResolverARM::EmitMove(size_t index) { if (destination.IsRegister()) { __ LoadFromOffset(kLoadWord, destination.AsRegister<Register>(), SP, source.GetStackIndex()); + } else if (destination.IsFpuRegister()) { + __ LoadSFromOffset(destination.AsFpuRegister<SRegister>(), SP, source.GetStackIndex()); } else { 
DCHECK(destination.IsStackSlot()); __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex()); __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); } - } else { - DCHECK(source.IsConstant()); - DCHECK(source.GetConstant()->IsIntConstant()); - int32_t value = source.GetConstant()->AsIntConstant()->GetValue(); - if (destination.IsRegister()) { - __ LoadImmediate(destination.AsRegister<Register>(), value); + } else if (source.IsFpuRegister()) { + if (destination.IsFpuRegister()) { + __ vmovs(destination.AsFpuRegister<SRegister>(), source.AsFpuRegister<SRegister>()); } else { DCHECK(destination.IsStackSlot()); - __ LoadImmediate(IP, value); + __ StoreSToOffset(source.AsFpuRegister<SRegister>(), SP, destination.GetStackIndex()); + } + } else if (source.IsFpuRegisterPair()) { + if (destination.IsFpuRegisterPair()) { + __ vmovd(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()), + FromLowSToD(source.AsFpuRegisterPairLow<SRegister>())); + } else { + DCHECK(destination.IsDoubleStackSlot()) << destination; + __ StoreDToOffset(FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()), + SP, destination.GetStackIndex()); + } + } else if (source.IsDoubleStackSlot()) { + if (destination.IsFpuRegisterPair()) { + __ LoadDFromOffset(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()), + SP, source.GetStackIndex()); + } else { + DCHECK(destination.IsDoubleStackSlot()) << destination; + __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex()); __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); + __ LoadFromOffset(kLoadWord, IP, SP, source.GetHighStackIndex(kArmWordSize)); + __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize)); + } + } else { + DCHECK(source.IsConstant()) << source; + HInstruction* constant = source.GetConstant(); + if (constant->IsIntConstant()) { + int32_t value = constant->AsIntConstant()->GetValue(); + if (destination.IsRegister()) { + __ LoadImmediate(destination.AsRegister<Register>(), 
value); + } else { + DCHECK(destination.IsStackSlot()); + __ LoadImmediate(IP, value); + __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); + } + } else { + DCHECK(constant->IsFloatConstant()); + float value = constant->AsFloatConstant()->GetValue(); + if (destination.IsFpuRegister()) { + __ LoadSImmediate(destination.AsFpuRegister<SRegister>(), value); + } else { + DCHECK(destination.IsStackSlot()); + __ LoadImmediate(IP, bit_cast<int32_t, float>(value)); + __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); + } } } } @@ -3300,6 +3400,20 @@ void ParallelMoveResolverARM::EmitSwap(size_t index) { Exchange(destination.AsRegister<Register>(), source.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsStackSlot()) { Exchange(source.GetStackIndex(), destination.GetStackIndex()); + } else if (source.IsFpuRegister() && destination.IsFpuRegister()) { + __ vmovrs(IP, source.AsFpuRegister<SRegister>()); + __ vmovs(source.AsFpuRegister<SRegister>(), destination.AsFpuRegister<SRegister>()); + __ vmovsr(destination.AsFpuRegister<SRegister>(), IP); + } else if (source.IsFpuRegister() || destination.IsFpuRegister()) { + SRegister reg = source.IsFpuRegister() ? source.AsFpuRegister<SRegister>() + : destination.AsFpuRegister<SRegister>(); + int mem = source.IsFpuRegister() + ? 
destination.GetStackIndex() + : source.GetStackIndex(); + + __ vmovrs(IP, reg); + __ LoadSFromOffset(reg, SP, mem); + __ StoreToOffset(kStoreWord, IP, SP, mem); } else { LOG(FATAL) << "Unimplemented"; } diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 40f4edc4eb..c1b4eda3a4 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -168,6 +168,8 @@ class CodeGeneratorARM : public CodeGenerator { void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE; size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; size_t GetWordSize() const OVERRIDE { return kArmWordSize; @@ -237,6 +239,10 @@ class CodeGeneratorARM : public CodeGenerator { return isa_features_; } + bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE { + return type == Primitive::kPrimDouble || type == Primitive::kPrimLong; + } + private: // Labels for each block that will be compiled. GrowableArray<Label> block_labels_; diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 19488a4ba2..e4da07be43 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -267,6 +267,10 @@ class CodeGeneratorARM64 : public CodeGenerator { ParallelMoveResolverARM64* GetMoveResolver() { return &move_resolver_; } + bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + return false; + } + private: // Labels for each block that will be compiled. 
vixl::Label* block_labels_; diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 636f8845e5..acde122917 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -222,6 +222,10 @@ class CodeGeneratorX86 : public CodeGenerator { block_labels_.SetSize(GetGraph()->GetBlocks().Size()); } + bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE { + return type == Primitive::kPrimLong; + } + private: // Labels for each block that will be compiled. GrowableArray<Label> block_labels_; diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 070886460b..87f6b0f779 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -218,6 +218,10 @@ class CodeGeneratorX86_64 : public CodeGenerator { block_labels_.SetSize(GetGraph()->GetBlocks().Size()); } + bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + return false; + } + private: // Labels for each block that will be compiled. 
GrowableArray<Label> block_labels_; diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 9ed1e4528c..8a7bfd6087 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -30,10 +30,12 @@ class HGraphVisualizerPrinter : public HGraphVisitor { HGraphVisualizerPrinter(HGraph* graph, std::ostream& output, const char* pass_name, + bool is_after_pass, const CodeGenerator& codegen) : HGraphVisitor(graph), output_(output), pass_name_(pass_name), + is_after_pass_(is_after_pass), codegen_(codegen), indent_(0) {} @@ -136,6 +138,10 @@ class HGraphVisualizerPrinter : public HGraphVisitor { output_ << "invalid"; } else if (location.IsStackSlot()) { output_ << location.GetStackIndex() << "(sp)"; + } else if (location.IsFpuRegisterPair()) { + codegen_.DumpFloatingPointRegister(output_, location.low()); + output_ << " and "; + codegen_.DumpFloatingPointRegister(output_, location.high()); } else { DCHECK(location.IsDoubleStackSlot()); output_ << "2x" << location.GetStackIndex() << "(sp)"; @@ -224,7 +230,8 @@ class HGraphVisualizerPrinter : public HGraphVisitor { void Run() { StartTag("cfg"); - PrintProperty("name", pass_name_); + std::string pass_desc = std::string(pass_name_) + (is_after_pass_ ? 
" (after)" : " (before)"); + PrintProperty("name", pass_desc.c_str()); VisitInsertionOrder(); EndTag("cfg"); } @@ -275,6 +282,7 @@ class HGraphVisualizerPrinter : public HGraphVisitor { private: std::ostream& output_; const char* pass_name_; + const bool is_after_pass_; const CodeGenerator& codegen_; size_t indent_; @@ -295,7 +303,7 @@ HGraphVisualizer::HGraphVisualizer(std::ostream* output, } is_enabled_ = true; - HGraphVisualizerPrinter printer(graph_, *output_, "", codegen_); + HGraphVisualizerPrinter printer(graph_, *output_, "", true, codegen_); printer.StartTag("compilation"); printer.PrintProperty("name", method_name); printer.PrintProperty("method", method_name); @@ -305,8 +313,7 @@ HGraphVisualizer::HGraphVisualizer(std::ostream* output, void HGraphVisualizer::DumpGraph(const char* pass_name, bool is_after_pass) const { if (is_enabled_) { - std::string pass_desc = std::string(pass_name) + (is_after_pass ? " (after)" : " (before)"); - HGraphVisualizerPrinter printer(graph_, *output_, pass_desc.c_str(), codegen_); + HGraphVisualizerPrinter printer(graph_, *output_, pass_name, is_after_pass, codegen_); printer.Run(); } } diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 1ff26d914c..7df99d4b6f 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -160,6 +160,16 @@ class Location : public ValueObject { return GetPayload(); } + int low() const { + DCHECK(IsPair()); + return GetPayload() >> 16; + } + + int high() const { + DCHECK(IsPair()); + return GetPayload() & 0xFFFF; + } + template <typename T> T AsRegister() const { DCHECK(IsRegister()); @@ -175,25 +185,41 @@ class Location : public ValueObject { template <typename T> T AsRegisterPairLow() const { DCHECK(IsRegisterPair()); - return static_cast<T>(GetPayload() >> 16); + return static_cast<T>(low()); } template <typename T> T AsRegisterPairHigh() const { DCHECK(IsRegisterPair()); - return static_cast<T>(GetPayload() & 0xFFFF); + return 
static_cast<T>(high()); } template <typename T> T AsFpuRegisterPairLow() const { DCHECK(IsFpuRegisterPair()); - return static_cast<T>(GetPayload() >> 16); + return static_cast<T>(low()); } template <typename T> T AsFpuRegisterPairHigh() const { DCHECK(IsFpuRegisterPair()); - return static_cast<T>(GetPayload() & 0xFFFF); + return static_cast<T>(high()); + } + + bool IsPair() const { + return IsRegisterPair() || IsFpuRegisterPair(); + } + + Location ToLow() const { + return IsRegisterPair() + ? Location::RegisterLocation(low()) + : Location::FpuRegisterLocation(low()); + } + + Location ToHigh() const { + return IsRegisterPair() + ? Location::RegisterLocation(high()) + : Location::FpuRegisterLocation(high()); } static uintptr_t EncodeStackIndex(intptr_t stack_index) { @@ -264,6 +290,18 @@ class Location : public ValueObject { return value_ == other.value_; } + // Returns whether this location contains `other`. + bool Contains(Location other) const { + if (Equals(other)) return true; + if (IsRegisterPair() && other.IsRegister()) { + return low() == other.reg() || high() == other.reg(); + } + if (IsFpuRegisterPair() && other.IsFpuRegister()) { + return low() == other.reg() || high() == other.reg(); + } + return false; + } + const char* DebugString() const { switch (GetKind()) { case kInvalid: return "I"; @@ -525,7 +563,8 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { && (output_.GetPolicy() == Location::kSameAsFirstInput)) { return false; } - if (inputs_.Get(input_index).IsRegister() || inputs_.Get(input_index).IsFpuRegister()) { + Location input = inputs_.Get(input_index); + if (input.IsRegister() || input.IsFpuRegister() || input.IsPair()) { return false; } return true; diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 0fc1fd8663..b98bc70a9f 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -2734,7 +2734,7 @@ class MoveOperands : public ArenaObject<kArenaAllocMisc> { // True if this blocks a move 
from the given location. bool Blocks(Location loc) const { - return !IsEliminated() && source_.Equals(loc); + return !IsEliminated() && source_.Contains(loc); } // A move is redundant if it's been eliminated, if its source and diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index c1c805dc56..1efc52b9ec 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -27,6 +27,12 @@ namespace art { static constexpr size_t kMaxLifetimePosition = -1; static constexpr size_t kDefaultNumberOfSpillSlots = 4; +// For simplicity, we implement register pairs as (reg, reg + 1). +// Note that this is a requirement for double registers on ARM, since we +// allocate SRegister. +static int GetHighForLowRegister(int reg) { return reg + 1; } +static bool IsLowRegister(int reg) { return (reg & 1) == 0; } + RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, CodeGenerator* codegen, const SsaLivenessAnalysis& liveness) @@ -72,10 +78,16 @@ bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph, !it.Done(); it.Advance()) { HInstruction* current = it.Current(); - if (current->GetType() == Primitive::kPrimLong || - current->GetType() == Primitive::kPrimFloat || - current->GetType() == Primitive::kPrimDouble) { - return false; + if (instruction_set == kX86) { + if (current->GetType() == Primitive::kPrimLong || + current->GetType() == Primitive::kPrimFloat || + current->GetType() == Primitive::kPrimDouble) { + return false; + } + } else if (instruction_set == kArm || instruction_set == kThumb2) { + if (current->GetType() == Primitive::kPrimLong) { + return false; + } } } } @@ -130,7 +142,7 @@ void RegisterAllocator::BlockRegister(Location location, : physical_fp_register_intervals_.Get(reg); Primitive::Type type = location.IsRegister() ? 
Primitive::kPrimInt - : Primitive::kPrimDouble; + : Primitive::kPrimFloat; if (interval == nullptr) { interval = LiveInterval::MakeFixedInterval(allocator_, reg, type); if (location.IsRegister()) { @@ -226,6 +238,12 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble); temp_intervals_.Add(interval); interval->AddRange(position, position + 1); + if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) { + interval->AddHighInterval(true); + LiveInterval* high = interval->GetHighInterval(); + temp_intervals_.Add(high); + unhandled_fp_intervals_.Add(high); + } unhandled_fp_intervals_.Add(interval); break; } @@ -279,6 +297,9 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { Location input = locations->InAt(i); if (input.IsRegister() || input.IsFpuRegister()) { BlockRegister(input, position, position + 1); + } else if (input.IsPair()) { + BlockRegister(input.ToLow(), position, position + 1); + BlockRegister(input.ToHigh(), position, position + 1); } } @@ -291,6 +312,10 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { DCHECK(unhandled.IsEmpty() || current->StartsBeforeOrAt(unhandled.Peek())); + if (codegen_->NeedsTwoRegisters(current->GetType())) { + current->AddHighInterval(); + } + // Some instructions define their output in fixed register/stack slot. We need // to ensure we know these locations before doing register allocation. For a // given register, we create an interval that covers these locations. 
The register @@ -304,14 +329,30 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { if (first.IsRegister() || first.IsFpuRegister()) { current->SetFrom(position + 1); current->SetRegister(first.reg()); + } else if (first.IsPair()) { + current->SetFrom(position + 1); + current->SetRegister(first.low()); + LiveInterval* high = current->GetHighInterval(); + high->SetRegister(first.high()); + high->SetFrom(position + 1); } } else if (output.IsRegister() || output.IsFpuRegister()) { // Shift the interval's start by one to account for the blocked register. current->SetFrom(position + 1); current->SetRegister(output.reg()); BlockRegister(output, position, position + 1); + } else if (output.IsPair()) { + current->SetFrom(position + 1); + current->SetRegister(output.low()); + LiveInterval* high = current->GetHighInterval(); + high->SetRegister(output.high()); + high->SetFrom(position + 1); + BlockRegister(output.ToLow(), position, position + 1); + BlockRegister(output.ToHigh(), position, position + 1); } else if (output.IsStackSlot() || output.IsDoubleStackSlot()) { current->SetSpillSlot(output.GetStackIndex()); + } else { + DCHECK(output.IsUnallocated() || output.IsConstant()); } // If needed, add interval to the list of unhandled intervals. @@ -516,6 +557,7 @@ void RegisterAllocator::LinearScan() { LiveInterval* current = unhandled_->Pop(); DCHECK(!current->IsFixed() && !current->HasSpillSlot()); DCHECK(unhandled_->IsEmpty() || unhandled_->Peek()->GetStart() >= current->GetStart()); + DCHECK(!current->IsLowInterval() || unhandled_->Peek()->IsHighInterval()); size_t position = current->GetStart(); @@ -566,6 +608,13 @@ void RegisterAllocator::LinearScan() { continue; } + if (current->IsHighInterval() && !current->GetLowInterval()->HasRegister()) { + DCHECK(!current->HasRegister()); + // Allocating the low part was unsuccessful. The split interval for the high part + will be handled next (it is in the `unhandled_` list). 
+ continue; + } + // (4) Try to find an available register. bool success = TryAllocateFreeReg(current); @@ -578,6 +627,9 @@ void RegisterAllocator::LinearScan() { // intervals. if (success) { active_.Add(current); + if (current->HasHighInterval() && !current->GetHighInterval()->HasRegister()) { + current->GetHighInterval()->SetRegister(GetHighForLowRegister(current->GetRegister())); + } } } } @@ -630,26 +682,31 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { if (current->HasRegister()) { // Some instructions have a fixed register output. reg = current->GetRegister(); - DCHECK_NE(free_until[reg], 0u); + if (free_until[reg] == 0) { + DCHECK(current->IsHighInterval()); + // AllocateBlockedReg will spill the holder of the register. + return false; + } } else { + DCHECK(!current->IsHighInterval()); int hint = current->FindFirstRegisterHint(free_until); if (hint != kNoRegister) { DCHECK(!IsBlocked(hint)); reg = hint; + } else if (current->IsLowInterval()) { + reg = FindAvailableRegisterPair(free_until); } else { - // Pick the register that is free the longest. - for (size_t i = 0; i < number_of_registers_; ++i) { - if (IsBlocked(i)) continue; - if (reg == -1 || free_until[i] > free_until[reg]) { - reg = i; - if (free_until[i] == kMaxLifetimePosition) break; - } - } + reg = FindAvailableRegister(free_until); } } + DCHECK_NE(reg, -1); // If we could not find a register, we need to spill. - if (reg == -1 || free_until[reg] == 0) { + if (free_until[reg] == 0) { + return false; + } + + if (current->IsLowInterval() && free_until[GetHighForLowRegister(reg)] == 0) { return false; } @@ -671,6 +728,40 @@ bool RegisterAllocator::IsBlocked(int reg) const { : blocked_fp_registers_[reg]; } +int RegisterAllocator::FindAvailableRegisterPair(size_t* next_use) const { + int reg = -1; + // Pick the register pair that is used the last. 
+ for (size_t i = 0; i < number_of_registers_; ++i) { + if (IsBlocked(i)) continue; + if (!IsLowRegister(i)) continue; + int high_register = GetHighForLowRegister(i); + if (IsBlocked(high_register)) continue; + int existing_high_register = GetHighForLowRegister(reg); + if ((reg == -1) || (next_use[i] >= next_use[reg] + && next_use[high_register] >= next_use[existing_high_register])) { + reg = i; + if (next_use[i] == kMaxLifetimePosition + && next_use[high_register] == kMaxLifetimePosition) { + break; + } + } + } + return reg; +} + +int RegisterAllocator::FindAvailableRegister(size_t* next_use) const { + int reg = -1; + // Pick the register that is used the last. + for (size_t i = 0; i < number_of_registers_; ++i) { + if (IsBlocked(i)) continue; + if (reg == -1 || next_use[i] > next_use[reg]) { + reg = i; + if (next_use[i] == kMaxLifetimePosition) break; + } + } + return reg; +} + // Find the register that is used the last, and spill the interval // that holds it. If the first use of `current` is after that register // we spill `current` instead. @@ -731,17 +822,20 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { } } - // Pick the register that is used the last. 
int reg = -1; - for (size_t i = 0; i < number_of_registers_; ++i) { - if (IsBlocked(i)) continue; - if (reg == -1 || next_use[i] > next_use[reg]) { - reg = i; - if (next_use[i] == kMaxLifetimePosition) break; - } + if (current->HasRegister()) { + DCHECK(current->IsHighInterval()); + reg = current->GetRegister(); + } else if (current->IsLowInterval()) { + reg = FindAvailableRegisterPair(next_use); + } else { + DCHECK(!current->IsHighInterval()); + reg = FindAvailableRegister(next_use); } - if (first_register_use >= next_use[reg]) { + if ((first_register_use >= next_use[reg]) + || (current->IsLowInterval() && first_register_use >= next_use[GetHighForLowRegister(reg)])) { + DCHECK(!current->IsHighInterval()); // If the first use of that instruction is after the last use of the found // register, we split this interval just before its first register use. AllocateSpillSlotFor(current); @@ -815,23 +909,49 @@ void RegisterAllocator::AddSorted(GrowableArray<LiveInterval*>* array, LiveInter break; } } + array->InsertAt(insert_at, interval); + // Insert the high interval before the low, to ensure the low is processed before. + if (interval->HasHighInterval()) { + array->InsertAt(insert_at, interval->GetHighInterval()); + } else if (interval->HasLowInterval()) { + array->InsertAt(insert_at + 1, interval->GetLowInterval()); + } } LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) { - DCHECK(position >= interval->GetStart()); + DCHECK_GE(position, interval->GetStart()); DCHECK(!interval->IsDeadAt(position)); if (position == interval->GetStart()) { // Spill slot will be allocated when handling `interval` again. 
interval->ClearRegister(); + if (interval->HasHighInterval()) { + interval->GetHighInterval()->ClearRegister(); + } else if (interval->HasLowInterval()) { + interval->GetLowInterval()->ClearRegister(); + } return interval; } else { LiveInterval* new_interval = interval->SplitAt(position); + if (interval->HasHighInterval()) { + LiveInterval* high = interval->GetHighInterval()->SplitAt(position); + new_interval->SetHighInterval(high); + high->SetLowInterval(new_interval); + } else if (interval->HasLowInterval()) { + LiveInterval* low = interval->GetLowInterval()->SplitAt(position); + new_interval->SetLowInterval(low); + low->SetHighInterval(new_interval); + } return new_interval; } } void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { + if (interval->IsHighInterval()) { + // The low interval will contain the spill slot. + return; + } + LiveInterval* parent = interval->GetParent(); // An instruction gets a spill slot for its entire lifetime. If the parent @@ -898,6 +1018,7 @@ void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { static bool IsValidDestination(Location destination) { return destination.IsRegister() || destination.IsFpuRegister() + || destination.IsFpuRegisterPair() || destination.IsStackSlot() || destination.IsDoubleStackSlot(); } @@ -905,7 +1026,6 @@ static bool IsValidDestination(Location destination) { void RegisterAllocator::AddInputMoveFor(HInstruction* user, Location source, Location destination) const { - DCHECK(IsValidDestination(destination)); if (source.Equals(destination)) return; DCHECK(!user->IsPhi()); @@ -1075,9 +1195,7 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { if (current->HasSpillSlot() && current->HasRegister()) { // We spill eagerly, so move must be at definition. InsertMoveAfter(interval->GetDefinedBy(), - interval->IsFloatingPoint() - ? 
Location::FpuRegisterLocation(interval->GetRegister()) - : Location::RegisterLocation(interval->GetRegister()), + interval->ToLocation(), interval->NeedsTwoSpillSlots() ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot()) : Location::StackSlot(interval->GetParent()->GetSpillSlot())); @@ -1148,6 +1266,11 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { locations->AddLiveRegister(source); break; } + case Location::kFpuRegisterPair: { + locations->AddLiveRegister(source.ToLow()); + locations->AddLiveRegister(source.ToHigh()); + break; + } case Location::kStackSlot: // Fall-through case Location::kDoubleStackSlot: // Fall-through case Location::kConstant: { @@ -1307,6 +1430,10 @@ void RegisterAllocator::Resolve() { size_t temp_index = 0; for (size_t i = 0; i < temp_intervals_.Size(); ++i) { LiveInterval* temp = temp_intervals_.Get(i); + if (temp->IsHighInterval()) { + // High intervals can be skipped, they are already handled by the low interval. + continue; + } HInstruction* at = liveness_.GetTempUser(temp); if (at != current) { temp_index = 0; @@ -1320,14 +1447,14 @@ void RegisterAllocator::Resolve() { break; case Primitive::kPrimDouble: - // TODO: Support the case of ARM, where a double value - // requires an FPU register pair (note that the ARM back end - // does not yet use this register allocator when a method uses - // floats or doubles). 
- DCHECK(codegen_->GetInstructionSet() != kArm - && codegen_->GetInstructionSet() != kThumb2); - locations->SetTempAt( - temp_index++, Location::FpuRegisterLocation(temp->GetRegister())); + if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) { + Location location = Location::FpuRegisterPairLocation( + temp->GetRegister(), temp->GetHighInterval()->GetRegister()); + locations->SetTempAt(temp_index++, location); + } else { + locations->SetTempAt( + temp_index++, Location::FpuRegisterLocation(temp->GetRegister())); + } break; default: diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index cbe741c2b3..c152a8bf67 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -128,6 +128,8 @@ class RegisterAllocator { bool ValidateInternal(bool log_fatal_on_failure) const; void DumpInterval(std::ostream& stream, LiveInterval* interval) const; void DumpAllIntervals(std::ostream& stream) const; + int FindAvailableRegisterPair(size_t* next_use) const; + int FindAvailableRegister(size_t* next_use) const; ArenaAllocator* const allocator_; CodeGenerator* const codegen_; diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 660a5c5f60..d41157b8d8 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -419,10 +419,21 @@ bool LiveInterval::NeedsTwoSpillSlots() const { } Location LiveInterval::ToLocation() const { + DCHECK(!IsHighInterval()); if (HasRegister()) { - return IsFloatingPoint() - ? 
Location::FpuRegisterLocation(GetRegister()) - : Location::RegisterLocation(GetRegister()); + if (IsFloatingPoint()) { + if (HasHighInterval()) { + return Location::FpuRegisterPairLocation(GetRegister(), GetHighInterval()->GetRegister()); + } else { + return Location::FpuRegisterLocation(GetRegister()); + } + } else { + if (HasHighInterval()) { + return Location::RegisterPairLocation(GetRegister(), GetHighInterval()->GetRegister()); + } else { + return Location::RegisterLocation(GetRegister()); + } + } } else { HInstruction* defined_by = GetParent()->GetDefinedBy(); if (defined_by->IsConstant()) { diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index 23123891ef..74611e1cbb 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -77,6 +77,15 @@ class LiveRange FINAL : public ArenaObject<kArenaAllocMisc> { stream << "[" << start_ << ", " << end_ << ")"; } + LiveRange* Dup(ArenaAllocator* allocator) const { + return new (allocator) LiveRange( + start_, end_, next_ == nullptr ? nullptr : next_->Dup(allocator)); + } + + LiveRange* GetLastRange() { + return next_ == nullptr ? this : next_->GetLastRange(); + } + private: size_t start_; size_t end_; @@ -123,6 +132,12 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> { stream << position_; } + UsePosition* Dup(ArenaAllocator* allocator) const { + return new (allocator) UsePosition( + user_, input_index_, is_environment_, position_, + next_ == nullptr ? 
nullptr : next_->Dup(allocator)); + } + private: HInstruction* const user_; const size_t input_index_; @@ -478,6 +493,8 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { } stream << "}"; stream << " is_fixed: " << is_fixed_ << ", is_split: " << IsSplit(); + stream << " is_high: " << IsHighInterval(); + stream << " is_low: " << IsLowInterval(); } LiveInterval* GetNextSibling() const { return next_sibling_; } @@ -512,6 +529,58 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { // Returns whether `other` and `this` share the same kind of register. bool SameRegisterKind(Location other) const; + bool HasHighInterval() const { + return !IsHighInterval() && (GetParent()->high_or_low_interval_ != nullptr); + } + + bool HasLowInterval() const { + return IsHighInterval(); + } + + LiveInterval* GetLowInterval() const { + DCHECK(HasLowInterval()); + return high_or_low_interval_; + } + + LiveInterval* GetHighInterval() const { + DCHECK(HasHighInterval()); + return high_or_low_interval_; + } + + bool IsHighInterval() const { + return GetParent()->is_high_interval_; + } + + bool IsLowInterval() const { + return !IsHighInterval() && (GetParent()->high_or_low_interval_ != nullptr); + } + + void SetLowInterval(LiveInterval* low) { + DCHECK(IsHighInterval()); + high_or_low_interval_ = low; + } + + void SetHighInterval(LiveInterval* high) { + DCHECK(IsLowInterval()); + high_or_low_interval_ = high; + } + + void AddHighInterval(bool is_temp = false) { + DCHECK_EQ(GetParent(), this); + DCHECK(!HasHighInterval()); + DCHECK(!HasLowInterval()); + high_or_low_interval_ = new (allocator_) LiveInterval( + allocator_, type_, defined_by_, false, kNoRegister, is_temp, false, true); + high_or_low_interval_->high_or_low_interval_ = this; + if (first_range_ != nullptr) { + high_or_low_interval_->first_range_ = first_range_->Dup(allocator_); + high_or_low_interval_->last_range_ = first_range_->GetLastRange(); + } + if (first_use_ != nullptr) { + 
high_or_low_interval_->first_use_ = first_use_->Dup(allocator_); + } + } + private: LiveInterval(ArenaAllocator* allocator, Primitive::Type type, @@ -519,7 +588,8 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { bool is_fixed = false, int reg = kNoRegister, bool is_temp = false, - bool is_slow_path_safepoint = false) + bool is_slow_path_safepoint = false, + bool is_high_interval = false) : allocator_(allocator), first_range_(nullptr), last_range_(nullptr), @@ -532,6 +602,8 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { is_fixed_(is_fixed), is_temp_(is_temp), is_slow_path_safepoint_(is_slow_path_safepoint), + is_high_interval_(is_high_interval), + high_or_low_interval_(nullptr), defined_by_(defined_by) {} ArenaAllocator* const allocator_; @@ -568,6 +640,13 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { // Whether the interval is for a safepoint that calls on slow path. const bool is_slow_path_safepoint_; + // Whether this interval is a synthesized interval for register pair. + const bool is_high_interval_; + + // If this interval needs a register pair, the high or low equivalent. + // `is_high_interval_` tells whether this holds the low or the high. + LiveInterval* high_or_low_interval_; + // The instruction represented by this interval. HInstruction* const defined_by_; diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h index 87b38133fb..d9122764d0 100644 --- a/compiler/utils/arm/assembler_arm.h +++ b/compiler/utils/arm/assembler_arm.h @@ -534,6 +534,13 @@ class ArmAssembler : public Assembler { // Load and Store. May clobber IP. 
virtual void LoadImmediate(Register rd, int32_t value, Condition cond = AL) = 0; + void LoadSImmediate(SRegister sd, float value, Condition cond = AL) { + if (!vmovs(sd, value, cond)) { + LoadImmediate(IP, bit_cast<int32_t, float>(value), cond); + vmovsr(sd, IP, cond); + } + } + virtual void MarkExceptionHandler(Label* label) = 0; virtual void LoadFromOffset(LoadOperandType type, Register reg, |