diff options
author | 2014-06-19 10:00:34 +0100 | |
---|---|---|
committer | 2014-07-02 16:00:32 +0100 | |
commit | 412f10cfed002ab617c78f2621d68446ca4dd8bd (patch) | |
tree | bbd9dddd0436da566365ada5deb1840e315e1b11 | |
parent | d6ab04646d8eec6f24b200f8649f3d942d9ad17e (diff) |
Support longs in the register allocator for x86_64.
Change-Id: I7fb6dfb761bc5cf9e5705682032855a0a70ca867
-rw-r--r-- | compiler/optimizing/builder.cc | 15 | ||||
-rw-r--r-- | compiler/optimizing/builder.h | 9 | ||||
-rw-r--r-- | compiler/optimizing/code_generator.h | 1 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm.cc | 42 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm.h | 4 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 55 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86.h | 4 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86_64.cc | 96 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86_64.h | 10 | ||||
-rw-r--r-- | compiler/optimizing/graph_visualizer.cc | 6 | ||||
-rw-r--r-- | compiler/optimizing/nodes.h | 17 | ||||
-rw-r--r-- | compiler/optimizing/register_allocator.cc | 57 | ||||
-rw-r--r-- | compiler/optimizing/register_allocator.h | 2 | ||||
-rw-r--r-- | compiler/utils/x86_64/assembler_x86_64.cc | 16 | ||||
-rw-r--r-- | compiler/utils/x86_64/assembler_x86_64.h | 2 | ||||
-rw-r--r-- | test/405-optimizing-long-allocator/expected.txt | 0 | ||||
-rw-r--r-- | test/405-optimizing-long-allocator/info.txt | 1 | ||||
-rw-r--r-- | test/405-optimizing-long-allocator/src/Main.java | 172 |
18 files changed, 475 insertions, 34 deletions
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index c3a322caee..cc995f72a1 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -226,7 +226,7 @@ HBasicBlock* HGraphBuilder::FindBlockStartingAt(int32_t index) const { } template<typename T> -void HGraphBuilder::Binop_32x(const Instruction& instruction, Primitive::Type type) { +void HGraphBuilder::Binop_23x(const Instruction& instruction, Primitive::Type type) { HInstruction* first = LoadLocal(instruction.VRegB(), type); HInstruction* second = LoadLocal(instruction.VRegC(), type); current_block_->AddInstruction(new (arena_) T(type, first, second)); @@ -501,22 +501,22 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, int32_ } case Instruction::ADD_INT: { - Binop_32x<HAdd>(instruction, Primitive::kPrimInt); + Binop_23x<HAdd>(instruction, Primitive::kPrimInt); break; } case Instruction::ADD_LONG: { - Binop_32x<HAdd>(instruction, Primitive::kPrimLong); + Binop_23x<HAdd>(instruction, Primitive::kPrimLong); break; } case Instruction::SUB_INT: { - Binop_32x<HSub>(instruction, Primitive::kPrimInt); + Binop_23x<HSub>(instruction, Primitive::kPrimInt); break; } case Instruction::SUB_LONG: { - Binop_32x<HSub>(instruction, Primitive::kPrimLong); + Binop_23x<HSub>(instruction, Primitive::kPrimLong); break; } @@ -573,6 +573,11 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, int32_ UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); break; + case Instruction::CMP_LONG: { + Binop_23x<HCompare>(instruction, Primitive::kPrimLong); + break; + } + case Instruction::NOP: break; diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index 0852a26c55..ee32ca80ac 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -73,7 +73,7 @@ class HGraphBuilder : public ValueObject { bool InitializeParameters(uint16_t number_of_parameters); template<typename T> - void Binop_32x(const Instruction& instruction, Primitive::Type type); + void Binop_23x(const Instruction& instruction, Primitive::Type type); template<typename T> void Binop_12x(const Instruction& instruction, Primitive::Type type); @@ -84,11 +84,8 @@ class HGraphBuilder : public ValueObject { template<typename T> void Binop_22s(const Instruction& instruction, bool reverse); - template<typename T> - void If_22t(const Instruction& instruction, int32_t dex_offset); - - template<typename T> - void If_21t(const Instruction& instruction, int32_t dex_offset); + template<typename T> void If_21t(const Instruction& instruction, int32_t dex_offset); + template<typename T> void If_22t(const Instruction& instruction, int32_t dex_offset); void BuildReturn(const Instruction& instruction, Primitive::Type type); diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 83621e0f72..ae2f03080e 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -90,6 +90,7 @@ class CodeGenerator : public ArenaObject { virtual void SetupBlockedRegisters(bool* blocked_registers) const = 0; virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0; virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0; + virtual InstructionSet GetInstructionSet() const = 0; void RecordPcInfo(uint32_t dex_pc) { struct PcInfo pc_info; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index ec3c81533f..d87c14b4db 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -905,6 +905,48 @@ void InstructionCodeGeneratorARM::VisitNot(HNot* instruction) { locations->InAt(0).AsArm().AsCoreRegister(), ShifterOperand(1)); } +void LocationsBuilderARM::VisitCompare(HCompare* compare) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); + compare->SetLocations(locations); +} + +void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) { + Label greater, done; + LocationSummary* locations = compare->GetLocations(); + switch (compare->InputAt(0)->GetType()) { + case Primitive::kPrimLong: { + Register output = locations->Out().AsArm().AsCoreRegister(); + ArmManagedRegister left = locations->InAt(0).AsArm(); + ArmManagedRegister right = locations->InAt(1).AsArm(); + Label less, greater, done; + __ cmp(left.AsRegisterPairHigh(), + ShifterOperand(right.AsRegisterPairHigh())); // Signed compare. + __ b(&less, LT); + __ b(&greater, GT); + __ cmp(left.AsRegisterPairLow(), + ShifterOperand(right.AsRegisterPairLow())); // Unsigned compare. + __ LoadImmediate(output, 0); + __ b(&done, EQ); + __ b(&less, CC); + + __ Bind(&greater); + __ LoadImmediate(output, 1); + __ b(&done); + + __ Bind(&less); + __ LoadImmediate(output, -1); + + __ Bind(&done); + break; + } + default: + LOG(FATAL) << "Unimplemented compare type " << compare->InputAt(0)->GetType(); + } +} + void LocationsBuilderARM::VisitPhi(HPhi* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) { diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 712a24cf67..c46c1b131c 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -171,6 +171,10 @@ class CodeGeneratorARM : public CodeGenerator { return &move_resolver_; } + virtual InstructionSet GetInstructionSet() const OVERRIDE { + return InstructionSet::kArm; + } + private: // Helper method to move a 32bits value between two locations. void Move32(Location destination, Location source); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index f624f3ce90..572d494719 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -81,12 +81,23 @@ ManagedRegister CodeGeneratorX86::AllocateFreeRegister(Primitive::Type type, bool* blocked_registers) const { switch (type) { case Primitive::kPrimLong: { - size_t reg = AllocateFreeRegisterInternal( - GetBlockedRegisterPairs(blocked_registers), kNumberOfRegisterPairs); + bool* blocked_register_pairs = GetBlockedRegisterPairs(blocked_registers); + size_t reg = AllocateFreeRegisterInternal(blocked_register_pairs, kNumberOfRegisterPairs); X86ManagedRegister pair = X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg)); blocked_registers[pair.AsRegisterPairLow()] = true; blocked_registers[pair.AsRegisterPairHigh()] = true; + // Block all other register pairs that share a register with `pair`. + for (int i = 0; i < kNumberOfRegisterPairs; i++) { + X86ManagedRegister current = + X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i)); + if (current.AsRegisterPairLow() == pair.AsRegisterPairLow() + || current.AsRegisterPairLow() == pair.AsRegisterPairHigh() + || current.AsRegisterPairHigh() == pair.AsRegisterPairLow() + || current.AsRegisterPairHigh() == pair.AsRegisterPairHigh()) { + blocked_register_pairs[i] = true; + } + } return pair; } @@ -901,6 +912,46 @@ void InstructionCodeGeneratorX86::VisitNot(HNot* instruction) { __ xorl(out.AsX86().AsCpuRegister(), Immediate(1)); } +void LocationsBuilderX86::VisitCompare(HCompare* compare) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); + compare->SetLocations(locations); +} + +void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) { + Label greater, done; + LocationSummary* locations = compare->GetLocations(); + switch (compare->InputAt(0)->GetType()) { + case Primitive::kPrimLong: { + Label less, greater, done; + Register output = locations->Out().AsX86().AsCpuRegister(); + X86ManagedRegister left = locations->InAt(0).AsX86(); + X86ManagedRegister right = locations->InAt(1).AsX86(); + __ cmpl(left.AsRegisterPairHigh(), right.AsRegisterPairHigh()); + __ j(kLess, &less); // Signed compare. + __ j(kGreater, &greater); // Signed compare. + __ cmpl(left.AsRegisterPairLow(), right.AsRegisterPairLow()); + __ movl(output, Immediate(0)); + __ j(kEqual, &done); + __ j(kBelow, &less); // Unsigned compare. + + __ Bind(&greater); + __ movl(output, Immediate(1)); + __ jmp(&done); + + __ Bind(&less); + __ movl(output, Immediate(-1)); + + __ Bind(&done); + break; + } + default: + LOG(FATAL) << "Unimplemented compare type " << compare->InputAt(0)->GetType(); + } +} + void LocationsBuilderX86::VisitPhi(HPhi* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) { diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index acc670e09b..8a8216a56d 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -173,6 +173,10 @@ class CodeGeneratorX86 : public CodeGenerator { return &move_resolver_; } + virtual InstructionSet GetInstructionSet() const OVERRIDE { + return InstructionSet::kX86; + } + private: // Helper method to move a 32bits value between two locations. void Move32(Location destination, Location source); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 283f1f5e57..dc1d6164b1 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -228,7 +228,9 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) { } } -void CodeGeneratorX86_64::Move(HInstruction* instruction, Location location, HInstruction* move_for) { +void CodeGeneratorX86_64::Move(HInstruction* instruction, + Location location, + HInstruction* move_for) { if (instruction->AsIntConstant() != nullptr) { Immediate imm(instruction->AsIntConstant()->GetValue()); if (location.IsRegister()) { @@ -383,7 +385,7 @@ void LocationsBuilderX86_64::VisitCondition(HCondition* comp) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(comp); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); + locations->SetOut(Location::RequiresRegister()); comp->SetLocations(locations); } @@ -444,6 +446,39 @@ void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual VisitCondition(comp); } +void LocationsBuilderX86_64::VisitCompare(HCompare* compare) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); + compare->SetLocations(locations); +} + +void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) { + Label greater, done; + LocationSummary* locations = compare->GetLocations(); + switch (compare->InputAt(0)->GetType()) { + case Primitive::kPrimLong: + __ cmpq(locations->InAt(0).AsX86_64().AsCpuRegister(), + locations->InAt(1).AsX86_64().AsCpuRegister()); + break; + default: + LOG(FATAL) << "Unimplemented compare type " << compare->InputAt(0)->GetType(); + } + + __ movl(locations->Out().AsX86_64().AsCpuRegister(), Immediate(0)); + __ j(kEqual, &done); + __ j(kGreater, &greater); + + __ movl(locations->Out().AsX86_64().AsCpuRegister(), Immediate(-1)); + __ jmp(&done); + + __ Bind(&greater); + __ movl(locations->Out().AsX86_64().AsCpuRegister(), Immediate(1)); + + __ Bind(&done); +} + void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) { // TODO: Support constant locations. LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant); @@ -463,7 +498,7 @@ void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) { } void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant) { - // Will be generated at use site. + codegen_->Move(constant, constant->GetLocations()->Out(), nullptr); } void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) { @@ -812,10 +847,13 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { if (source.IsRegister()) { if (destination.IsRegister()) { __ movq(destination.AsX86_64().AsCpuRegister(), source.AsX86_64().AsCpuRegister()); - } else { - DCHECK(destination.IsStackSlot()); + } else if (destination.IsStackSlot()) { __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), source.AsX86_64().AsCpuRegister()); + } else { + DCHECK(destination.IsDoubleStackSlot()); + __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), + source.AsX86_64().AsCpuRegister()); } } else if (source.IsStackSlot()) { if (destination.IsRegister()) { @@ -826,18 +864,27 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); } + } else if (source.IsDoubleStackSlot()) { + if (destination.IsRegister()) { + __ movq(destination.AsX86_64().AsX86_64().AsCpuRegister(), + Address(CpuRegister(RSP), source.GetStackIndex())); + } else { + DCHECK(destination.IsDoubleStackSlot()); + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); + __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); + } } else { LOG(FATAL) << "Unimplemented"; } } -void ParallelMoveResolverX86_64::Exchange(CpuRegister reg, int mem) { +void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) { __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); - __ movl(Address(CpuRegister(RSP), mem), CpuRegister(reg)); - __ movl(CpuRegister(reg), CpuRegister(TMP)); + __ movl(Address(CpuRegister(RSP), mem), reg); + __ movl(reg, CpuRegister(TMP)); } -void ParallelMoveResolverX86_64::Exchange(int mem1, int mem2) { +void ParallelMoveResolverX86_64::Exchange32(int mem1, int mem2) { ScratchRegisterScope ensure_scratch( this, TMP, RAX, codegen_->GetNumberOfCoreRegisters()); @@ -850,6 +897,25 @@ void ParallelMoveResolverX86_64::Exchange(int mem1, int mem2) { CpuRegister(ensure_scratch.GetRegister())); } +void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) { + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); + __ movq(Address(CpuRegister(RSP), mem), reg); + __ movq(reg, CpuRegister(TMP)); +} + +void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) { + ScratchRegisterScope ensure_scratch( + this, TMP, RAX, codegen_->GetNumberOfCoreRegisters()); + + int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0; + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset)); + __ movq(CpuRegister(ensure_scratch.GetRegister()), + Address(CpuRegister(RSP), mem2 + stack_offset)); + __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP)); + __ movq(Address(CpuRegister(RSP), mem1 + stack_offset), + CpuRegister(ensure_scratch.GetRegister())); +} + void ParallelMoveResolverX86_64::EmitSwap(size_t index) { MoveOperands* move = moves_.Get(index); Location source = move->GetSource(); @@ -858,11 +924,17 @@ void ParallelMoveResolverX86_64::EmitSwap(size_t index) { if (source.IsRegister() && destination.IsRegister()) { __ xchgq(destination.AsX86_64().AsCpuRegister(), source.AsX86_64().AsCpuRegister()); } else if (source.IsRegister() && destination.IsStackSlot()) { - Exchange(source.AsX86_64().AsCpuRegister(), destination.GetStackIndex()); + Exchange32(source.AsX86_64().AsCpuRegister(), destination.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsRegister()) { - Exchange(destination.AsX86_64().AsCpuRegister(), source.GetStackIndex()); + Exchange32(destination.AsX86_64().AsCpuRegister(), source.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsStackSlot()) { - Exchange(destination.GetStackIndex(), source.GetStackIndex()); + Exchange32(destination.GetStackIndex(), source.GetStackIndex()); + } else if (source.IsRegister() && destination.IsDoubleStackSlot()) { + Exchange64(source.AsX86_64().AsCpuRegister(), destination.GetStackIndex()); + } else if (source.IsDoubleStackSlot() && destination.IsRegister()) { + Exchange64(destination.AsX86_64().AsCpuRegister(), source.GetStackIndex()); + } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) { + Exchange64(destination.GetStackIndex(), source.GetStackIndex()); } else { LOG(FATAL) << "Unimplemented"; } diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index f07df292e0..d347a4f121 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -69,8 +69,10 @@ class ParallelMoveResolverX86_64 : public ParallelMoveResolver { X86_64Assembler* GetAssembler() const; private: - void Exchange(CpuRegister reg, int mem); - void Exchange(int mem1, int mem2); + void Exchange32(CpuRegister reg, int mem); + void Exchange32(int mem1, int mem2); + void Exchange64(CpuRegister reg, int mem); + void Exchange64(int mem1, int mem2); CodeGeneratorX86_64* const codegen_; @@ -170,6 +172,10 @@ class CodeGeneratorX86_64 : public CodeGenerator { virtual void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; + virtual InstructionSet GetInstructionSet() const OVERRIDE { + return InstructionSet::kX86_64; + } + private: // Helper method to move a value between two locations. void Move(Location destination, Location source); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index a49ce64a2d..f033e2e22b 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -108,9 +108,11 @@ class HGraphVisualizerPrinter : public HGraphVisitor { } else { codegen_.DumpCoreRegister(output_, location.reg().RegId()); } - } else { - DCHECK(location.IsStackSlot()); + } else if (location.IsStackSlot()) { output_ << location.GetStackIndex() << "(sp)"; + } else { + DCHECK(location.IsDoubleStackSlot()); + output_ << "2x" << location.GetStackIndex() << "(sp)"; } } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 503f31d990..92920845c3 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -414,6 +414,7 @@ class HBasicBlock : public ArenaObject { M(ReturnVoid) \ M(StoreLocal) \ M(Sub) \ + M(Compare) \ #define FORWARD_DECLARATION(type) class H##type; @@ -986,6 +987,22 @@ class HGreaterThanOrEqual : public HCondition { }; +// Instruction to check how two inputs compare to each other. +// Result is 0 if input0 == input1, 1 if input0 > input1, or -1 if input0 < input1. +class HCompare : public HBinaryOperation { + public: + HCompare(Primitive::Type type, HInstruction* first, HInstruction* second) + : HBinaryOperation(Primitive::kPrimInt, first, second) { + DCHECK_EQ(type, first->GetType()); + DCHECK_EQ(type, second->GetType()); + } + + DECLARE_INSTRUCTION(Compare); + + private: + DISALLOW_COPY_AND_ASSIGN(HCompare); +}; + // A local in the graph. Corresponds to a Dex register. class HLocal : public HTemplateInstruction<0> { public: diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 1f4cb41582..68130dd5fc 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -55,7 +55,7 @@ bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph, it.Advance()) { HInstruction* current = it.Current(); if (current->NeedsEnvironment()) return false; - if (current->GetType() == Primitive::kPrimLong) return false; + if (current->GetType() == Primitive::kPrimLong && instruction_set != kX86_64) return false; if (current->GetType() == Primitive::kPrimFloat) return false; if (current->GetType() == Primitive::kPrimDouble) return false; } @@ -139,7 +139,7 @@ void RegisterAllocator::AllocateRegistersInternal() { current->SetFrom(position + 1); current->SetRegister(output.reg().RegId()); BlockRegister(output, position, position + 1, instruction->GetType()); - } else if (output.IsStackSlot()) { + } else if (output.IsStackSlot() || output.IsDoubleStackSlot()) { current->SetSpillSlot(output.GetStackIndex()); } for (size_t i = 0; i < instruction->InputCount(); ++i) { @@ -430,7 +430,7 @@ bool RegisterAllocator::IsBlocked(int reg) const { // we spill `current` instead. bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { size_t first_register_use = current->FirstRegisterUse(); - if (current->FirstRegisterUse() == kNoLifetime) { + if (first_register_use == kNoLifetime) { AllocateSpillSlotFor(current); return false; } @@ -559,6 +559,10 @@ LiveInterval* RegisterAllocator::Split(LiveInterval* interval, size_t position) } } +static bool NeedTwoSpillSlot(Primitive::Type type) { + return type == Primitive::kPrimLong || type == Primitive::kPrimDouble; +} + void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { LiveInterval* parent = interval->GetParent(); @@ -581,6 +585,43 @@ void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { } size_t end = last_sibling->GetEnd(); + if (NeedTwoSpillSlot(parent->GetType())) { + AllocateTwoSpillSlots(parent, end); + } else { + AllocateOneSpillSlot(parent, end); + } +} + +void RegisterAllocator::AllocateTwoSpillSlots(LiveInterval* parent, size_t end) { + // Find an available spill slot. + size_t slot = 0; + for (size_t e = spill_slots_.Size(); slot < e; ++slot) { + // We check if it is less rather than less or equal because the parallel move + // resolver does not work when a single spill slot needs to be exchanged with + // a double spill slot. The strict comparison avoids needing to exchange these + // locations at the same lifetime position. + if (spill_slots_.Get(slot) < parent->GetStart() + && (slot == (e - 1) || spill_slots_.Get(slot + 1) < parent->GetStart())) { + break; + } + } + + if (slot == spill_slots_.Size()) { + // We need a new spill slot. + spill_slots_.Add(end); + spill_slots_.Add(end); + } else if (slot == spill_slots_.Size() - 1) { + spill_slots_.Put(slot, end); + spill_slots_.Add(end); + } else { + spill_slots_.Put(slot, end); + spill_slots_.Put(slot + 1, end); + } + + parent->SetSpillSlot(slot * kVRegSize); +} + +void RegisterAllocator::AllocateOneSpillSlot(LiveInterval* parent, size_t end) { // Find an available spill slot. size_t slot = 0; for (size_t e = spill_slots_.Size(); slot < e; ++slot) { @@ -604,7 +645,11 @@ static Location ConvertToLocation(LiveInterval* interval) { return Location::RegisterLocation(ManagedRegister(interval->GetRegister())); } else { DCHECK(interval->GetParent()->HasSpillSlot()); - return Location::StackSlot(interval->GetParent()->GetSpillSlot()); + if (NeedTwoSpillSlot(interval->GetType())) { + return Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot()); + } else { + return Location::StackSlot(interval->GetParent()->GetSpillSlot()); + } } } @@ -750,7 +795,9 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { // We spill eagerly, so move must be at definition. InsertMoveAfter(interval->GetDefinedBy(), Location::RegisterLocation(ManagedRegister(interval->GetRegister())), - Location::StackSlot(interval->GetParent()->GetSpillSlot())); + NeedTwoSpillSlot(interval->GetType()) + ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot()) + : Location::StackSlot(interval->GetParent()->GetSpillSlot())); } UsePosition* use = current->GetFirstUse(); diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index e63122ffed..7d4cd1a862 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -93,6 +93,8 @@ class RegisterAllocator { // Allocate a spill slot for the given interval. void AllocateSpillSlotFor(LiveInterval* interval); + void AllocateOneSpillSlot(LiveInterval* interval, size_t end); + void AllocateTwoSpillSlots(LiveInterval* interval, size_t end); // Connect adjacent siblings within blocks. void ConnectSiblings(LiveInterval* interval); diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 41d1529ef5..4d5d613015 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -949,6 +949,14 @@ void X86_64Assembler::andl(CpuRegister dst, const Immediate& imm) { } +void X86_64Assembler::andq(CpuRegister reg, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + CHECK(imm.is_int32()); // andq only supports 32b immediate. + EmitRex64(reg); + EmitComplex(4, Operand(reg), imm); +} + + void X86_64Assembler::orl(CpuRegister dst, CpuRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitOptionalRex32(dst, src); @@ -972,6 +980,14 @@ void X86_64Assembler::xorl(CpuRegister dst, CpuRegister src) { } +void X86_64Assembler::xorq(CpuRegister dst, CpuRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitRex64(dst, src); + EmitUint8(0x33); + EmitOperand(dst.LowBits(), Operand(src)); +} + + void X86_64Assembler::xorq(CpuRegister dst, const Immediate& imm) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); CHECK(imm.is_int32()); // xorq only supports 32b immediate. diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 9aa5a54df4..7514854829 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -391,12 +391,14 @@ class X86_64Assembler FINAL : public Assembler { void andl(CpuRegister dst, const Immediate& imm); void andl(CpuRegister dst, CpuRegister src); + void andq(CpuRegister dst, const Immediate& imm); void orl(CpuRegister dst, const Immediate& imm); void orl(CpuRegister dst, CpuRegister src); void xorl(CpuRegister dst, CpuRegister src); void xorq(CpuRegister dst, const Immediate& imm); + void xorq(CpuRegister dst, CpuRegister src); void addl(CpuRegister dst, CpuRegister src); void addl(CpuRegister reg, const Immediate& imm); diff --git a/test/405-optimizing-long-allocator/expected.txt b/test/405-optimizing-long-allocator/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/405-optimizing-long-allocator/expected.txt diff --git a/test/405-optimizing-long-allocator/info.txt b/test/405-optimizing-long-allocator/info.txt new file mode 100644 index 0000000000..b6b31aeddb --- /dev/null +++ b/test/405-optimizing-long-allocator/info.txt @@ -0,0 +1 @@ +Tests with long for the optimizing compiler's register allocator. diff --git a/test/405-optimizing-long-allocator/src/Main.java b/test/405-optimizing-long-allocator/src/Main.java new file mode 100644 index 0000000000..9fd840b543 --- /dev/null +++ b/test/405-optimizing-long-allocator/src/Main.java @@ -0,0 +1,172 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Note that $opt$ is a marker for the optimizing compiler to ensure +// it compiles these methods. + +public class Main { + public static void main(String[] args) { + + expectEquals(4, $opt$TestLostCopy()); + expectEquals(-10, $opt$TestTwoLive()); + expectEquals(-20, $opt$TestThreeLive()); + expectEquals(5, $opt$TestFourLive()); + expectEquals(10, $opt$TestMultipleLive()); + expectEquals(1, $opt$TestWithBreakAndContinue()); + expectEquals(-15, $opt$testSpillInIf(5, 6, 7)); + expectEquals(-567, $opt$TestAgressiveLive1(1, 2, 3, 4, 5, 6, 7)); + expectEquals(-77, $opt$TestAgressiveLive2(1, 2, 3, 4, 5, 6, 7)); + + expectEquals(-55834574850L, $opt$testSpillInIf(5, 6L << 32, 7L << 32)); + expectEquals(-73014444553L, $opt$TestAgressiveLive1( + 1L << 32, (1L << 32) + 1, 3L << 32, 4L << 32, 5L << 32, 6L << 32, (1L << 32) + 2)); + expectEquals(-124554051632L, $opt$TestAgressiveLive2( + 1L << 32, (1L << 32) + 1, 3L << 32, 4L << 32, 5L << 32, 6L << 32, 7L << 32)); + } + + public static long $opt$TestLostCopy() { + long a = 0; + long b = 0; + do { + b = a; + a++; + } while (a != 5); + return b; + } + + public static long $opt$TestTwoLive() { + long a = 0; + long b = 0; + do { + a++; + b += 3; + } while (a != 5); + return a - b; + } + + public static long $opt$TestThreeLive() { + long a = 0; + long b = 0; + long c = 0; + do { + a++; + b += 3; + c += 2; + } while (a != 5); + return a - b - c; + } + + public static long $opt$TestFourLive() { + long a = 0; + long b = 0; + long c = 0; + long d = 0; + do { + a++; + b += 3; + c += 2; + d++; + } while (a != 5); + return d; + } + + public static long $opt$TestMultipleLive() { + long a = 0; + long b = 0; + long c = 0; + long d = 0; + long e = 0; + long f = 0; + long g = 0; + do { + a++; + b++; + c++; + d++; + e += 3; + f += 2; + g += 2; + } while (a != 5); + return f; + } + + public static long $opt$TestWithBreakAndContinue() { + long a = 0; + long b = 0; + do { + a++; + if (a == 2) { + continue; + } + b++; + if (a == 5) { + break; + } + } while (true); + return a - b; + } + + public static long $opt$testSpillInIf(long a, long b, long c) { + long d = 0; + long e = 0; + if (a == 5) { + b++; + c++; + d += 2; + e += 3; + } + + return a - b - c - d - e; + } + + public static long $opt$TestAgressiveLive1(long a, long b, long c, long d, long e, long f, long g) { + long h = a - b; + long i = c - d; + long j = e - f; + long k = 42 + g - a; + do { + b++; + while (k != 1) { + --k; + ++i; + if (i == 9) { + ++i; + } + j += 5; + } + k = 9; + h++; + } while (h != 5); + return a - b - c - d - e - f - g - h - i - j - k; + } + + public static long $opt$TestAgressiveLive2(long a, long b, long c, long d, long e, long f, long g) { + long h = a - b; + long i = c - d; + long j = e - f; + long k = 42 + g - a; + do { + h++; + } while (h != 5); + return a - b - c - d - e - f - g - h - i - j - k; + } + + public static void expectEquals(long expected, long value) { + if (expected != value) { + throw new Error("Expected: " + expected + ", got: " + value); + } + } +} |