Diffstat (limited to 'compiler/optimizing')
35 files changed, 598 insertions, 453 deletions
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc index be432c5a20..06328f2490 100644 --- a/compiler/optimizing/boolean_simplifier.cc +++ b/compiler/optimizing/boolean_simplifier.cc @@ -73,8 +73,8 @@ static HInstruction* GetOppositeCondition(HInstruction* cond) { } } else { // General case when 'cond' is another instruction of type boolean. - // Negate with 'cond == 0'. - return new (allocator) HEqual(cond, graph->GetIntConstant(0)); + DCHECK_EQ(cond->GetType(), Primitive::Type::kPrimBoolean); + return new (allocator) HBooleanNot(cond); } } diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 8736374306..f7fa5db8d5 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -802,10 +802,15 @@ void CodeGenerator::ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend } } -void CodeGenerator::EmitParallelMoves(Location from1, Location to1, Location from2, Location to2) { +void CodeGenerator::EmitParallelMoves(Location from1, + Location to1, + Primitive::Type type1, + Location from2, + Location to2, + Primitive::Type type2) { HParallelMove parallel_move(GetGraph()->GetArena()); - parallel_move.AddMove(from1, to1, nullptr); - parallel_move.AddMove(from2, to2, nullptr); + parallel_move.AddMove(from1, to1, type1, nullptr); + parallel_move.AddMove(from2, to2, type2, nullptr); GetMoveResolver()->EmitNativeCode(&parallel_move); } diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index b888aca264..e536b2d0ee 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -244,7 +244,12 @@ class CodeGenerator { // of the architecture. static size_t GetCacheOffset(uint32_t index); - void EmitParallelMoves(Location from1, Location to1, Location from2, Location to2); + void EmitParallelMoves(Location from1, + Location to1, + Primitive::Type type1, + Location from2, + Location to2, + Primitive::Type type2); static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) { // Check that null value is not represented as an integer constant.
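Note on the boolean_simplifier.cc hunk above: negating a boolean-typed value is now modelled with the new HBooleanNot instruction rather than a comparison against zero, and the per-architecture lowerings added further down emit a single exclusive-or with 1 (eor on ARM/ARM64, xorl on x86/x86-64). A minimal standalone sketch of the identity this relies on, not ART code:

#include <cassert>

// For a value already known to be 0 or 1, XOR with 1 is logical negation.
// This mirrors HBooleanNot::Evaluate, which DCHECKs IsUint<1>(x) first.
static int BooleanNot(int b) {
  assert(b == 0 || b == 1);
  return b ^ 1;
}

int main() {
  assert(BooleanNot(0) == 1);
  assert(BooleanNot(1) == 0);
  return 0;
}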
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 332c99ae64..2ea920310a 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -141,8 +141,10 @@ class BoundsCheckSlowPathARM : public SlowPathCodeARM { codegen->EmitParallelMoves( index_location_, Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimInt, length_location_, - Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimInt); arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this); } @@ -262,8 +264,10 @@ class TypeCheckSlowPathARM : public SlowPathCodeARM { codegen->EmitParallelMoves( class_to_check_, Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, object_class_, - Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimNot); if (instruction_->IsInstanceOf()) { arm_codegen->InvokeRuntime( @@ -750,8 +754,10 @@ void CodeGeneratorARM::Move64(Location destination, Location source) { EmitParallelMoves( Location::RegisterLocation(source.AsRegisterPairHigh<Register>()), Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()), + Primitive::kPrimInt, Location::RegisterLocation(source.AsRegisterPairLow<Register>()), - Location::RegisterLocation(destination.AsRegisterPairLow<Register>())); + Location::RegisterLocation(destination.AsRegisterPairLow<Register>()), + Primitive::kPrimInt); } else if (source.IsFpuRegister()) { UNIMPLEMENTED(FATAL); } else { @@ -789,8 +795,10 @@ void CodeGeneratorARM::Move64(Location destination, Location source) { EmitParallelMoves( Location::StackSlot(source.GetStackIndex()), Location::StackSlot(destination.GetStackIndex()), + Primitive::kPrimInt, Location::StackSlot(source.GetHighStackIndex(kArmWordSize)), - Location::StackSlot(destination.GetHighStackIndex(kArmWordSize))); + Location::StackSlot(destination.GetHighStackIndex(kArmWordSize)), + Primitive::kPrimInt); } } } @@ -2657,6 +2665,20 @@ void InstructionCodeGeneratorARM::VisitNot(HNot* not_) { } } +void LocationsBuilderARM::VisitBooleanNot(HBooleanNot* bool_not) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(bool_not, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARM::VisitBooleanNot(HBooleanNot* bool_not) { + LocationSummary* locations = bool_not->GetLocations(); + Location out = locations->Out(); + Location in = locations->InAt(0); + __ eor(out.AsRegister<Register>(), in.AsRegister<Register>(), ShifterOperand(1)); +} + void LocationsBuilderARM::VisitCompare(HCompare* compare) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 33eacbaf08..efc41e7c06 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -122,8 +122,8 @@ class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 { // move resolver. 
InvokeRuntimeCallingConvention calling_convention; codegen->EmitParallelMoves( - index_location_, LocationFrom(calling_convention.GetRegisterAt(0)), - length_location_, LocationFrom(calling_convention.GetRegisterAt(1))); + index_location_, LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimInt, + length_location_, LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt); arm64_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); @@ -322,8 +322,8 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { // move resolver. InvokeRuntimeCallingConvention calling_convention; codegen->EmitParallelMoves( - class_to_check_, LocationFrom(calling_convention.GetRegisterAt(0)), - object_class_, LocationFrom(calling_convention.GetRegisterAt(1))); + class_to_check_, LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimNot, + object_class_, LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimNot); if (instruction_->IsInstanceOf()) { arm64_codegen->InvokeRuntime( @@ -466,8 +466,10 @@ void CodeGeneratorARM64::GenerateFrameEntry() { // sp[0] : current method. __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex)); GetAssembler()->cfi().AdjustCFAOffset(frame_size); - SpillRegisters(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); - SpillRegisters(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); + GetAssembler()->SpillRegisters(GetFramePreservedCoreRegisters(), + frame_size - GetCoreSpillSize()); + GetAssembler()->SpillRegisters(GetFramePreservedFPRegisters(), + frame_size - FrameEntrySpillSize()); } } @@ -475,8 +477,10 @@ void CodeGeneratorARM64::GenerateFrameExit() { GetAssembler()->cfi().RememberState(); if (!HasEmptyFrame()) { int frame_size = GetFrameSize(); - UnspillRegisters(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); - UnspillRegisters(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); + GetAssembler()->UnspillRegisters(GetFramePreservedFPRegisters(), + frame_size - FrameEntrySpillSize()); + GetAssembler()->UnspillRegisters(GetFramePreservedCoreRegisters(), + frame_size - GetCoreSpillSize()); __ Drop(frame_size); GetAssembler()->cfi().AdjustCFAOffset(-frame_size); } @@ -485,51 +489,6 @@ void CodeGeneratorARM64::GenerateFrameExit() { GetAssembler()->cfi().DefCFAOffset(GetFrameSize()); } -static inline dwarf::Reg DWARFReg(CPURegister reg) { - if (reg.IsFPRegister()) { - return dwarf::Reg::Arm64Fp(reg.code()); - } else { - DCHECK_LT(reg.code(), 31u); // X0 - X30. 
- return dwarf::Reg::Arm64Core(reg.code()); - } -} - -void CodeGeneratorARM64::SpillRegisters(vixl::CPURegList registers, int offset) { - int size = registers.RegisterSizeInBytes(); - while (registers.Count() >= 2) { - const CPURegister& dst0 = registers.PopLowestIndex(); - const CPURegister& dst1 = registers.PopLowestIndex(); - __ Stp(dst0, dst1, MemOperand(__ StackPointer(), offset)); - GetAssembler()->cfi().RelOffset(DWARFReg(dst0), offset); - GetAssembler()->cfi().RelOffset(DWARFReg(dst1), offset + size); - offset += 2 * size; - } - if (!registers.IsEmpty()) { - const CPURegister& dst0 = registers.PopLowestIndex(); - __ Str(dst0, MemOperand(__ StackPointer(), offset)); - GetAssembler()->cfi().RelOffset(DWARFReg(dst0), offset); - } - DCHECK(registers.IsEmpty()); -} - -void CodeGeneratorARM64::UnspillRegisters(vixl::CPURegList registers, int offset) { - int size = registers.RegisterSizeInBytes(); - while (registers.Count() >= 2) { - const CPURegister& dst0 = registers.PopLowestIndex(); - const CPURegister& dst1 = registers.PopLowestIndex(); - __ Ldp(dst0, dst1, MemOperand(__ StackPointer(), offset)); - GetAssembler()->cfi().Restore(DWARFReg(dst0)); - GetAssembler()->cfi().Restore(DWARFReg(dst1)); - offset += 2 * size; - } - if (!registers.IsEmpty()) { - const CPURegister& dst0 = registers.PopLowestIndex(); - __ Ldr(dst0, MemOperand(__ StackPointer(), offset)); - GetAssembler()->cfi().Restore(DWARFReg(dst0)); - } - DCHECK(registers.IsEmpty()); -} - void CodeGeneratorARM64::Bind(HBasicBlock* block) { __ Bind(GetLabelOf(block)); } @@ -1380,7 +1339,7 @@ void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction)); if (instruction->HasUses()) { locations->SetOut(Location::SameAsFirstInput()); } @@ -2320,6 +2279,16 @@ void InstructionCodeGeneratorARM64::VisitNot(HNot* instruction) { } } +void LocationsBuilderARM64::VisitBooleanNot(HBooleanNot* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) { + __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::Operand(1)); +} + void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 9430e31037..07c6dd059a 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -46,14 +46,11 @@ static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegi const vixl::Register tr = vixl::x18; // Thread Register static const vixl::Register kArtMethodRegister = vixl::w0; // Method register on invoke. 
-const vixl::Register kQuickSuspendRegister = vixl::x19; const vixl::CPURegList vixl_reserved_core_registers(vixl::ip0, vixl::ip1); const vixl::CPURegList vixl_reserved_fp_registers(vixl::d31); -// TODO: When the runtime does not use kQuickSuspendRegister as a suspend -// counter remove it from the reserved registers list. -const vixl::CPURegList runtime_reserved_core_registers(tr, kQuickSuspendRegister, vixl::lr); +const vixl::CPURegList runtime_reserved_core_registers(tr, vixl::lr); // Callee-saved registers defined by AAPCS64. const vixl::CPURegList callee_saved_core_registers(vixl::CPURegister::kRegister, @@ -227,8 +224,6 @@ class CodeGeneratorARM64 : public CodeGenerator { void GenerateFrameEntry() OVERRIDE; void GenerateFrameExit() OVERRIDE; - void SpillRegisters(vixl::CPURegList registers, int offset); - void UnspillRegisters(vixl::CPURegList registers, int offset); vixl::CPURegList GetFramePreservedCoreRegisters() const { return vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize, diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 38f9ef8efe..879216d59b 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -113,8 +113,10 @@ class BoundsCheckSlowPathX86 : public SlowPathCodeX86 { x86_codegen->EmitParallelMoves( index_location_, Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimInt, length_location_, - Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimInt); __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowArrayBounds))); RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); } @@ -266,8 +268,10 @@ class TypeCheckSlowPathX86 : public SlowPathCodeX86 { x86_codegen->EmitParallelMoves( class_to_check_, Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, object_class_, - Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimNot); if (instruction_->IsInstanceOf()) { __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, @@ -655,8 +659,10 @@ void CodeGeneratorX86::Move64(Location destination, Location source) { EmitParallelMoves( Location::RegisterLocation(source.AsRegisterPairHigh<Register>()), Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()), + Primitive::kPrimInt, Location::RegisterLocation(source.AsRegisterPairLow<Register>()), - Location::RegisterLocation(destination.AsRegisterPairLow<Register>())); + Location::RegisterLocation(destination.AsRegisterPairLow<Register>()), + Primitive::kPrimInt); } else if (source.IsFpuRegister()) { LOG(FATAL) << "Unimplemented"; } else { @@ -699,8 +705,10 @@ void CodeGeneratorX86::Move64(Location destination, Location source) { EmitParallelMoves( Location::StackSlot(source.GetStackIndex()), Location::StackSlot(destination.GetStackIndex()), + Primitive::kPrimInt, Location::StackSlot(source.GetHighStackIndex(kX86WordSize)), - Location::StackSlot(destination.GetHighStackIndex(kX86WordSize))); + Location::StackSlot(destination.GetHighStackIndex(kX86WordSize)), + Primitive::kPrimInt); } } } @@ -2915,6 +2923,21 @@ void InstructionCodeGeneratorX86::VisitNot(HNot* not_) { } } +void LocationsBuilderX86::VisitBooleanNot(HBooleanNot* bool_not) { + LocationSummary* locations = + new (GetGraph()->GetArena()) 
LocationSummary(bool_not, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); +} + +void InstructionCodeGeneratorX86::VisitBooleanNot(HBooleanNot* bool_not) { + LocationSummary* locations = bool_not->GetLocations(); + Location in = locations->InAt(0); + Location out = locations->Out(); + DCHECK(in.Equals(out)); + __ xorl(out.AsRegister<Register>(), Immediate(1)); +} + void LocationsBuilderX86::VisitCompare(HCompare* compare) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); @@ -3841,43 +3864,23 @@ X86Assembler* ParallelMoveResolverX86::GetAssembler() const { } void ParallelMoveResolverX86::MoveMemoryToMemory32(int dst, int src) { - ScratchRegisterScope possible_scratch( - this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); - int temp = possible_scratch.GetRegister(); - if (temp == kNoRegister) { - // Use the stack. - __ pushl(Address(ESP, src)); - __ popl(Address(ESP, dst)); - } else { - Register temp_reg = static_cast<Register>(temp); - __ movl(temp_reg, Address(ESP, src)); - __ movl(Address(ESP, dst), temp_reg); - } + ScratchRegisterScope ensure_scratch( + this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); + Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister()); + int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; + __ movl(temp_reg, Address(ESP, src + stack_offset)); + __ movl(Address(ESP, dst + stack_offset), temp_reg); } void ParallelMoveResolverX86::MoveMemoryToMemory64(int dst, int src) { - ScratchRegisterScope possible_scratch( - this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); - int temp = possible_scratch.GetRegister(); - if (temp == kNoRegister) { - // Use the stack instead. - // Push src low word. - __ pushl(Address(ESP, src)); - // Push src high word. Stack offset = 4. - __ pushl(Address(ESP, src + 4 /* offset */ + kX86WordSize /* high */)); - - // Pop into dst high word. Stack offset = 8. - // Pop with ESP address uses the 'after increment' value of ESP. - __ popl(Address(ESP, dst + 4 /* offset */ + kX86WordSize /* high */)); - // Finally dst low word. Stack offset = 4. - __ popl(Address(ESP, dst)); - } else { - Register temp_reg = static_cast<Register>(temp); - __ movl(temp_reg, Address(ESP, src)); - __ movl(Address(ESP, dst), temp_reg); - __ movl(temp_reg, Address(ESP, src + kX86WordSize)); - __ movl(Address(ESP, dst + kX86WordSize), temp_reg); - } + ScratchRegisterScope ensure_scratch( + this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); + Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister()); + int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; + __ movl(temp_reg, Address(ESP, src + stack_offset)); + __ movl(Address(ESP, dst + stack_offset), temp_reg); + __ movl(temp_reg, Address(ESP, src + stack_offset + kX86WordSize)); + __ movl(Address(ESP, dst + stack_offset + kX86WordSize), temp_reg); } void ParallelMoveResolverX86::EmitMove(size_t index) { @@ -3942,18 +3945,10 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { __ xorps(dest, dest); } else { ScratchRegisterScope ensure_scratch( - this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); - int temp_reg = ensure_scratch.GetRegister(); - if (temp_reg == kNoRegister) { - // Avoid spilling/restoring a scratch register by using the stack. 
- __ pushl(Immediate(value)); - __ movss(dest, Address(ESP, 0)); - __ addl(ESP, Immediate(4)); - } else { - Register temp = static_cast<Register>(temp_reg); - __ movl(temp, Immediate(value)); - __ movd(dest, temp); - } + this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); + Register temp = static_cast<Register>(ensure_scratch.GetRegister()); + __ movl(temp, Immediate(value)); + __ movd(dest, temp); } } else { DCHECK(destination.IsStackSlot()) << destination; @@ -4002,96 +3997,42 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { } } -void ParallelMoveResolverX86::Exchange(Register reg1, Register reg2) { - // Prefer to avoid xchg as it isn't speedy on smaller processors. - ScratchRegisterScope possible_scratch( - this, reg1, codegen_->GetNumberOfCoreRegisters()); - int temp_reg = possible_scratch.GetRegister(); - if (temp_reg == kNoRegister || temp_reg == reg2) { - __ pushl(reg1); - __ movl(reg1, reg2); - __ popl(reg2); - } else { - Register temp = static_cast<Register>(temp_reg); - __ movl(temp, reg1); - __ movl(reg1, reg2); - __ movl(reg2, temp); - } -} - void ParallelMoveResolverX86::Exchange(Register reg, int mem) { - ScratchRegisterScope possible_scratch( - this, reg, codegen_->GetNumberOfCoreRegisters()); - int temp_reg = possible_scratch.GetRegister(); - if (temp_reg == kNoRegister) { - __ pushl(Address(ESP, mem)); - __ movl(Address(ESP, mem + kX86WordSize), reg); - __ popl(reg); - } else { - Register temp = static_cast<Register>(temp_reg); - __ movl(temp, Address(ESP, mem)); - __ movl(Address(ESP, mem), reg); - __ movl(reg, temp); - } + Register suggested_scratch = reg == EAX ? EBX : EAX; + ScratchRegisterScope ensure_scratch( + this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters()); + + int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; + __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset)); + __ movl(Address(ESP, mem + stack_offset), reg); + __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister())); } void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) { - ScratchRegisterScope possible_scratch( - this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); - int temp_reg = possible_scratch.GetRegister(); - if (temp_reg == kNoRegister) { - __ pushl(Address(ESP, mem)); - __ movss(Address(ESP, mem + kX86WordSize), reg); - __ movss(reg, Address(ESP, 0)); - __ addl(ESP, Immediate(kX86WordSize)); - } else { - Register temp = static_cast<Register>(temp_reg); - __ movl(temp, Address(ESP, mem)); - __ movss(Address(ESP, mem), reg); - __ movd(reg, temp); - } + ScratchRegisterScope ensure_scratch( + this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); + + Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister()); + int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; + __ movl(temp_reg, Address(ESP, mem + stack_offset)); + __ movss(Address(ESP, mem + stack_offset), reg); + __ movd(reg, temp_reg); } void ParallelMoveResolverX86::Exchange(int mem1, int mem2) { - ScratchRegisterScope possible_scratch1( - this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); - int temp_reg1 = possible_scratch1.GetRegister(); - if (temp_reg1 == kNoRegister) { - // No free registers. Use the stack. - __ pushl(Address(ESP, mem1)); - __ pushl(Address(ESP, mem2 + kX86WordSize)); - // Pop with ESP address uses the 'after increment' value of ESP. - __ popl(Address(ESP, mem1 + kX86WordSize)); - __ popl(Address(ESP, mem2)); - } else { - // Got the first one. Try for a second. 
- ScratchRegisterScope possible_scratch2( - this, temp_reg1, codegen_->GetNumberOfCoreRegisters()); - int temp_reg2 = possible_scratch2.GetRegister(); - if (temp_reg2 == kNoRegister) { - Register temp = static_cast<Register>(temp_reg1); - // Bummer. Only have one free register to use. - // Save mem1 on the stack. - __ pushl(Address(ESP, mem1)); - - // Copy mem2 into mem1. - __ movl(temp, Address(ESP, mem2 + kX86WordSize)); - __ movl(Address(ESP, mem1 + kX86WordSize), temp); - - // Now pop mem1 into mem2. - // Pop with ESP address uses the 'after increment' value of ESP. - __ popl(Address(ESP, mem2)); - } else { - // Great. We have 2 registers to play with. - Register temp1 = static_cast<Register>(temp_reg1); - Register temp2 = static_cast<Register>(temp_reg2); - DCHECK_NE(temp1, temp2); - __ movl(temp1, Address(ESP, mem1)); - __ movl(temp2, Address(ESP, mem2)); - __ movl(Address(ESP, mem2), temp1); - __ movl(Address(ESP, mem1), temp2); - } - } + ScratchRegisterScope ensure_scratch1( + this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); + + Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX; + ScratchRegisterScope ensure_scratch2( + this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters()); + + int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0; + stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0; + __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset)); + __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset)); + __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister())); + __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister())); } void ParallelMoveResolverX86::EmitSwap(size_t index) { @@ -4100,7 +4041,7 @@ void ParallelMoveResolverX86::EmitSwap(size_t index) { Location destination = move->GetDestination(); if (source.IsRegister() && destination.IsRegister()) { - Exchange(destination.AsRegister<Register>(), source.AsRegister<Register>()); + __ xchgl(destination.AsRegister<Register>(), source.AsRegister<Register>()); } else if (source.IsRegister() && destination.IsStackSlot()) { Exchange(source.AsRegister<Register>(), destination.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsRegister()) { diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 00a4323a54..368ae0fb0e 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -106,7 +106,6 @@ class ParallelMoveResolverX86 : public ParallelMoveResolver { X86Assembler* GetAssembler() const; private: - void Exchange(Register reg1, Register Reg2); void Exchange(Register reg, int mem); void Exchange(int mem1, int mem2); void Exchange32(XmmRegister reg, int mem); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 7a928d4d7d..a3d3490357 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -161,8 +161,10 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCodeX86_64 { codegen->EmitParallelMoves( index_location_, Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimInt, length_location_, - Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + 
Primitive::kPrimInt); __ gs()->call(Address::Absolute( QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowArrayBounds), true)); RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); @@ -285,8 +287,10 @@ class TypeCheckSlowPathX86_64 : public SlowPathCodeX86_64 { codegen->EmitParallelMoves( class_to_check_, Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, object_class_, - Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimNot); if (instruction_->IsInstanceOf()) { __ gs()->call( @@ -2974,6 +2978,21 @@ void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) { } } +void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(bool_not, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); +} + +void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) { + LocationSummary* locations = bool_not->GetLocations(); + DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(), + locations->Out().AsRegister<CpuRegister>().AsRegister()); + Location out = locations->Out(); + __ xorl(out.AsRegister<CpuRegister>(), Immediate(1)); +} + void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); @@ -3817,27 +3836,15 @@ void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) { void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) { ScratchRegisterScope ensure_scratch( - this, TMP, codegen_->GetNumberOfCoreRegisters()); - - int temp_reg = ensure_scratch.GetRegister(); - if (temp_reg == kNoRegister) { - // Use the stack as a temporary. - // Save mem1 on the stack. - __ pushq(Address(CpuRegister(RSP), mem1)); - - // Copy mem2 into mem1. - __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem2 + kX86_64WordSize)); - __ movq(Address(CpuRegister(RSP), mem1 + kX86_64WordSize), CpuRegister(TMP)); + this, TMP, RAX, codegen_->GetNumberOfCoreRegisters()); - // Now pop mem1 into mem2. - __ popq(Address(CpuRegister(RSP), mem2)); - } else { - CpuRegister temp = CpuRegister(temp_reg); - __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1)); - __ movq(temp, Address(CpuRegister(RSP), mem2)); - __ movq(Address(CpuRegister(RSP), mem2), CpuRegister(TMP)); - __ movq(Address(CpuRegister(RSP), mem1), temp); - } + int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0; + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset)); + __ movq(CpuRegister(ensure_scratch.GetRegister()), + Address(CpuRegister(RSP), mem2 + stack_offset)); + __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP)); + __ movq(Address(CpuRegister(RSP), mem1 + stack_offset), + CpuRegister(ensure_scratch.GetRegister())); } void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) { @@ -3846,13 +3853,6 @@ void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) { __ movd(reg, CpuRegister(TMP)); } -void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) { - // Prefer to avoid xchg as it isn't speedy on smaller processors. 
- __ movq(CpuRegister(TMP), reg1); - __ movq(reg1, reg2); - __ movq(reg2, CpuRegister(TMP)); -} - void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) { __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); __ movsd(Address(CpuRegister(RSP), mem), reg); @@ -3865,7 +3865,7 @@ void ParallelMoveResolverX86_64::EmitSwap(size_t index) { Location destination = move->GetDestination(); if (source.IsRegister() && destination.IsRegister()) { - Exchange64(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>()); + __ xchgq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>()); } else if (source.IsRegister() && destination.IsStackSlot()) { Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsRegister()) { diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 61bf6ac71d..b4876ef161 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -118,7 +118,6 @@ class ParallelMoveResolverX86_64 : public ParallelMoveResolver { void Exchange32(CpuRegister reg, int mem); void Exchange32(XmmRegister reg, int mem); void Exchange32(int mem1, int mem2); - void Exchange64(CpuRegister reg1, CpuRegister reg2); void Exchange64(CpuRegister reg, int mem); void Exchange64(XmmRegister reg, int mem); void Exchange64(int mem1, int mem2); diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 2be117bf38..afcff1ef65 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -152,7 +152,7 @@ static void RunCodeOptimized(CodeGenerator* codegen, bool has_result, Expected expected) { graph->BuildDominatorTree(); - SsaLivenessAnalysis liveness(*graph, codegen); + SsaLivenessAnalysis liveness(graph, codegen); liveness.Analyze(); RegisterAllocator register_allocator(graph->GetArena(), codegen, liveness); diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index 966165bf4c..53f1f3c45c 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -194,7 +194,8 @@ static bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* inst int64_t value = CodeGenerator::GetInt64ValueOf(constant); - if (instr->IsAdd() || instr->IsSub() || instr->IsCondition() || instr->IsCompare()) { + if (instr->IsAdd() || instr->IsSub() || instr->IsCondition() || + instr->IsCompare() || instr->IsBoundsCheck()) { // Uses aliases of ADD/SUB instructions. return vixl::Assembler::IsImmAddSub(value); } else if (instr->IsAnd() || instr->IsOr() || instr->IsXor()) { diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc index 7623e421fd..61a7697301 100644 --- a/compiler/optimizing/dominator_test.cc +++ b/compiler/optimizing/dominator_test.cc @@ -36,7 +36,13 @@ static void TestCode(const uint16_t* data, const int* blocks, size_t blocks_leng ASSERT_EQ(graph->GetBlocks().Size(), blocks_length); for (size_t i = 0, e = blocks_length; i < e; ++i) { if (blocks[i] == -1) { - ASSERT_EQ(nullptr, graph->GetBlocks().Get(i)->GetDominator()); + if (graph->GetBlocks().Get(i) == nullptr) { + // Dead block. + } else { + // Only the entry block has no dominator. 
+ ASSERT_EQ(nullptr, graph->GetBlocks().Get(i)->GetDominator()); + ASSERT_TRUE(graph->GetBlocks().Get(i)->IsEntryBlock()); + } } else { ASSERT_NE(nullptr, graph->GetBlocks().Get(i)->GetDominator()); ASSERT_EQ(blocks[i], graph->GetBlocks().Get(i)->GetDominator()->GetBlockId()); diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 7c3c2bf03d..906c8e8c76 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -369,26 +369,40 @@ void SSAChecker::VisitPhi(HPhi* phi) { } } -void SSAChecker::VisitIf(HIf* instruction) { - VisitInstruction(instruction); - HInstruction* input = instruction->InputAt(0); +void SSAChecker::HandleBooleanInput(HInstruction* instruction, size_t input_index) { + HInstruction* input = instruction->InputAt(input_index); if (input->IsIntConstant()) { - int value = input->AsIntConstant()->GetValue(); + int32_t value = input->AsIntConstant()->GetValue(); if (value != 0 && value != 1) { AddError(StringPrintf( - "If instruction %d has a non-Boolean constant input " - "whose value is: %d.", + "%s instruction %d has a non-Boolean constant input %d whose value is: %d.", + instruction->DebugName(), instruction->GetId(), + static_cast<int>(input_index), value)); } - } else if (instruction->InputAt(0)->GetType() != Primitive::kPrimBoolean) { + } else if (input->GetType() == Primitive::kPrimInt && input->IsPhi()) { + // TODO: We need a data-flow analysis which determines if the Phi is boolean. + } else if (input->GetType() != Primitive::kPrimBoolean) { AddError(StringPrintf( - "If instruction %d has a non-Boolean input type: %s.", + "%s instruction %d has a non-Boolean input %d whose type is: %s.", + instruction->DebugName(), instruction->GetId(), - Primitive::PrettyDescriptor(instruction->InputAt(0)->GetType()))); + static_cast<int>(input_index), + Primitive::PrettyDescriptor(input->GetType()))); } } +void SSAChecker::VisitIf(HIf* instruction) { + VisitInstruction(instruction); + HandleBooleanInput(instruction, 0); +} + +void SSAChecker::VisitBooleanNot(HBooleanNot* instruction) { + VisitInstruction(instruction); + HandleBooleanInput(instruction, 0); +} + void SSAChecker::VisitCondition(HCondition* op) { VisitInstruction(op); if (op->GetType() != Primitive::kPrimBoolean) { diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index 89fea0a07f..4568a5ef9d 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -107,8 +107,11 @@ class SSAChecker : public GraphChecker { void VisitBinaryOperation(HBinaryOperation* op) OVERRIDE; void VisitCondition(HCondition* op) OVERRIDE; void VisitIf(HIf* instruction) OVERRIDE; + void VisitBooleanNot(HBooleanNot* instruction) OVERRIDE; void VisitConstant(HConstant* instruction) OVERRIDE; + void HandleBooleanInput(HInstruction* instruction, size_t input_index); + private: DISALLOW_COPY_AND_ASSIGN(SSAChecker); }; diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 87a8d33193..6d2a8d77e2 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -262,10 +262,6 @@ bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method, graph_->SetHasArrayAccesses(true); } - // Now that we have inlined the callee, we need to update the next - // instruction id of the caller, so that new instructions added - // after optimizations get a unique id. 
- graph_->SetCurrentInstructionId(callee_graph->GetNextInstructionId()); return true; } diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 94e27e912e..9a6062fedf 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -94,7 +94,7 @@ static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorA Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType()); Location actual_loc = locations->InAt(i); - parallel_move.AddMove(actual_loc, cc_loc, nullptr); + parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr); } codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index d1176c460f..d3a4e6ca15 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -103,7 +103,7 @@ static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorA Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType()); Location actual_loc = locations->InAt(i); - parallel_move.AddMove(actual_loc, cc_loc, nullptr); + parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr); } codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index aec2d19b1d..3c7a2660db 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -128,7 +128,7 @@ static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType()); Location actual_loc = locations->InAt(i); - parallel_move.AddMove(actual_loc, cc_loc, nullptr); + parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr); } codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index cbf94f0f81..d9a1c31c77 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -120,7 +120,7 @@ static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType()); Location actual_loc = locations->InAt(i); - parallel_move.AddMove(actual_loc, cc_loc, nullptr); + parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr); } codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index 28c5555d57..7818c606db 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -50,12 +50,12 @@ static void TestCode(const uint16_t* data, const int* expected_order, size_t num std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(*graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); - ASSERT_EQ(liveness.GetLinearOrder().Size(), number_of_blocks); + ASSERT_EQ(graph->GetLinearOrder().Size(), number_of_blocks); for (size_t i = 0; i < number_of_blocks; ++i) { - ASSERT_EQ(liveness.GetLinearOrder().Get(i)->GetBlockId(), expected_order[i]); + ASSERT_EQ(graph->GetLinearOrder().Get(i)->GetBlockId(), expected_order[i]); } }
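The MoveArguments helpers in the intrinsics files above now pass each argument's primitive type into HParallelMove::AddMove, matching the MoveOperands change in nodes.h further down. A small self-contained sketch, with hypothetical stand-in types rather than the ART classes, of what tagging every pending move with its type buys the resolver:

#include <iostream>
#include <vector>

// Hypothetical stand-ins for Primitive::Type and MoveOperands.
enum class PrimType { kInt, kLong, kFloat, kDouble, kNot };

struct Move {
  int source;
  int destination;
  PrimType type;
  // Mirrors MoveOperands::Is64BitMove(): the width of a move can be queried
  // without inspecting the source or destination locations.
  bool Is64Bit() const { return type == PrimType::kLong || type == PrimType::kDouble; }
};

int main() {
  std::vector<Move> moves = {{2, 4, PrimType::kInt}, {0, 2, PrimType::kLong}};
  for (const Move& m : moves) {
    std::cout << m.source << " -> " << m.destination
              << (m.Is64Bit() ? " (64-bit)\n" : " (32-bit)\n");
  }
  return 0;
}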
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index 61d6593f2b..52367730ed 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -69,7 +69,7 @@ TEST(LiveRangesTest, CFG1) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(*graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval(); @@ -117,7 +117,7 @@ TEST(LiveRangesTest, CFG2) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(*graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval(); @@ -168,7 +168,7 @@ TEST(LiveRangesTest, CFG3) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(*graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); // Test for the 4 constant. @@ -247,7 +247,7 @@ TEST(LiveRangesTest, Loop1) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(*graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); // Test for the 0 constant. @@ -327,7 +327,7 @@ TEST(LiveRangesTest, Loop2) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(*graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); // Test for the 0 constant. @@ -405,7 +405,7 @@ TEST(LiveRangesTest, CFG4) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(*graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); // Test for the 0 constant. 
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 81250ca133..8a96ee9ace 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -57,7 +57,7 @@ static void TestCode(const uint16_t* data, const char* expected) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(*graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); std::ostringstream buffer; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index d8a8554610..5fca4fab22 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -51,9 +51,7 @@ void HGraph::RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visit for (size_t i = 0; i < blocks_.Size(); ++i) { if (!visited.IsBitSet(i)) { HBasicBlock* block = blocks_.Get(i); - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - RemoveAsUser(it.Current()); - } + DCHECK(block->GetPhis().IsEmpty()) << "Phis are not inserted at this stage"; for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { RemoveAsUser(it.Current()); } @@ -61,19 +59,17 @@ void HGraph::RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visit } } -void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) const { +void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) { for (size_t i = 0; i < blocks_.Size(); ++i) { if (!visited.IsBitSet(i)) { HBasicBlock* block = blocks_.Get(i); + // We only need to update the successor, which might be live. for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) { block->GetSuccessors().Get(j)->RemovePredecessor(block); } - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - block->RemovePhi(it.Current()->AsPhi(), /*ensure_safety=*/ false); - } - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { - block->RemoveInstruction(it.Current(), /*ensure_safety=*/ false); - } + // Remove the block from the list of blocks, so that further analyses + // never see it. + blocks_.Put(i, nullptr); } } } @@ -258,6 +254,7 @@ void HGraph::SimplifyCFG() { // (2): Simplify loops by having only one back edge, and one preheader. for (size_t i = 0; i < blocks_.Size(); ++i) { HBasicBlock* block = blocks_.Get(i); + if (block == nullptr) continue; if (block->GetSuccessors().Size() > 1) { for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) { HBasicBlock* successor = block->GetSuccessors().Get(j); @@ -274,8 +271,9 @@ void HGraph::SimplifyCFG() { } bool HGraph::AnalyzeNaturalLoops() const { - for (size_t i = 0; i < blocks_.Size(); ++i) { - HBasicBlock* block = blocks_.Get(i); + // Order does not matter. + for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); if (block->IsLoopHeader()) { HLoopInformation* info = block->GetLoopInformation(); if (!info->Populate()) { @@ -964,23 +962,6 @@ static void MakeRoomFor(GrowableArray<HBasicBlock*>* blocks, } void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { - // Walk over the entry block and: - // - Move constants from the entry block to the outer_graph's entry block, - // - Replace HParameterValue instructions with their real value. - // - Remove suspend checks, that hold an environment. 
- int parameter_index = 0; - for (HInstructionIterator it(entry_block_->GetInstructions()); !it.Done(); it.Advance()) { - HInstruction* current = it.Current(); - if (current->IsConstant()) { - current->MoveBefore(outer_graph->GetEntryBlock()->GetLastInstruction()); - } else if (current->IsParameterValue()) { - current->ReplaceWith(invoke->InputAt(parameter_index++)); - } else { - DCHECK(current->IsGoto() || current->IsSuspendCheck()); - entry_block_->RemoveInstruction(current); - } - } - if (GetBlocks().Size() == 3) { // Simple case of an entry block, a body block, and an exit block. // Put the body block's instruction into `invoke`'s block. @@ -1106,6 +1087,36 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { } } + // Update the next instruction id of the outer graph, so that instructions + // added later get bigger ids than those in the inner graph. + outer_graph->SetCurrentInstructionId(GetNextInstructionId()); + + // Walk over the entry block and: + // - Move constants from the entry block to the outer_graph's entry block, + // - Replace HParameterValue instructions with their real value. + // - Remove suspend checks, that hold an environment. + // We must do this after the other blocks have been inlined, otherwise ids of + // constants could overlap with the inner graph. + int parameter_index = 0; + for (HInstructionIterator it(entry_block_->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* current = it.Current(); + if (current->IsNullConstant()) { + current->ReplaceWith(outer_graph->GetNullConstant()); + } else if (current->IsIntConstant()) { + current->ReplaceWith(outer_graph->GetIntConstant(current->AsIntConstant()->GetValue())); + } else if (current->IsLongConstant()) { + current->ReplaceWith(outer_graph->GetLongConstant(current->AsLongConstant()->GetValue())); + } else if (current->IsFloatConstant() || current->IsDoubleConstant()) { + // TODO: Don't duplicate floating-point constants. + current->MoveBefore(outer_graph->GetEntryBlock()->GetLastInstruction()); + } else if (current->IsParameterValue()) { + current->ReplaceWith(invoke->InputAt(parameter_index++)); + } else { + DCHECK(current->IsGoto() || current->IsSuspendCheck()); + entry_block_->RemoveInstruction(current); + } + } + // Finally remove the invoke from the caller. 
invoke->GetBlock()->RemoveInstruction(invoke); } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 5f50494482..fe47939359 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -112,6 +112,7 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { : arena_(arena), blocks_(arena, kDefaultNumberOfBlocks), reverse_post_order_(arena, kDefaultNumberOfBlocks), + linear_order_(arena, kDefaultNumberOfBlocks), entry_block_(nullptr), exit_block_(nullptr), maximum_number_of_out_vregs_(0), @@ -216,6 +217,10 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { return reverse_post_order_; } + const GrowableArray<HBasicBlock*>& GetLinearOrder() const { + return linear_order_; + } + bool HasArrayAccesses() const { return has_array_accesses_; } @@ -248,7 +253,7 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { ArenaBitVector* visited, ArenaBitVector* visiting); void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const; - void RemoveDeadBlocks(const ArenaBitVector& visited) const; + void RemoveDeadBlocks(const ArenaBitVector& visited); template <class InstType, typename ValueType> InstType* CreateConstant(ValueType value, ArenaSafeMap<ValueType, InstType*>* cache); @@ -262,6 +267,9 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { // List of blocks to perform a reverse post order tree traversal. GrowableArray<HBasicBlock*> reverse_post_order_; + // List of blocks to perform a linear order tree traversal. + GrowableArray<HBasicBlock*> linear_order_; + HBasicBlock* entry_block_; HBasicBlock* exit_block_; @@ -293,6 +301,7 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { ArenaSafeMap<int32_t, HIntConstant*> cached_int_constants_; ArenaSafeMap<int64_t, HLongConstant*> cached_long_constants_; + friend class SsaLivenessAnalysis; // For the linear order. ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1); DISALLOW_COPY_AND_ASSIGN(HGraph); }; @@ -676,6 +685,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(ArrayGet, Instruction) \ M(ArrayLength, Instruction) \ M(ArraySet, Instruction) \ + M(BooleanNot, UnaryOperation) \ M(BoundsCheck, Instruction) \ M(BoundType, Instruction) \ M(CheckCast, Instruction) \ @@ -2643,6 +2653,33 @@ class HNot : public HUnaryOperation { DISALLOW_COPY_AND_ASSIGN(HNot); }; +class HBooleanNot : public HUnaryOperation { + public: + explicit HBooleanNot(HInstruction* input) + : HUnaryOperation(Primitive::Type::kPrimBoolean, input) {} + + bool CanBeMoved() const OVERRIDE { return true; } + bool InstructionDataEquals(HInstruction* other) const OVERRIDE { + UNUSED(other); + return true; + } + + int32_t Evaluate(int32_t x) const OVERRIDE { + DCHECK(IsUint<1>(x)); + return !x; + } + + int64_t Evaluate(int64_t x ATTRIBUTE_UNUSED) const OVERRIDE { + LOG(FATAL) << DebugName() << " cannot be used with 64-bit values"; + UNREACHABLE(); + } + + DECLARE_INSTRUCTION(BooleanNot); + + private: + DISALLOW_COPY_AND_ASSIGN(HBooleanNot); +}; + class HTypeConversion : public HExpression<1> { public: // Instantiate a type conversion of `input` to `result_type`. 
@@ -3418,8 +3455,11 @@ class HMonitorOperation : public HTemplateInstruction<1> { class MoveOperands : public ArenaObject<kArenaAllocMisc> { public: - MoveOperands(Location source, Location destination, HInstruction* instruction) - : source_(source), destination_(destination), instruction_(instruction) {} + MoveOperands(Location source, + Location destination, + Primitive::Type type, + HInstruction* instruction) + : source_(source), destination_(destination), type_(type), instruction_(instruction) {} Location GetSource() const { return source_; } Location GetDestination() const { return destination_; } @@ -3467,11 +3507,17 @@ class MoveOperands : public ArenaObject<kArenaAllocMisc> { return source_.IsInvalid(); } + bool Is64BitMove() const { + return Primitive::Is64BitType(type_); + } + HInstruction* GetInstruction() const { return instruction_; } private: Location source_; Location destination_; + // The type this move is for. + Primitive::Type type_; // The instruction this move is assocatied with. Null when this move is // for moving an input in the expected locations of user (including a phi user). // This is only used in debug mode, to ensure we do not connect interval siblings @@ -3486,7 +3532,10 @@ class HParallelMove : public HTemplateInstruction<0> { explicit HParallelMove(ArenaAllocator* arena) : HTemplateInstruction(SideEffects::None()), moves_(arena, kDefaultNumberOfMoves) {} - void AddMove(Location source, Location destination, HInstruction* instruction) { + void AddMove(Location source, + Location destination, + Primitive::Type type, + HInstruction* instruction) { DCHECK(source.IsValid()); DCHECK(destination.IsValid()); if (kIsDebugBuild) { @@ -3512,7 +3561,7 @@ class HParallelMove : public HTemplateInstruction<0> { << "Same destination for two moves in a parallel move."; } } - moves_.Add(MoveOperands(source, destination, instruction)); + moves_.Add(MoveOperands(source, destination, type, instruction)); } MoveOperands* MoveOperandsAt(size_t index) const { @@ -3628,6 +3677,43 @@ class HPostOrderIterator : public ValueObject { DISALLOW_COPY_AND_ASSIGN(HPostOrderIterator); }; +class HLinearPostOrderIterator : public ValueObject { + public: + explicit HLinearPostOrderIterator(const HGraph& graph) + : order_(graph.GetLinearOrder()), index_(graph.GetLinearOrder().Size()) {} + + bool Done() const { return index_ == 0; } + + HBasicBlock* Current() const { return order_.Get(index_ -1); } + + void Advance() { + --index_; + DCHECK_GE(index_, 0U); + } + + private: + const GrowableArray<HBasicBlock*>& order_; + size_t index_; + + DISALLOW_COPY_AND_ASSIGN(HLinearPostOrderIterator); +}; + +class HLinearOrderIterator : public ValueObject { + public: + explicit HLinearOrderIterator(const HGraph& graph) + : order_(graph.GetLinearOrder()), index_(0) {} + + bool Done() const { return index_ == order_.Size(); } + HBasicBlock* Current() const { return order_.Get(index_); } + void Advance() { ++index_; } + + private: + const GrowableArray<HBasicBlock*>& order_; + size_t index_; + + DISALLOW_COPY_AND_ASSIGN(HLinearOrderIterator); +}; + // Iterator over the blocks that art part of the loop. Includes blocks part // of an inner loop. The order in which the blocks are iterated is on their // block id. 
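MoveOperands now records the type of the value being moved, and Is64BitMove() is what the parallel move resolver (parallel_move_resolver.cc/.h below) uses to break dependency cycles by swapping 64-bit moves before 32-bit ones. A standalone sketch, not ART code, of the hazard described there for the cycle (R0 -> R1) (R1 -> S) (S -> R0), where S is a single 32-bit stack slot:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t r0 = 0x1122334455667788ULL;  // 64-bit value that R0 -> R1 must deliver.
  uint32_t s  = 0xCAFEBABEu;            // 32-bit stack slot value destined for R0.

  // Resolving S <-> R0 first truncates: the slot keeps only R0's low half.
  uint32_t low_of_r0 = static_cast<uint32_t>(r0);
  r0 = s;           // R0 now holds the old slot value.
  s  = low_of_r0;   // The high 32 bits of the original R0 are lost.

  // The pending 64-bit move must now read the old R0 value out of S,
  // but only the low half survived, so R1 ends up with the wrong value.
  uint64_t r1 = s;
  assert(r1 != 0x1122334455667788ULL);
  (void)r0;
  return 0;
}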
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 64a066dea9..a17d6e1822 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -363,7 +363,7 @@ static void AllocateRegisters(HGraph* graph, CodeGenerator* codegen, PassInfoPrinter* pass_info_printer) { PrepareForRegisterAllocation(graph).Run(); - SsaLivenessAnalysis liveness(*graph, codegen); + SsaLivenessAnalysis liveness(graph, codegen); { PassInfo pass_info(SsaLivenessAnalysis::kLivenessPassName, pass_info_printer); liveness.Analyze(); diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index 4936685367..ad92ca59a1 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -189,9 +189,9 @@ MoveOperands* ParallelMoveResolver::PerformMove(size_t index) { const MoveOperands& other_move = *moves_.Get(i); if (other_move.Blocks(destination)) { DCHECK(other_move.IsPending()); - if (!destination.IsPair() && other_move.GetSource().IsPair()) { - // We swap pairs before swapping non-pairs. Go back from the - // cycle by returning the pair that must be swapped. + if (!move->Is64BitMove() && other_move.Is64BitMove()) { + // We swap 64bits moves before swapping 32bits moves. Go back from the + // cycle by returning the move that must be swapped. return moves_.Get(i); } do_swap = true; @@ -216,7 +216,7 @@ MoveOperands* ParallelMoveResolver::PerformMove(size_t index) { UpdateSourceOf(moves_.Get(i), swap_destination, source); } } - // If the swap was required because of a pair in the middle of a cycle, + // If the swap was required because of a 64bits move in the middle of a cycle, // we return the swapped move, so that the caller knows it needs to re-iterate // its dependency loop. return required_swap; @@ -269,20 +269,6 @@ int ParallelMoveResolver::AllocateScratchRegister(int blocked, } -int ParallelMoveResolver::AllocateScratchRegister(int blocked, - int register_count) { - int scratch = -1; - for (int reg = 0; reg < register_count; ++reg) { - if ((blocked != reg) && IsScratchLocation(Location::RegisterLocation(reg))) { - scratch = reg; - break; - } - } - - return scratch; -} - - ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope( ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers) : resolver_(resolver), @@ -296,16 +282,6 @@ ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope( } -ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope( - ParallelMoveResolver* resolver, int blocked, int number_of_registers) - : resolver_(resolver), - reg_(kNoRegister), - spilled_(false) { - // We don't want to spill a register if none are free. - reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers); -} - - ParallelMoveResolver::ScratchRegisterScope::~ScratchRegisterScope() { if (spilled_) { resolver_->RestoreScratch(reg_); diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h index 173cffc71e..95f8ad5b74 100644 --- a/compiler/optimizing/parallel_move_resolver.h +++ b/compiler/optimizing/parallel_move_resolver.h @@ -42,15 +42,10 @@ class ParallelMoveResolver : public ValueObject { protected: class ScratchRegisterScope : public ValueObject { public: - // Spill a scratch register if no regs are free. 
ScratchRegisterScope(ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers); - // Grab a scratch register only if available. - ScratchRegisterScope(ParallelMoveResolver* resolver, - int blocked, - int number_of_registers); ~ScratchRegisterScope(); int GetRegister() const { return reg_; } @@ -67,8 +62,6 @@ class ParallelMoveResolver : public ValueObject { // Allocate a scratch register for performing a move. The method will try to use // a register that is the destination of a move, but that move has not been emitted yet. int AllocateScratchRegister(int blocked, int if_scratch, int register_count, bool* spilled); - // As above, but return -1 if no free register. - int AllocateScratchRegister(int blocked, int register_count); // Emit a move. virtual void EmitMove(size_t index) = 0; @@ -92,12 +85,18 @@ class ParallelMoveResolver : public ValueObject { // other moves to satisfy dependencies). // // Return whether another move in the dependency cycle needs to swap. This - // is to handle pair swaps, where we want the pair to swap first to avoid - // building pairs that are unexpected by the code generator. For example, if - // we were to swap R1 with R2, we would need to update all locations using - // R2 to R1. So a (R2,R3) pair register could become (R1,R3). We could make - // the code generator understand such pairs, but it's easier and cleaner to - // just not create such pairs and exchange pairs in priority. + // is to handle 64-bit swaps: + // 1) In the case of register pairs, we want the pair to swap first to avoid + // building pairs that are unexpected by the code generator. For example, if + // we were to swap R1 with R2, we would need to update all locations using + // R2 to R1. So a (R2,R3) pair register could become (R1,R3). We could make + // the code generator understand such pairs, but it's easier and cleaner to + // just not create such pairs and to swap pairs first. + // 2) Even when the architecture does not have pairs, we must handle 64-bit swaps + // first. Consider the case: (R0->R1) (R1->S) (S->R0), where 'S' is a single + // stack slot. If we end up swapping S and R0, S will only contain the low bits + // of R0. If R0->R1 is for a 64-bit instruction, R1 will therefore not contain + // the right value.
MoveOperands* PerformMove(size_t index); DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolver); diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index 5c502f7ef4..95cca5172b 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -87,6 +87,7 @@ static HParallelMove* BuildParallelMove(ArenaAllocator* allocator, moves->AddMove( Location::RegisterLocation(operands[i][0]), Location::RegisterLocation(operands[i][1]), + Primitive::kPrimInt, nullptr); } return moves; @@ -145,10 +146,12 @@ TEST(ParallelMoveTest, ConstantLast) { moves->AddMove( Location::ConstantLocation(new (&allocator) HIntConstant(0)), Location::RegisterLocation(0), + Primitive::kPrimInt, nullptr); moves->AddMove( Location::RegisterLocation(1), Location::RegisterLocation(2), + Primitive::kPrimInt, nullptr); resolver.EmitNativeCode(moves); ASSERT_STREQ("(1 -> 2) (C -> 0)", resolver.GetMessage().c_str()); @@ -164,10 +167,12 @@ TEST(ParallelMoveTest, Pairs) { moves->AddMove( Location::RegisterLocation(2), Location::RegisterLocation(4), + Primitive::kPrimInt, nullptr); moves->AddMove( Location::RegisterPairLocation(0, 1), Location::RegisterPairLocation(2, 3), + Primitive::kPrimLong, nullptr); resolver.EmitNativeCode(moves); ASSERT_STREQ("(2 -> 4) (0,1 -> 2,3)", resolver.GetMessage().c_str()); @@ -179,10 +184,12 @@ TEST(ParallelMoveTest, Pairs) { moves->AddMove( Location::RegisterPairLocation(0, 1), Location::RegisterPairLocation(2, 3), + Primitive::kPrimLong, nullptr); moves->AddMove( Location::RegisterLocation(2), Location::RegisterLocation(4), + Primitive::kPrimInt, nullptr); resolver.EmitNativeCode(moves); ASSERT_STREQ("(2 -> 4) (0,1 -> 2,3)", resolver.GetMessage().c_str()); @@ -194,10 +201,12 @@ TEST(ParallelMoveTest, Pairs) { moves->AddMove( Location::RegisterPairLocation(0, 1), Location::RegisterPairLocation(2, 3), + Primitive::kPrimLong, nullptr); moves->AddMove( Location::RegisterLocation(2), Location::RegisterLocation(0), + Primitive::kPrimInt, nullptr); resolver.EmitNativeCode(moves); ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); @@ -208,14 +217,17 @@ TEST(ParallelMoveTest, Pairs) { moves->AddMove( Location::RegisterLocation(2), Location::RegisterLocation(7), + Primitive::kPrimInt, nullptr); moves->AddMove( Location::RegisterLocation(7), Location::RegisterLocation(1), + Primitive::kPrimInt, nullptr); moves->AddMove( Location::RegisterPairLocation(0, 1), Location::RegisterPairLocation(2, 3), + Primitive::kPrimLong, nullptr); resolver.EmitNativeCode(moves); ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); @@ -226,14 +238,17 @@ TEST(ParallelMoveTest, Pairs) { moves->AddMove( Location::RegisterLocation(2), Location::RegisterLocation(7), + Primitive::kPrimInt, nullptr); moves->AddMove( Location::RegisterPairLocation(0, 1), Location::RegisterPairLocation(2, 3), + Primitive::kPrimLong, nullptr); moves->AddMove( Location::RegisterLocation(7), Location::RegisterLocation(1), + Primitive::kPrimInt, nullptr); resolver.EmitNativeCode(moves); ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); @@ -244,14 +259,17 @@ TEST(ParallelMoveTest, Pairs) { moves->AddMove( Location::RegisterPairLocation(0, 1), Location::RegisterPairLocation(2, 3), + Primitive::kPrimLong, nullptr); moves->AddMove( Location::RegisterLocation(2), Location::RegisterLocation(7), + Primitive::kPrimInt, nullptr); moves->AddMove( Location::RegisterLocation(7), Location::RegisterLocation(1), + 
Primitive::kPrimInt, nullptr); resolver.EmitNativeCode(moves); ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); @@ -262,10 +280,12 @@ TEST(ParallelMoveTest, Pairs) { moves->AddMove( Location::RegisterPairLocation(0, 1), Location::RegisterPairLocation(2, 3), + Primitive::kPrimLong, nullptr); moves->AddMove( Location::RegisterPairLocation(2, 3), Location::RegisterPairLocation(0, 1), + Primitive::kPrimLong, nullptr); resolver.EmitNativeCode(moves); ASSERT_STREQ("(2,3 <-> 0,1)", resolver.GetMessage().c_str()); @@ -276,10 +296,12 @@ TEST(ParallelMoveTest, Pairs) { moves->AddMove( Location::RegisterPairLocation(2, 3), Location::RegisterPairLocation(0, 1), + Primitive::kPrimLong, nullptr); moves->AddMove( Location::RegisterPairLocation(0, 1), Location::RegisterPairLocation(2, 3), + Primitive::kPrimLong, nullptr); resolver.EmitNativeCode(moves); ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); @@ -292,18 +314,71 @@ TEST(ParallelMoveTest, Pairs) { moves->AddMove( Location::RegisterLocation(10), Location::RegisterLocation(5), + Primitive::kPrimInt, nullptr); moves->AddMove( Location::RegisterPairLocation(4, 5), Location::DoubleStackSlot(32), + Primitive::kPrimLong, nullptr); moves->AddMove( Location::DoubleStackSlot(32), Location::RegisterPairLocation(10, 11), + Primitive::kPrimLong, nullptr); resolver.EmitNativeCode(moves); ASSERT_STREQ("(2x32(sp) <-> 10,11) (4,5 <-> 2x32(sp)) (4 -> 5)", resolver.GetMessage().c_str()); } } +// Test that we do 64bits moves before 32bits moves. +TEST(ParallelMoveTest, CyclesWith64BitsMoves) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + { + TestParallelMoveResolver resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterLocation(0), + Location::RegisterLocation(1), + Primitive::kPrimLong, + nullptr); + moves->AddMove( + Location::RegisterLocation(1), + Location::StackSlot(48), + Primitive::kPrimInt, + nullptr); + moves->AddMove( + Location::StackSlot(48), + Location::RegisterLocation(0), + Primitive::kPrimInt, + nullptr); + resolver.EmitNativeCode(moves); + ASSERT_STREQ("(0 <-> 1) (48(sp) <-> 0)", resolver.GetMessage().c_str()); + } + + { + TestParallelMoveResolver resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterPairLocation(0, 1), + Location::RegisterPairLocation(2, 3), + Primitive::kPrimLong, + nullptr); + moves->AddMove( + Location::RegisterPairLocation(2, 3), + Location::DoubleStackSlot(32), + Primitive::kPrimLong, + nullptr); + moves->AddMove( + Location::DoubleStackSlot(32), + Location::RegisterPairLocation(0, 1), + Primitive::kPrimLong, + nullptr); + resolver.EmitNativeCode(moves); + ASSERT_STREQ("(2x32(sp) <-> 0,1) (2,3 <-> 2x32(sp))", resolver.GetMessage().c_str()); + } +} + } // namespace art diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc index c20c8a172d..af93438c9a 100644 --- a/compiler/optimizing/primitive_type_propagation.cc +++ b/compiler/optimizing/primitive_type_propagation.cc @@ -65,6 +65,10 @@ bool PrimitiveTypePropagation::UpdateType(HPhi* phi) { if (equivalent->IsPhi()) { equivalent->AsPhi()->SetLive(); AddToWorklist(equivalent->AsPhi()); + } else if (equivalent == input) { + // The input has changed its type. It can be an input of other phis, + // so we need to put phi users in the work list. 
+ AddDependentInstructionsToWorklist(equivalent); } } } @@ -117,10 +121,10 @@ void PrimitiveTypePropagation::AddToWorklist(HPhi* instruction) { worklist_.Add(instruction); } -void PrimitiveTypePropagation::AddDependentInstructionsToWorklist(HPhi* instruction) { +void PrimitiveTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) { for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) { HPhi* phi = it.Current()->GetUser()->AsPhi(); - if (phi != nullptr && phi->IsLive()) { + if (phi != nullptr && phi->IsLive() && phi->GetType() != instruction->GetType()) { AddToWorklist(phi); } } diff --git a/compiler/optimizing/primitive_type_propagation.h b/compiler/optimizing/primitive_type_propagation.h index 1374cbb6ab..6d370ed2ab 100644 --- a/compiler/optimizing/primitive_type_propagation.h +++ b/compiler/optimizing/primitive_type_propagation.h @@ -33,7 +33,7 @@ class PrimitiveTypePropagation : public ValueObject { void VisitBasicBlock(HBasicBlock* block); void ProcessWorklist(); void AddToWorklist(HPhi* phi); - void AddDependentInstructionsToWorklist(HPhi* phi); + void AddDependentInstructionsToWorklist(HInstruction* instruction); bool UpdateType(HPhi* phi); HGraph* const graph_; diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 4bca43499f..a02b1dacf7 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -103,7 +103,7 @@ void RegisterAllocator::AllocateRegisters() { // Since only parallel moves have been inserted during the register allocation, // these checks are mostly for making sure these moves have been added correctly. size_t current_liveness = 0; - for (HLinearOrderIterator it(liveness_); !it.Done(); it.Advance()) { + for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { HInstruction* instruction = inst_it.Current(); @@ -147,7 +147,7 @@ void RegisterAllocator::BlockRegister(Location location, void RegisterAllocator::AllocateRegistersInternal() { // Iterate post-order, to ensure the list is sorted, and the last added interval // is the one with the lowest start position. - for (HLinearPostOrderIterator it(liveness_); !it.Done(); it.Advance()) { + for (HLinearPostOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); for (HBackwardInstructionIterator back_it(block->GetInstructions()); !back_it.Done(); back_it.Advance()) { @@ -224,7 +224,7 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { temp_intervals_.Add(interval); interval->AddTempUse(instruction, i); if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) { - interval->AddHighInterval(true); + interval->AddHighInterval(/* is_temp */ true); LiveInterval* high = interval->GetHighInterval(); temp_intervals_.Add(high); unhandled_fp_intervals_.Add(high); @@ -310,6 +310,29 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { current->AddHighInterval(); } + for (size_t safepoint_index = safepoints_.Size(); safepoint_index > 0; --safepoint_index) { + HInstruction* safepoint = safepoints_.Get(safepoint_index - 1); + size_t safepoint_position = safepoint->GetLifetimePosition(); + + // Test that safepoints are ordered in the optimal way. 
+ DCHECK(safepoint_index == safepoints_.Size() + || safepoints_.Get(safepoint_index)->GetLifetimePosition() < safepoint_position); + + if (safepoint_position == current->GetStart()) { + // The safepoint is for this instruction, so the location of the instruction + // does not need to be saved. + DCHECK_EQ(safepoint_index, safepoints_.Size()); + DCHECK_EQ(safepoint, instruction); + continue; + } else if (current->IsDeadAt(safepoint_position)) { + break; + } else if (!current->Covers(safepoint_position)) { + // Hole in the interval. + continue; + } + current->AddSafepoint(safepoint); + } + // Some instructions define their output in fixed register/stack slot. We need // to ensure we know these locations before doing register allocation. For a // given register, we create an interval that covers these locations. The register @@ -1204,10 +1227,10 @@ void RegisterAllocator::AddMove(HParallelMove* move, && codegen_->ShouldSplitLongMoves() // The parallel move resolver knows how to deal with long constants. && !source.IsConstant()) { - move->AddMove(source.ToLow(), destination.ToLow(), instruction); - move->AddMove(source.ToHigh(), destination.ToHigh(), nullptr); + move->AddMove(source.ToLow(), destination.ToLow(), Primitive::kPrimInt, instruction); + move->AddMove(source.ToHigh(), destination.ToHigh(), Primitive::kPrimInt, nullptr); } else { - move->AddMove(source, destination, instruction); + move->AddMove(source, destination, type, instruction); } } @@ -1399,7 +1422,6 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { : Location::StackSlot(interval->GetParent()->GetSpillSlot())); } UsePosition* use = current->GetFirstUse(); - size_t safepoint_index = safepoints_.Size(); // Walk over all siblings, updating locations of use positions, and // connecting them when they are adjacent. @@ -1450,28 +1472,12 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { InsertParallelMoveAt(current->GetEnd(), interval->GetDefinedBy(), source, destination); } - // At each safepoint, we record stack and register information. - // We iterate backwards to test safepoints in ascending order of positions, - // which is what LiveInterval::Covers is optimized for. - for (; safepoint_index > 0; --safepoint_index) { - HInstruction* safepoint = safepoints_.Get(safepoint_index - 1); - size_t position = safepoint->GetLifetimePosition(); - - // Test that safepoints are ordered in the optimal way. - DCHECK(safepoint_index == safepoints_.Size() - || safepoints_.Get(safepoint_index)->GetLifetimePosition() <= position); - - if (current->IsDeadAt(position)) { - break; - } else if (!current->Covers(position)) { - continue; - } else if (interval->GetStart() == position) { - // The safepoint is for this instruction, so the location of the instruction - // does not need to be saved. 
- continue; - } + for (SafepointPosition* safepoint_position = current->GetFirstSafepoint(); + safepoint_position != nullptr; + safepoint_position = safepoint_position->GetNext()) { + DCHECK(current->Covers(safepoint_position->GetPosition())); - LocationSummary* locations = safepoint->GetLocations(); + LocationSummary* locations = safepoint_position->GetLocations(); if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) { locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize); } @@ -1589,7 +1595,7 @@ void RegisterAllocator::Resolve() { maximum_number_of_live_core_registers_, maximum_number_of_live_fp_registers_, reserved_out_slots_, - liveness_.GetLinearOrder()); + codegen_->GetGraph()->GetLinearOrder()); // Adjust the Out Location of instructions. // TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration. @@ -1669,7 +1675,7 @@ void RegisterAllocator::Resolve() { } // Resolve non-linear control flow across branches. Order does not matter. - for (HLinearOrderIterator it(liveness_); !it.Done(); it.Advance()) { + for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); BitVector* live = liveness_.GetLiveInSet(*block); for (uint32_t idx : live->Indexes()) { @@ -1682,7 +1688,7 @@ void RegisterAllocator::Resolve() { } // Resolve phi inputs. Order does not matter. - for (HLinearOrderIterator it(liveness_); !it.Done(); it.Advance()) { + for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) { HBasicBlock* current = it.Current(); for (HInstructionIterator inst_it(current->GetPhis()); !inst_it.Done(); inst_it.Advance()) { HInstruction* phi = inst_it.Current(); diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 3951439881..c307d98555 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -46,7 +46,7 @@ static bool Check(const uint16_t* data) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(*graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); register_allocator.AllocateRegisters(); @@ -306,7 +306,7 @@ TEST(RegisterAllocatorTest, Loop3) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(*graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); register_allocator.AllocateRegisters(); @@ -340,7 +340,7 @@ TEST(RegisterAllocatorTest, FirstRegisterUse) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(*graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); HXor* first_xor = graph->GetBlocks().Get(1)->GetFirstInstruction()->AsXor(); @@ -395,7 +395,7 @@ TEST(RegisterAllocatorTest, DeadPhi) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( 
X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(*graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); register_allocator.AllocateRegisters(); @@ -419,7 +419,7 @@ TEST(RegisterAllocatorTest, FreeUntil) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(*graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); @@ -523,7 +523,7 @@ TEST(RegisterAllocatorTest, PhiHint) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(*graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); // Check that the register allocator is deterministic. @@ -540,7 +540,7 @@ TEST(RegisterAllocatorTest, PhiHint) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(*graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); // Set the phi to a specific register, and check that the inputs get allocated @@ -559,7 +559,7 @@ TEST(RegisterAllocatorTest, PhiHint) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(*graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); // Set input1 to a specific register, and check that the phi and other input get allocated @@ -578,7 +578,7 @@ TEST(RegisterAllocatorTest, PhiHint) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(*graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); // Set input2 to a specific register, and check that the phi and other input get allocated @@ -632,7 +632,7 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(*graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); @@ -647,7 +647,7 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(*graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); // Check that the field gets put in the register expected by its use. 
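The repeated test updates above reflect the SsaLivenessAnalysis constructor now taking HGraph* rather than const HGraph&: the analysis writes the computed linear order back into the graph (see the ssa_liveness_analysis.cc hunks below), which a const reference would not allow. A toy compilable sketch of that ownership move, using hypothetical Graph/Block/LivenessAnalysis types rather than the ART classes:

#include <cassert>
#include <vector>

// Hypothetical stand-ins, for illustration only.
struct Block { int id; };

struct Graph {
  std::vector<Block*> blocks;
  std::vector<Block*> linear_order;  // Owned by the graph after this change.
};

class LivenessAnalysis {
 public:
  explicit LivenessAnalysis(Graph* graph) : graph_(graph) {}

  void Linearize() {
    // Placeholder for the real worklist-based linearization: simply copy the
    // blocks, to show where the result is written (into the graph, not here).
    graph_->linear_order = graph_->blocks;
  }

 private:
  Graph* const graph_;  // Mutable graph, matching the new constructor signature.
};

int main() {
  Block b0{0}, b1{1};
  Graph graph{{&b0, &b1}, {}};
  LivenessAnalysis(&graph).Linearize();
  assert(graph.linear_order.size() == 2u);
  return 0;
}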
@@ -699,7 +699,7 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(*graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); @@ -715,7 +715,7 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(*graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); // check that both adds get the same register. @@ -766,7 +766,7 @@ TEST(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(*graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); @@ -856,7 +856,7 @@ TEST(RegisterAllocatorTest, SpillInactive) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(*graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen); RegisterAllocator register_allocator(&allocator, &codegen, liveness); register_allocator.unhandled_core_intervals_.Add(fourth); diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 95da6ef551..302df2a1d2 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -69,9 +69,9 @@ void SsaLivenessAnalysis::LinearizeGraph() { // current reverse post order in the graph, but it would require making // order queries to a GrowableArray, which is not the best data structure // for it. - GrowableArray<uint32_t> forward_predecessors(graph_.GetArena(), graph_.GetBlocks().Size()); - forward_predecessors.SetSize(graph_.GetBlocks().Size()); - for (HReversePostOrderIterator it(graph_); !it.Done(); it.Advance()) { + GrowableArray<uint32_t> forward_predecessors(graph_->GetArena(), graph_->GetBlocks().Size()); + forward_predecessors.SetSize(graph_->GetBlocks().Size()); + for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); size_t number_of_forward_predecessors = block->GetPredecessors().Size(); if (block->IsLoopHeader()) { @@ -86,11 +86,11 @@ void SsaLivenessAnalysis::LinearizeGraph() { // iterate over the successors. When all non-back edge predecessors of a // successor block are visited, the successor block is added in the worklist // following an order that satisfies the requirements to build our linear graph. 
- GrowableArray<HBasicBlock*> worklist(graph_.GetArena(), 1); - worklist.Add(graph_.GetEntryBlock()); + GrowableArray<HBasicBlock*> worklist(graph_->GetArena(), 1); + worklist.Add(graph_->GetEntryBlock()); do { HBasicBlock* current = worklist.Pop(); - linear_order_.Add(current); + graph_->linear_order_.Add(current); for (size_t i = 0, e = current->GetSuccessors().Size(); i < e; ++i) { HBasicBlock* successor = current->GetSuccessors().Get(i); int block_id = successor->GetBlockId(); @@ -115,7 +115,7 @@ void SsaLivenessAnalysis::NumberInstructions() { // to differentiate between the start and end of an instruction. Adding 2 to // the lifetime position for each instruction ensures the start of an // instruction is different than the end of the previous instruction. - for (HLinearOrderIterator it(*this); !it.Done(); it.Advance()) { + for (HLinearOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); block->SetLifetimeStart(lifetime_position); @@ -127,7 +127,7 @@ void SsaLivenessAnalysis::NumberInstructions() { instructions_from_ssa_index_.Add(current); current->SetSsaIndex(ssa_index++); current->SetLiveInterval( - LiveInterval::MakeInterval(graph_.GetArena(), current->GetType(), current)); + LiveInterval::MakeInterval(graph_->GetArena(), current->GetType(), current)); } current->SetLifetimePosition(lifetime_position); } @@ -145,7 +145,7 @@ void SsaLivenessAnalysis::NumberInstructions() { instructions_from_ssa_index_.Add(current); current->SetSsaIndex(ssa_index++); current->SetLiveInterval( - LiveInterval::MakeInterval(graph_.GetArena(), current->GetType(), current)); + LiveInterval::MakeInterval(graph_->GetArena(), current->GetType(), current)); } instructions_from_lifetime_position_.Add(current); current->SetLifetimePosition(lifetime_position); @@ -158,11 +158,11 @@ void SsaLivenessAnalysis::NumberInstructions() { } void SsaLivenessAnalysis::ComputeLiveness() { - for (HLinearOrderIterator it(*this); !it.Done(); it.Advance()) { + for (HLinearOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); block_infos_.Put( block->GetBlockId(), - new (graph_.GetArena()) BlockInfo(graph_.GetArena(), *block, number_of_ssa_values_)); + new (graph_->GetArena()) BlockInfo(graph_->GetArena(), *block, number_of_ssa_values_)); } // Compute the live ranges, as well as the initial live_in, live_out, and kill sets. @@ -179,7 +179,7 @@ void SsaLivenessAnalysis::ComputeLiveness() { void SsaLivenessAnalysis::ComputeLiveRanges() { // Do a post order visit, adding inputs of instructions live in the block where // that instruction is defined, and killing instructions that are being visited. 
- for (HLinearPostOrderIterator it(*this); !it.Done(); it.Advance()) { + for (HLinearPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); BitVector* kill = GetKillSet(*block); @@ -281,7 +281,7 @@ void SsaLivenessAnalysis::ComputeLiveInAndLiveOutSets() { do { changed = false; - for (HPostOrderIterator it(graph_); !it.Done(); it.Advance()) { + for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { const HBasicBlock& block = *it.Current(); // The live_in set depends on the kill set (which does not diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index d2da84c0c0..98f98a29d1 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -149,6 +149,39 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> { DISALLOW_COPY_AND_ASSIGN(UsePosition); }; +class SafepointPosition : public ArenaObject<kArenaAllocMisc> { + public: + explicit SafepointPosition(HInstruction* instruction) + : instruction_(instruction), + next_(nullptr) {} + + void SetNext(SafepointPosition* next) { + next_ = next; + } + + size_t GetPosition() const { + return instruction_->GetLifetimePosition(); + } + + SafepointPosition* GetNext() const { + return next_; + } + + LocationSummary* GetLocations() const { + return instruction_->GetLocations(); + } + + HInstruction* GetInstruction() const { + return instruction_; + } + + private: + HInstruction* const instruction_; + SafepointPosition* next_; + + DISALLOW_COPY_AND_ASSIGN(SafepointPosition); +}; + /** * An interval is a list of disjoint live ranges where an instruction is live. * Each instruction that has uses gets an interval. @@ -459,6 +492,15 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { return defined_by_; } + SafepointPosition* FindSafepointJustBefore(size_t position) const { + for (SafepointPosition* safepoint = first_safepoint_, *previous = nullptr; + safepoint != nullptr; + previous = safepoint, safepoint = safepoint->GetNext()) { + if (safepoint->GetPosition() >= position) return previous; + } + return last_safepoint_; + } + /** * Split this interval at `position`. This interval is changed to: * [start ... position). 
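FindSafepointJustBefore walks the new singly-linked SafepointPosition list, which AddSafepoint keeps sorted by increasing lifetime position, and returns the last safepoint strictly before the given position (or null when there is none). A small self-contained mirror of that search, using plain integers in place of lifetime positions and a toy Node type in place of SafepointPosition:

#include <cassert>
#include <cstddef>

// Toy node standing in for SafepointPosition.
struct Node {
  std::size_t position;
  Node* next;
};

// Returns the last node strictly before `position`, or null if every node is
// at or after it; `last` is returned when all nodes come before `position`.
Node* FindJustBefore(Node* first, Node* last, std::size_t position) {
  for (Node* node = first, *previous = nullptr;
       node != nullptr;
       previous = node, node = node->next) {
    if (node->position >= position) return previous;
  }
  return last;
}

int main() {
  Node c{18, nullptr};
  Node b{10, &c};
  Node a{4, &b};
  assert(FindJustBefore(&a, &c, 2) == nullptr);  // Nothing strictly before 2.
  assert(FindJustBefore(&a, &c, 10) == &a);      // Position 10 itself does not count.
  assert(FindJustBefore(&a, &c, 25) == &c);      // Everything is before 25.
  return 0;
}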
@@ -477,6 +519,19 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { } LiveInterval* new_interval = new (allocator_) LiveInterval(allocator_, type_); + SafepointPosition* new_last_safepoint = FindSafepointJustBefore(position); + if (new_last_safepoint == nullptr) { + new_interval->first_safepoint_ = first_safepoint_; + new_interval->last_safepoint_ = last_safepoint_; + first_safepoint_ = last_safepoint_ = nullptr; + } else if (last_safepoint_ != new_last_safepoint) { + new_interval->last_safepoint_ = last_safepoint_; + new_interval->first_safepoint_ = new_last_safepoint->GetNext(); + DCHECK(new_interval->first_safepoint_ != nullptr); + last_safepoint_ = new_last_safepoint; + last_safepoint_->SetNext(nullptr); + } + new_interval->next_sibling_ = next_sibling_; next_sibling_ = new_interval; new_interval->parent_ = parent_; @@ -703,6 +758,21 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { UNREACHABLE(); } + void AddSafepoint(HInstruction* instruction) { + SafepointPosition* safepoint = new (allocator_) SafepointPosition(instruction); + if (first_safepoint_ == nullptr) { + first_safepoint_ = last_safepoint_ = safepoint; + } else { + DCHECK_LT(last_safepoint_->GetPosition(), safepoint->GetPosition()); + last_safepoint_->SetNext(safepoint); + last_safepoint_ = safepoint; + } + } + + SafepointPosition* GetFirstSafepoint() const { + return first_safepoint_; + } + private: LiveInterval(ArenaAllocator* allocator, Primitive::Type type, @@ -715,6 +785,8 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { : allocator_(allocator), first_range_(nullptr), last_range_(nullptr), + first_safepoint_(nullptr), + last_safepoint_(nullptr), last_visited_range_(nullptr), first_use_(nullptr), type_(type), @@ -771,6 +843,10 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { LiveRange* first_range_; LiveRange* last_range_; + // Safepoints where this interval is live. + SafepointPosition* first_safepoint_; + SafepointPosition* last_safepoint_; + // Last visited range. This is a range search optimization leveraging the fact // that the register allocator does a linear scan through the intervals. 
LiveRange* last_visited_range_; @@ -838,15 +914,14 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { */ class SsaLivenessAnalysis : public ValueObject { public: - SsaLivenessAnalysis(const HGraph& graph, CodeGenerator* codegen) + SsaLivenessAnalysis(HGraph* graph, CodeGenerator* codegen) : graph_(graph), codegen_(codegen), - linear_order_(graph.GetArena(), graph.GetBlocks().Size()), - block_infos_(graph.GetArena(), graph.GetBlocks().Size()), - instructions_from_ssa_index_(graph.GetArena(), 0), - instructions_from_lifetime_position_(graph.GetArena(), 0), + block_infos_(graph->GetArena(), graph->GetBlocks().Size()), + instructions_from_ssa_index_(graph->GetArena(), 0), + instructions_from_lifetime_position_(graph->GetArena(), 0), number_of_ssa_values_(0) { - block_infos_.SetSize(graph.GetBlocks().Size()); + block_infos_.SetSize(graph->GetBlocks().Size()); } void Analyze(); @@ -863,10 +938,6 @@ class SsaLivenessAnalysis : public ValueObject { return &block_infos_.Get(block.GetBlockId())->kill_; } - const GrowableArray<HBasicBlock*>& GetLinearOrder() const { - return linear_order_; - } - HInstruction* GetInstructionFromSsaIndex(size_t index) const { return instructions_from_ssa_index_.Get(index); } @@ -934,9 +1005,8 @@ class SsaLivenessAnalysis : public ValueObject { return instruction->GetType() == Primitive::kPrimNot; } - const HGraph& graph_; + HGraph* const graph_; CodeGenerator* const codegen_; - GrowableArray<HBasicBlock*> linear_order_; GrowableArray<BlockInfo*> block_infos_; // Temporary array used when computing live_in, live_out, and kill sets. @@ -949,43 +1019,6 @@ class SsaLivenessAnalysis : public ValueObject { DISALLOW_COPY_AND_ASSIGN(SsaLivenessAnalysis); }; -class HLinearPostOrderIterator : public ValueObject { - public: - explicit HLinearPostOrderIterator(const SsaLivenessAnalysis& liveness) - : order_(liveness.GetLinearOrder()), index_(liveness.GetLinearOrder().Size()) {} - - bool Done() const { return index_ == 0; } - - HBasicBlock* Current() const { return order_.Get(index_ -1); } - - void Advance() { - --index_; - DCHECK_GE(index_, 0U); - } - - private: - const GrowableArray<HBasicBlock*>& order_; - size_t index_; - - DISALLOW_COPY_AND_ASSIGN(HLinearPostOrderIterator); -}; - -class HLinearOrderIterator : public ValueObject { - public: - explicit HLinearOrderIterator(const SsaLivenessAnalysis& liveness) - : order_(liveness.GetLinearOrder()), index_(0) {} - - bool Done() const { return index_ == order_.Size(); } - HBasicBlock* Current() const { return order_.Get(index_); } - void Advance() { ++index_; } - - private: - const GrowableArray<HBasicBlock*>& order_; - size_t index_; - - DISALLOW_COPY_AND_ASSIGN(HLinearOrderIterator); -}; - } // namespace art #endif // ART_COMPILER_OPTIMIZING_SSA_LIVENESS_ANALYSIS_H_ |
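Tying the SafepointPosition pieces together, the new code in LiveInterval::SplitAt hands every safepoint at or after the split position to the new sibling interval and keeps the earlier ones on the original. A toy sketch of that list split, with hypothetical Node/List types standing in for SafepointPosition and the interval's first_safepoint_/last_safepoint_ fields; the function handles the "nothing moves", "everything moves", and "list splits" cases, and the example exercises the last one.

#include <cassert>
#include <cstddef>

struct Node {
  std::size_t position;
  Node* next;
};

struct List {
  Node* first;
  Node* last;
};

// Moves all nodes with position >= split_position from `current` to `sibling`.
void SplitSafepoints(List* current, List* sibling, std::size_t split_position) {
  Node* previous = nullptr;
  Node* node = current->first;
  while (node != nullptr && node->position < split_position) {
    previous = node;
    node = node->next;
  }
  if (node == nullptr) return;                // Case 1: nothing moves.
  sibling->first = node;
  sibling->last = current->last;
  if (previous == nullptr) {                  // Case 2: everything moves.
    current->first = current->last = nullptr;
  } else {                                    // Case 3: the list splits in two.
    current->last = previous;
    previous->next = nullptr;
  }
}

int main() {
  Node c{18, nullptr};
  Node b{10, &c};
  Node a{4, &b};
  List current{&a, &c};
  List sibling{nullptr, nullptr};
  SplitSafepoints(&current, &sibling, 10);    // Split in the middle of the list.
  assert(current.first == &a && current.last == &a && a.next == nullptr);
  assert(sibling.first == &b && sibling.last == &c);
  return 0;
}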