Diffstat (limited to 'compiler/optimizing')
21 files changed, 320 insertions, 215 deletions
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 6342f91684..bdbd571133 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -105,6 +105,25 @@ class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> {
   DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
 };
 
+class InvokeDexCallingConventionVisitor {
+ public:
+  virtual Location GetNextLocation(Primitive::Type type) = 0;
+
+ protected:
+  InvokeDexCallingConventionVisitor() {}
+  virtual ~InvokeDexCallingConventionVisitor() {}
+
+  // The current index for core registers.
+  uint32_t gp_index_ = 0u;
+  // The current index for floating-point registers.
+  uint32_t float_index_ = 0u;
+  // The current stack index.
+  uint32_t stack_index_ = 0u;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+};
+
 class CodeGenerator {
  public:
   // Compiles the graph to executable instructions. Returns whether the compilation
@@ -308,6 +327,7 @@ class CodeGenerator {
     return GetFpuSpillSize() + GetCoreSpillSize();
   }
 
+  virtual ParallelMoveResolver* GetMoveResolver() = 0;
 
  protected:
   CodeGenerator(HGraph* graph,
@@ -351,7 +371,6 @@ class CodeGenerator {
 
   virtual Location GetStackLocation(HLoadLocal* load) const = 0;
 
-  virtual ParallelMoveResolver* GetMoveResolver() = 0;
   virtual HGraphVisitor* GetLocationBuilder() = 0;
   virtual HGraphVisitor* GetInstructionVisitor() = 0;
 
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 159bd30e45..e4c37deb8b 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -605,7 +605,7 @@ Location CodeGeneratorARM::GetStackLocation(HLoadLocal* load) const {
   UNREACHABLE();
 }
 
-Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
+Location InvokeDexCallingConventionVisitorARM::GetNextLocation(Primitive::Type type) {
   switch (type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
@@ -680,7 +680,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
   return Location();
 }
 
-Location InvokeDexCallingConventionVisitor::GetReturnLocation(Primitive::Type type) {
+Location InvokeDexCallingConventionVisitorARM::GetReturnLocation(Primitive::Type type) {
   switch (type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
@@ -1288,7 +1288,7 @@ void LocationsBuilderARM::HandleInvoke(HInvoke* invoke) {
       new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall);
   locations->AddTemp(Location::RegisterLocation(R0));
 
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
+  InvokeDexCallingConventionVisitorARM calling_convention_visitor;
   for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
     HInstruction* input = invoke->InputAt(i);
     locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType()));
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 600903621d..1a498e1148 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -78,22 +78,19 @@ class InvokeDexCallingConvention : public CallingConvention<Register, SRegister>
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
 };
 
-class InvokeDexCallingConventionVisitor {
+class InvokeDexCallingConventionVisitorARM : public InvokeDexCallingConventionVisitor {
  public:
-  InvokeDexCallingConventionVisitor()
-      : gp_index_(0), float_index_(0), double_index_(0), stack_index_(0) {}
+  InvokeDexCallingConventionVisitorARM() {}
+  virtual ~InvokeDexCallingConventionVisitorARM() {}
 
-  Location GetNextLocation(Primitive::Type type);
+  Location GetNextLocation(Primitive::Type type) OVERRIDE;
   Location GetReturnLocation(Primitive::Type type);
 
  private:
   InvokeDexCallingConvention calling_convention;
-  uint32_t gp_index_;
-  uint32_t float_index_;
-  uint32_t double_index_;
-  uint32_t stack_index_;
+  uint32_t double_index_ = 0;
 
-  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorARM);
 };
 
 class ParallelMoveResolverARM : public ParallelMoveResolverWithSwap {
@@ -151,7 +148,7 @@ class LocationsBuilderARM : public HGraphVisitor {
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
 
   CodeGeneratorARM* const codegen_;
-  InvokeDexCallingConventionVisitor parameter_visitor_;
+  InvokeDexCallingConventionVisitorARM parameter_visitor_;
 
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM);
 };
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 946ffc8ea8..9e02a1d850 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -372,15 +372,15 @@ class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
 
 #undef __
 
-Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
+Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) {
   Location next_location;
   if (type == Primitive::kPrimVoid) {
     LOG(FATAL) << "Unreachable type " << type;
   }
 
   if (Primitive::IsFloatingPointType(type) &&
-      (fp_index_ < calling_convention.GetNumberOfFpuRegisters())) {
-    next_location = LocationFrom(calling_convention.GetFpuRegisterAt(fp_index_++));
+      (float_index_ < calling_convention.GetNumberOfFpuRegisters())) {
+    next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
   } else if (!Primitive::IsFloatingPointType(type) &&
              (gp_index_ < calling_convention.GetNumberOfRegisters())) {
     next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++));
@@ -1907,7 +1907,7 @@ void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) {
       new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall);
   locations->AddTemp(LocationFrom(x0));
 
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
+  InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
   for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
     HInstruction* input = invoke->InputAt(i);
     locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType()));
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 5a358671cc..8aeea5400f 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -119,25 +119,20 @@ class InvokeDexCallingConvention : public CallingConvention<vixl::Register, vixl
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
 };
 
-class InvokeDexCallingConventionVisitor {
+class InvokeDexCallingConventionVisitorARM64 : public InvokeDexCallingConventionVisitor {
  public:
-  InvokeDexCallingConventionVisitor() : gp_index_(0), fp_index_(0), stack_index_(0) {}
+  InvokeDexCallingConventionVisitorARM64() {}
+  virtual ~InvokeDexCallingConventionVisitorARM64() {}
 
-  Location GetNextLocation(Primitive::Type type);
+  Location GetNextLocation(Primitive::Type type) OVERRIDE;
   Location GetReturnLocation(Primitive::Type return_type) {
     return calling_convention.GetReturnLocation(return_type);
   }
 
  private:
   InvokeDexCallingConvention calling_convention;
-  // The current index for core registers.
-  uint32_t gp_index_;
-  // The current index for floating-point registers.
-  uint32_t fp_index_;
-  // The current stack index.
-  uint32_t stack_index_;
-
-  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorARM64);
 };
 
 class InstructionCodeGeneratorARM64 : public HGraphVisitor {
@@ -193,7 +188,7 @@ class LocationsBuilderARM64 : public HGraphVisitor {
   void HandleShift(HBinaryOperation* instr);
 
   CodeGeneratorARM64* const codegen_;
-  InvokeDexCallingConventionVisitor parameter_visitor_;
+  InvokeDexCallingConventionVisitorARM64 parameter_visitor_;
 
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM64);
 };
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 7df4b53cbe..5ee091f92c 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -551,7 +551,7 @@ Location CodeGeneratorX86::GetStackLocation(HLoadLocal* load) const {
   UNREACHABLE();
 }
 
-Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
+Location InvokeDexCallingConventionVisitorX86::GetNextLocation(Primitive::Type type) {
   switch (type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
@@ -582,7 +582,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t index = fp_index_++;
+      uint32_t index = float_index_++;
       stack_index_++;
       if (index < calling_convention.GetNumberOfFpuRegisters()) {
         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
@@ -592,7 +592,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t index = fp_index_++;
+      uint32_t index = float_index_++;
       stack_index_ += 2;
       if (index < calling_convention.GetNumberOfFpuRegisters()) {
         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
@@ -1238,7 +1238,7 @@ void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) {
       new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall);
   locations->AddTemp(Location::RegisterLocation(EAX));
 
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
+  InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
   for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
     HInstruction* input = invoke->InputAt(i);
     locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType()));
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 8bd3cd3585..79dec7a1ac 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -75,22 +75,17 @@ class InvokeDexCallingConvention : public CallingConvention<Register, XmmRegiste
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
 };
 
-class InvokeDexCallingConventionVisitor {
+class InvokeDexCallingConventionVisitorX86 : public InvokeDexCallingConventionVisitor {
  public:
-  InvokeDexCallingConventionVisitor() : gp_index_(0), fp_index_(0), stack_index_(0) {}
+  InvokeDexCallingConventionVisitorX86() {}
+  virtual ~InvokeDexCallingConventionVisitorX86() {}
 
-  Location GetNextLocation(Primitive::Type type);
+  Location GetNextLocation(Primitive::Type type) OVERRIDE;
 
  private:
   InvokeDexCallingConvention calling_convention;
-  // The current index for cpu registers.
-  uint32_t gp_index_;
-  // The current index for fpu registers.
-  uint32_t fp_index_;
-  // The current stack index.
-  uint32_t stack_index_;
-
-  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86);
 };
 
 class ParallelMoveResolverX86 : public ParallelMoveResolverWithSwap {
@@ -137,7 +132,7 @@ class LocationsBuilderX86 : public HGraphVisitor {
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
 
   CodeGeneratorX86* const codegen_;
-  InvokeDexCallingConventionVisitor parameter_visitor_;
+  InvokeDexCallingConventionVisitorX86 parameter_visitor_;
 
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86);
 };
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 37b00c8d52..5ac68668ba 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1240,7 +1240,7 @@ void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
   codegen_->GenerateFrameExit();
 }
 
-Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
+Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(Primitive::Type type) {
   switch (type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
@@ -1270,7 +1270,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t index = fp_index_++;
+      uint32_t index = float_index_++;
       stack_index_++;
       if (index < calling_convention.GetNumberOfFpuRegisters()) {
         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
@@ -1280,7 +1280,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t index = fp_index_++;
+      uint32_t index = float_index_++;
       stack_index_ += 2;
       if (index < calling_convention.GetNumberOfFpuRegisters()) {
         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
@@ -1338,7 +1338,7 @@ void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
       new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall);
   locations->AddTemp(Location::RegisterLocation(RDI));
 
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
+  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
   for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
     HInstruction* input = invoke->InputAt(i);
     locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType()));
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index e3fd5d7ec1..13f9c46b5e 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -68,22 +68,17 @@ class InvokeDexCallingConvention : public CallingConvention<Register, FloatRegis
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
 };
 
-class InvokeDexCallingConventionVisitor {
+class InvokeDexCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor {
  public:
-  InvokeDexCallingConventionVisitor() : gp_index_(0), fp_index_(0), stack_index_(0) {}
+  InvokeDexCallingConventionVisitorX86_64() {}
+  virtual ~InvokeDexCallingConventionVisitorX86_64() {}
 
-  Location GetNextLocation(Primitive::Type type);
+  Location GetNextLocation(Primitive::Type type) OVERRIDE;
 
  private:
   InvokeDexCallingConvention calling_convention;
-  // The current index for cpu registers.
-  uint32_t gp_index_;
-  // The current index for fpu registers.
-  uint32_t fp_index_;
-  // The current stack index.
-  uint32_t stack_index_;
-
-  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86_64);
 };
 
 class CodeGeneratorX86_64;
@@ -147,7 +142,7 @@ class LocationsBuilderX86_64 : public HGraphVisitor {
   void HandleFieldGet(HInstruction* instruction);
 
   CodeGeneratorX86_64* const codegen_;
-  InvokeDexCallingConventionVisitor parameter_visitor_;
+  InvokeDexCallingConventionVisitorX86_64 parameter_visitor_;
 
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64);
 };
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 91cd60acce..cd427c5ed8 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -65,10 +65,13 @@ void HDeadCodeElimination::RemoveDeadBlocks() {
   for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     if (live_blocks.IsBitSet(block->GetBlockId())) {
-      continue;
+      // If this block is part of a loop that is being dismantled, we need to
+      // update its loop information.
+      block->UpdateLoopInformation();
+    } else {
+      MaybeRecordDeadBlock(block);
+      block->DisconnectAndDelete();
     }
-    MaybeRecordDeadBlock(block);
-    block->DisconnectAndDelete();
   }
 
   // Connect successive blocks created by dead branches. Order does not matter.
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 5d3db5c6f2..43fe3746ad 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -374,4 +374,3 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
 }
 
 }  // namespace art
-
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index dbb7cbaa98..c243ef3f8b 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -17,8 +17,10 @@
 #ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_H_
 #define ART_COMPILER_OPTIMIZING_INTRINSICS_H_
 
+#include "code_generator.h"
 #include "nodes.h"
 #include "optimization.h"
+#include "parallel_move_resolver.h"
 
 namespace art {
 
@@ -76,6 +78,38 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
 #undef INTRINSICS_LIST
 #undef OPTIMIZING_INTRINSICS
 
+  static void MoveArguments(HInvoke* invoke,
+                            CodeGenerator* codegen,
+                            InvokeDexCallingConventionVisitor* calling_convention_visitor) {
+    if (kIsDebugBuild && invoke->IsInvokeStaticOrDirect()) {
+      HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
+      // When we do not run baseline, explicit clinit checks triggered by static
+      // invokes must have been pruned by art::PrepareForRegisterAllocation.
+      DCHECK(codegen->IsBaseline() || !invoke_static_or_direct->IsStaticWithExplicitClinitCheck());
+    }
+
+    if (invoke->GetNumberOfArguments() == 0) {
+      // No argument to move.
+      return;
+    }
+
+    LocationSummary* locations = invoke->GetLocations();
+
+    // We're moving potentially two or more locations to locations that could overlap, so we need
+    // a parallel move resolver.
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+
+    for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
+      HInstruction* input = invoke->InputAt(i);
+      Location cc_loc = calling_convention_visitor->GetNextLocation(input->GetType());
+      Location actual_loc = locations->InAt(i);
+
+      parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
+    }
+
+    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+  }
+
 protected:
   IntrinsicVisitor() {}
 
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index e3fa272d22..7f7b450003 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -77,28 +77,9 @@ static void MoveFromReturnRegister(Location trg, Primitive::Type type, CodeGener
   }
 }
 
-static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorARM* codegen) {
-  if (invoke->GetNumberOfArguments() == 0) {
-    // No argument to move.
-    return;
-  }
-
-  LocationSummary* locations = invoke->GetLocations();
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
-
-  // We're moving potentially two or more locations to locations that could overlap, so we need
-  // a parallel move resolver.
-  HParallelMove parallel_move(arena);
-
-  for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
-    HInstruction* input = invoke->InputAt(i);
-    Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
-    Location actual_loc = locations->InAt(i);
-
-    parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
-  }
-
-  codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+static void MoveArguments(HInvoke* invoke, CodeGeneratorARM* codegen) {
+  InvokeDexCallingConventionVisitorARM calling_convention_visitor;
+  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
 }
 
 // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
@@ -117,7 +98,7 @@ class IntrinsicSlowPathARM : public SlowPathCodeARM {
 
     SaveLiveRegisters(codegen, invoke_->GetLocations());
 
-    MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+    MoveArguments(invoke_, codegen);
 
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister);
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index d71b49e6f1..ca3de99092 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -86,28 +86,9 @@ static void MoveFromReturnRegister(Location trg,
   }
 }
 
-static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorARM64* codegen) {
-  if (invoke->GetNumberOfArguments() == 0) {
-    // No argument to move.
-    return;
-  }
-
-  LocationSummary* locations = invoke->GetLocations();
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
-
-  // We're moving potentially two or more locations to locations that could overlap, so we need
-  // a parallel move resolver.
-  HParallelMove parallel_move(arena);
-
-  for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
-    HInstruction* input = invoke->InputAt(i);
-    Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
-    Location actual_loc = locations->InAt(i);
-
-    parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
-  }
-
-  codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+static void MoveArguments(HInvoke* invoke, CodeGeneratorARM64* codegen) {
+  InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
+  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
 }
 
 // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
@@ -126,7 +107,7 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 {
 
     SaveLiveRegisters(codegen, invoke_->GetLocations());
 
-    MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+    MoveArguments(invoke_, codegen);
 
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister);
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 18fb3c4d43..1eef1eff0b 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -111,28 +111,9 @@ static void MoveFromReturnRegister(Location target,
   }
 }
 
-static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86* codegen) {
-  if (invoke->GetNumberOfArguments() == 0) {
-    // No argument to move.
-    return;
-  }
-
-  LocationSummary* locations = invoke->GetLocations();
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
-
-  // We're moving potentially two or more locations to locations that could overlap, so we need
-  // a parallel move resolver.
-  HParallelMove parallel_move(arena);
-
-  for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
-    HInstruction* input = invoke->InputAt(i);
-    Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
-    Location actual_loc = locations->InAt(i);
-
-    parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
-  }
-
-  codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
+  InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
+  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
 }
 
 // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
@@ -155,7 +136,7 @@ class IntrinsicSlowPathX86 : public SlowPathCodeX86 {
 
     SaveLiveRegisters(codegen, invoke_->GetLocations());
 
-    MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+    MoveArguments(invoke_, codegen);
 
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), EAX);
@@ -749,7 +730,7 @@ void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
 }
 
 static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) {
-  MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen);
+  MoveArguments(invoke, codegen);
 
   DCHECK(invoke->IsInvokeStaticOrDirect());
   codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), EAX);
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index db7b58bc66..1fc5432a89 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -103,28 +103,9 @@ static void MoveFromReturnRegister(Location trg,
   }
 }
 
-static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86_64* codegen) {
-  if (invoke->GetNumberOfArguments() == 0) {
-    // No argument to move.
-    return;
-  }
-
-  LocationSummary* locations = invoke->GetLocations();
-  InvokeDexCallingConventionVisitor calling_convention_visitor;
-
-  // We're moving potentially two or more locations to locations that could overlap, so we need
-  // a parallel move resolver.
-  HParallelMove parallel_move(arena);
-
-  for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
-    HInstruction* input = invoke->InputAt(i);
-    Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
-    Location actual_loc = locations->InAt(i);
-
-    parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
-  }
-
-  codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
+  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
+  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
 }
 
 // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
@@ -143,7 +124,7 @@ class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
 
     SaveLiveRegisters(codegen, invoke_->GetLocations());
 
-    MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+    MoveArguments(invoke_, codegen);
 
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI));
@@ -623,7 +604,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
 }
 
 static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
-  MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen);
+  MoveArguments(invoke, codegen);
 
   DCHECK(invoke->IsInvokeStaticOrDirect());
   codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), CpuRegister(RDI));
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index e2eb46aabb..f07f4c7590 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -376,7 +376,6 @@ bool HLoopInformation::Populate() {
 }
 
 HBasicBlock* HLoopInformation::GetPreHeader() const {
-  DCHECK_EQ(header_->GetPredecessors().Size(), 2u);
   return header_->GetDominator();
 }
 
@@ -449,6 +448,20 @@ void HBasicBlock::InsertInstructionBefore(HInstruction* instruction, HInstructio
   instructions_.InsertInstructionBefore(instruction, cursor);
 }
 
+void HBasicBlock::InsertInstructionAfter(HInstruction* instruction, HInstruction* cursor) {
+  DCHECK(!cursor->IsPhi());
+  DCHECK(!instruction->IsPhi());
+  DCHECK_EQ(instruction->GetId(), -1);
+  DCHECK_NE(cursor->GetId(), -1);
+  DCHECK_EQ(cursor->GetBlock(), this);
+  DCHECK(!instruction->IsControlFlow());
+  DCHECK(!cursor->IsControlFlow());
+  instruction->SetBlock(this);
+  instruction->SetId(GetGraph()->GetNextInstructionId());
+  UpdateInputsUsers(instruction);
+  instructions_.InsertInstructionAfter(instruction, cursor);
+}
+
 void HBasicBlock::InsertPhiAfter(HPhi* phi, HPhi* cursor) {
   DCHECK_EQ(phi->GetId(), -1);
   DCHECK_NE(cursor->GetId(), -1);
@@ -1024,6 +1037,20 @@ void HBasicBlock::DisconnectAndDelete() {
   SetGraph(nullptr);
 }
 
+void HBasicBlock::UpdateLoopInformation() {
+  // Check if loop information points to a dismantled loop. If so, replace with
+  // the loop information of a larger loop which contains this block, or nullptr
+  // otherwise. We iterate in case the larger loop has been destroyed too.
+  while (IsInLoop() && loop_information_->GetBackEdges().IsEmpty()) {
+    if (IsLoopHeader()) {
+      HSuspendCheck* suspend_check = loop_information_->GetSuspendCheck();
+      DCHECK_EQ(suspend_check->GetBlock(), this);
+      RemoveInstruction(suspend_check);
+    }
+    loop_information_ = loop_information_->GetPreHeader()->GetLoopInformation();
+  }
+}
+
 void HBasicBlock::MergeWith(HBasicBlock* other) {
   DCHECK_EQ(GetGraph(), other->GetGraph());
   DCHECK(GetDominatedBlocks().Contains(other));
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index f64086e607..50eecffc57 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -397,6 +397,11 @@ class HLoopInformation : public ArenaObject<kArenaAllocMisc> {
     return back_edges_;
   }
 
+  HBasicBlock* GetSingleBackEdge() const {
+    DCHECK_EQ(back_edges_.Size(), 1u);
+    return back_edges_.Get(0);
+  }
+
   void ClearBackEdges() {
     back_edges_.Reset();
   }
@@ -620,7 +625,9 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> {
   void DisconnectAndDelete();
 
   void AddInstruction(HInstruction* instruction);
+  // Insert `instruction` before/after an existing instruction `cursor`.
   void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor);
+  void InsertInstructionAfter(HInstruction* instruction, HInstruction* cursor);
   // Replace instruction `initial` with `replacement` within this block.
   void ReplaceAndRemoveInstructionWith(HInstruction* initial,
                                        HInstruction* replacement);
@@ -634,7 +641,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> {
   void RemoveInstructionOrPhi(HInstruction* instruction, bool ensure_safety = true);
 
   bool IsLoopHeader() const {
-    return (loop_information_ != nullptr) && (loop_information_->GetHeader() == this);
+    return IsInLoop() && (loop_information_->GetHeader() == this);
   }
 
   bool IsLoopPreHeaderFirstPredecessor() const {
@@ -653,7 +660,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> {
   void SetInLoop(HLoopInformation* info) {
     if (IsLoopHeader()) {
       // Nothing to do. This just means `info` is an outer loop.
-    } else if (loop_information_ == nullptr) {
+    } else if (!IsInLoop()) {
       loop_information_ = info;
     } else if (loop_information_->Contains(*info->GetHeader())) {
       // Block is currently part of an outer loop. Make it part of this inner loop.
@@ -672,6 +679,11 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> {
     loop_information_ = info;
   }
 
+  // Checks if the loop information points to a valid loop. If the loop has been
+  // dismantled (does not have a back edge any more), loop information is
+  // removed or replaced with the information of the first valid outer loop.
+  void UpdateLoopInformation();
+
   bool IsInLoop() const { return loop_information_ != nullptr; }
 
   // Returns wheter this block dominates the blocked passed as parameter.
@@ -725,7 +737,7 @@ class HLoopInformationOutwardIterator : public ValueObject {
 
   void Advance() {
     DCHECK(!Done());
-    current_ = current_->GetHeader()->GetDominator()->GetLoopInformation();
+    current_ = current_->GetPreHeader()->GetLoopInformation();
   }
 
   HLoopInformation* Current() const {
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index a8d006f104..812642b1b2 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -1467,23 +1467,28 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
 
     LiveRange* range = current->GetFirstRange();
     while (range != nullptr) {
-      DCHECK(use == nullptr || use->GetPosition() >= range->GetStart());
+      while (use != nullptr && use->GetPosition() < range->GetStart()) {
+        DCHECK(use->IsSynthesized());
+        use = use->GetNext();
+      }
       while (use != nullptr && use->GetPosition() <= range->GetEnd()) {
         DCHECK(!use->GetIsEnvironment());
         DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd()));
-        LocationSummary* locations = use->GetUser()->GetLocations();
-        Location expected_location = locations->InAt(use->GetInputIndex());
-        // The expected (actual) location may be invalid in case the input is unused. Currently
-        // this only happens for intrinsics.
-        if (expected_location.IsValid()) {
-          if (expected_location.IsUnallocated()) {
-            locations->SetInAt(use->GetInputIndex(), source);
-          } else if (!expected_location.IsConstant()) {
-            AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location);
+        if (!use->IsSynthesized()) {
+          LocationSummary* locations = use->GetUser()->GetLocations();
+          Location expected_location = locations->InAt(use->GetInputIndex());
+          // The expected (actual) location may be invalid in case the input is unused. Currently
+          // this only happens for intrinsics.
+          if (expected_location.IsValid()) {
+            if (expected_location.IsUnallocated()) {
+              locations->SetInAt(use->GetInputIndex(), source);
+            } else if (!expected_location.IsConstant()) {
+              AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location);
+            }
+          } else {
+            DCHECK(use->GetUser()->IsInvoke());
+            DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone);
           }
-        } else {
-          DCHECK(use->GetUser()->IsInvoke());
-          DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone);
         }
         use = use->GetNext();
       }
@@ -1561,7 +1566,13 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
     current = next_sibling;
   } while (current != nullptr);
 
-  DCHECK(use == nullptr);
+  if (kIsDebugBuild) {
+    // Following uses can only be synthesized uses.
+    while (use != nullptr) {
+      DCHECK(use->IsSynthesized());
+      use = use->GetNext();
+    }
+  }
 }
 
 void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval,
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index b674f746b6..0bbcb308f3 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -341,7 +341,7 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until) const {
   size_t end = GetEnd();
   while (use != nullptr && use->GetPosition() <= end) {
     size_t use_position = use->GetPosition();
-    if (use_position >= start) {
+    if (use_position >= start && !use->IsSynthesized()) {
      HInstruction* user = use->GetUser();
       size_t input_index = use->GetInputIndex();
       if (user->IsPhi()) {
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index b95276afd7..b74e65584f 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -112,12 +112,15 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> {
         is_environment_(is_environment),
         position_(position),
         next_(next) {
-    DCHECK(user->IsPhi()
+    DCHECK((user == nullptr)
+        || user->IsPhi()
         || (GetPosition() == user->GetLifetimePosition() + 1)
         || (GetPosition() == user->GetLifetimePosition()));
     DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition());
   }
 
+  static constexpr size_t kNoInput = -1;
+
   size_t GetPosition() const { return position_; }
 
   UsePosition* GetNext() const { return next_; }
@@ -126,14 +129,16 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> {
   HInstruction* GetUser() const { return user_; }
 
   bool GetIsEnvironment() const { return is_environment_; }
+  bool IsSynthesized() const { return user_ == nullptr; }
 
   size_t GetInputIndex() const { return input_index_; }
 
   void Dump(std::ostream& stream) const {
     stream << position_;
-    if (is_environment_) {
-      stream << " (env)";
-    }
+  }
+
+  HLoopInformation* GetLoopInformation() const {
+    return user_->GetBlock()->GetLoopInformation();
   }
 
   UsePosition* Dup(ArenaAllocator* allocator) const {
@@ -142,6 +147,15 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> {
                                      next_ == nullptr ? nullptr : next_->Dup(allocator));
   }
 
+  bool RequiresRegister() const {
+    if (GetIsEnvironment()) return false;
+    if (IsSynthesized()) return false;
+    Location location = GetUser()->GetLocations()->InAt(GetInputIndex());
+    return location.IsUnallocated()
+        && (location.GetPolicy() == Location::kRequiresRegister
+            || location.GetPolicy() == Location::kRequiresFpuRegister);
+  }
+
  private:
   HInstruction* const user_;
   const size_t input_index_;
@@ -240,9 +254,15 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
         // location of the input just before that instruction (and not potential moves due
         // to splitting).
         position = instruction->GetLifetimePosition();
+      } else if (!locations->InAt(input_index).IsValid()) {
+        return;
       }
     }
 
+    if (!is_environment && instruction->IsInLoop()) {
+      AddBackEdgeUses(*instruction->GetBlock());
+    }
+
     DCHECK(position == instruction->GetLifetimePosition()
         || position == instruction->GetLifetimePosition() + 1);
 
@@ -306,6 +326,9 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
 
   void AddPhiUse(HInstruction* instruction, size_t input_index, HBasicBlock* block) {
     DCHECK(instruction->IsPhi());
+    if (block->IsInLoop()) {
+      AddBackEdgeUses(*block);
+    }
     first_use_ = new (allocator_) UsePosition(
         instruction, input_index, false, block->GetLifetimeEnd(), first_use_);
   }
@@ -456,27 +479,9 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
     if (is_temp_) {
       return position == GetStart() ? position : kNoLifetime;
     }
-    if (position == GetStart() && IsParent()) {
-      LocationSummary* locations = defined_by_->GetLocations();
-      Location location = locations->Out();
-      // This interval is the first interval of the instruction. If the output
-      // of the instruction requires a register, we return the position of that instruction
-      // as the first register use.
-      if (location.IsUnallocated()) {
-        if ((location.GetPolicy() == Location::kRequiresRegister)
-            || (location.GetPolicy() == Location::kSameAsFirstInput
-                && (locations->InAt(0).IsRegister()
-                    || locations->InAt(0).IsRegisterPair()
-                    || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) {
-          return position;
-        } else if ((location.GetPolicy() == Location::kRequiresFpuRegister)
-                   || (location.GetPolicy() == Location::kSameAsFirstInput
-                       && locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister)) {
-          return position;
-        }
-      } else if (location.IsRegister() || location.IsRegisterPair()) {
-        return position;
-      }
+
+    if (IsDefiningPosition(position) && DefinitionRequiresRegister()) {
+      return position;
     }
 
     UsePosition* use = first_use_;
@@ -484,10 +489,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
     while (use != nullptr && use->GetPosition() <= end) {
       size_t use_position = use->GetPosition();
       if (use_position > position) {
-        Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex());
-        if (location.IsUnallocated()
-            && (location.GetPolicy() == Location::kRequiresRegister
-                || location.GetPolicy() == Location::kRequiresFpuRegister)) {
+        if (use->RequiresRegister()) {
           return use_position;
         }
       }
@@ -505,18 +507,16 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
       return position == GetStart() ? position : kNoLifetime;
     }
 
-    if (position == GetStart() && IsParent()) {
-      if (defined_by_->GetLocations()->Out().IsValid()) {
-        return position;
-      }
+    if (IsDefiningPosition(position)) {
+      DCHECK(defined_by_->GetLocations()->Out().IsValid());
+      return position;
     }
 
     UsePosition* use = first_use_;
     size_t end = GetEnd();
     while (use != nullptr && use->GetPosition() <= end) {
-      Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex());
       size_t use_position = use->GetPosition();
-      if (use_position > position && location.IsValid()) {
+      if (use_position > position) {
        return use_position;
       }
       use = use->GetNext();
@@ -664,7 +664,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
         stream << " ";
       } while ((use = use->GetNext()) != nullptr);
     }
-    stream << "}, {";
+    stream << "}, { ";
     use = first_env_use_;
     if (use != nullptr) {
       do {
@@ -910,6 +910,100 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
     return range;
   }
 
+  bool DefinitionRequiresRegister() const {
+    DCHECK(IsParent());
+    LocationSummary* locations = defined_by_->GetLocations();
+    Location location = locations->Out();
+    // This interval is the first interval of the instruction. If the output
+    // of the instruction requires a register, we return the position of that instruction
+    // as the first register use.
+    if (location.IsUnallocated()) {
+      if ((location.GetPolicy() == Location::kRequiresRegister)
+          || (location.GetPolicy() == Location::kSameAsFirstInput
+              && (locations->InAt(0).IsRegister()
+                  || locations->InAt(0).IsRegisterPair()
+                  || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) {
+        return true;
+      } else if ((location.GetPolicy() == Location::kRequiresFpuRegister)
+                 || (location.GetPolicy() == Location::kSameAsFirstInput
+                     && (locations->InAt(0).IsFpuRegister()
+                         || locations->InAt(0).IsFpuRegisterPair()
+                         || locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister))) {
+        return true;
+      }
+    } else if (location.IsRegister() || location.IsRegisterPair()) {
+      return true;
+    }
+    return false;
+  }
+
+  bool IsDefiningPosition(size_t position) const {
+    return IsParent() && (position == GetStart());
+  }
+
+  bool HasSynthesizeUseAt(size_t position) const {
+    UsePosition* use = first_use_;
+    while (use != nullptr) {
+      size_t use_position = use->GetPosition();
+      if ((use_position == position) && use->IsSynthesized()) {
+        return true;
+      }
+      if (use_position > position) break;
+      use = use->GetNext();
+    }
+    return false;
+  }
+
+  void AddBackEdgeUses(const HBasicBlock& block_at_use) {
+    DCHECK(block_at_use.IsInLoop());
+    // Add synthesized uses at the back edge of loops to help the register allocator.
+    // Note that this method is called in decreasing liveness order, to faciliate adding
+    // uses at the head of the `first_use_` linked list. Because below
+    // we iterate from inner-most to outer-most, which is in increasing liveness order,
+    // we need to take extra care of how the `first_use_` linked list is being updated.
+    UsePosition* first_in_new_list = nullptr;
+    UsePosition* last_in_new_list = nullptr;
+    for (HLoopInformationOutwardIterator it(block_at_use);
+         !it.Done();
+         it.Advance()) {
+      HLoopInformation* current = it.Current();
+      if (GetDefinedBy()->GetLifetimePosition() >= current->GetHeader()->GetLifetimeStart()) {
+        // This interval is defined in the loop. We can stop going outward.
+        break;
+      }
+
+      size_t back_edge_use_position = current->GetSingleBackEdge()->GetLifetimeEnd();
+      if ((first_use_ != nullptr) && (first_use_->GetPosition() <= back_edge_use_position)) {
+        // There was a use already seen in this loop. Therefore the previous call to `AddUse`
+        // already inserted the backedge use. We can stop going outward.
+        DCHECK(HasSynthesizeUseAt(back_edge_use_position));
+        break;
+      }
+
+      DCHECK(last_in_new_list == nullptr
+             || back_edge_use_position > last_in_new_list->GetPosition());
+
+      UsePosition* new_use = new (allocator_) UsePosition(
+          nullptr, UsePosition::kNoInput, /* is_environment */ false,
+          back_edge_use_position, nullptr);
+
+      if (last_in_new_list != nullptr) {
+        // Going outward. The latest created use needs to point to the new use.
+        last_in_new_list->SetNext(new_use);
+      } else {
+        // This is the inner-most loop.
+        DCHECK_EQ(current, block_at_use.GetLoopInformation());
+        first_in_new_list = new_use;
+      }
+      last_in_new_list = new_use;
+    }
+    // Link the newly created linked list with `first_use_`.
+    if (last_in_new_list != nullptr) {
+      last_in_new_list->SetNext(first_use_);
+      first_use_ = first_in_new_list;
+    }
+  }
+
   ArenaAllocator* const allocator_;
 
   // Ranges of this interval. We need a quick access to the last range to test