Diffstat (limited to 'compiler/optimizing/inliner.cc')
-rw-r--r-- | compiler/optimizing/inliner.cc | 1539
1 file changed, 1095 insertions(+), 444 deletions(-)
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index c8a983b2bd..4f6ca17de0 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -17,10 +17,12 @@
 #include "inliner.h"
 
 #include "art_method-inl.h"
+#include "base/enums.h"
 #include "builder.h"
 #include "class_linker.h"
 #include "constant_folding.h"
 #include "dead_code_elimination.h"
+#include "dex/inline_method_analyser.h"
 #include "dex/verified_method.h"
 #include "dex/verification_results.h"
 #include "driver/compiler_driver-inl.h"
@@ -35,71 +37,135 @@
 #include "nodes.h"
 #include "optimizing_compiler.h"
 #include "reference_type_propagation.h"
-#include "register_allocator.h"
-#include "quick/inline_method_analyser.h"
+#include "register_allocator_linear_scan.h"
 #include "sharpening.h"
 #include "ssa_builder.h"
 #include "ssa_phi_elimination.h"
-#include "scoped_thread_state_change.h"
+#include "scoped_thread_state_change-inl.h"
 #include "thread.h"
 
 namespace art {
 
-static constexpr size_t kMaximumNumberOfHInstructions = 32;
+// Instruction limit to control memory.
+static constexpr size_t kMaximumNumberOfTotalInstructions = 1024;
+
+// Maximum number of instructions for considering a method small,
+// which we will always try to inline if the other non-instruction limits
+// are not reached.
+static constexpr size_t kMaximumNumberOfInstructionsForSmallMethod = 3;
 
 // Limit the number of dex registers that we accumulate while inlining
 // to avoid creating large amount of nested environments.
-static constexpr size_t kMaximumNumberOfCumulatedDexRegisters = 64;
-
-// Avoid inlining within a huge method due to memory pressure.
-static constexpr size_t kMaximumCodeUnitSize = 4096;
+static constexpr size_t kMaximumNumberOfCumulatedDexRegisters = 32;
+
+// Limit recursive call inlining, which does not benefit from too
+// much inlining compared to code locality.
+static constexpr size_t kMaximumNumberOfRecursiveCalls = 4;
+
+// Controls the use of inline caches in AOT mode.
+static constexpr bool kUseAOTInlineCaches = true;
+
+// We check for line numbers to make sure the DepthString implementation
+// aligns the output nicely.
+#define LOG_INTERNAL(msg) \
+  static_assert(__LINE__ > 10, "Unhandled line number"); \
+  static_assert(__LINE__ < 10000, "Unhandled line number"); \
+  VLOG(compiler) << DepthString(__LINE__) << msg
+
+#define LOG_TRY() LOG_INTERNAL("Try inlining call: ")
+#define LOG_NOTE() LOG_INTERNAL("Note: ")
+#define LOG_SUCCESS() LOG_INTERNAL("Success: ")
+#define LOG_FAIL(stat) MaybeRecordStat(stat); LOG_INTERNAL("Fail: ")
+#define LOG_FAIL_NO_STAT() LOG_INTERNAL("Fail: ")
+
+std::string HInliner::DepthString(int line) const {
+  std::string value;
+  // Indent according to the inlining depth.
+  size_t count = depth_;
+  // Line numbers get printed in the log, so add a space if the log's line is less
+  // than 1000, and two if less than 100. 10 cannot be reached as it's the copyright.
+  if (!kIsTargetBuild) {
+    if (line < 100) {
+      value += " ";
+    }
+    if (line < 1000) {
+      value += " ";
+    }
+    // Safeguard if this file reaches more than 10000 lines.
+    DCHECK_LT(line, 10000);
+  }
+  for (size_t i = 0; i < count; ++i) {
+    value += "  ";
+  }
+  return value;
+}
 
-void HInliner::Run() {
-  const CompilerOptions& compiler_options = compiler_driver_->GetCompilerOptions();
-  if ((compiler_options.GetInlineDepthLimit() == 0)
-      || (compiler_options.GetInlineMaxCodeUnits() == 0)) {
-    return;
+static size_t CountNumberOfInstructions(HGraph* graph) {
+  size_t number_of_instructions = 0;
+  for (HBasicBlock* block : graph->GetReversePostOrderSkipEntryBlock()) {
+    for (HInstructionIterator instr_it(block->GetInstructions());
+         !instr_it.Done();
+         instr_it.Advance()) {
+      ++number_of_instructions;
+    }
   }
-  if (caller_compilation_unit_.GetCodeItem()->insns_size_in_code_units_ > kMaximumCodeUnitSize) {
-    return;
+  return number_of_instructions;
+}
+
+void HInliner::UpdateInliningBudget() {
+  if (total_number_of_instructions_ >= kMaximumNumberOfTotalInstructions) {
+    // Always try to inline small methods.
+    inlining_budget_ = kMaximumNumberOfInstructionsForSmallMethod;
+  } else {
+    inlining_budget_ = std::max(
+        kMaximumNumberOfInstructionsForSmallMethod,
+        kMaximumNumberOfTotalInstructions - total_number_of_instructions_);
   }
+}
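The interaction of the two instruction limits above can be summarized with a small standalone C++ sketch (illustrative only, toy names, not code from this change):

    #include <algorithm>
    #include <cstddef>

    constexpr size_t kMaxTotal = 1024;  // kMaximumNumberOfTotalInstructions
    constexpr size_t kSmall = 3;        // kMaximumNumberOfInstructionsForSmallMethod

    // Mirrors UpdateInliningBudget(): once the accumulated graph reaches the
    // total limit, only "small" methods may still be inlined; otherwise the
    // budget is the remaining headroom, never below the small-method size.
    size_t ComputeBudget(size_t total_instructions_so_far) {
      if (total_instructions_so_far >= kMaxTotal) {
        return kSmall;
      }
      return std::max(kSmall, kMaxTotal - total_instructions_so_far);
    }

For example, a graph that has grown to 1000 instructions leaves a budget of 24; at 1022 instructions the budget is clamped to 3 rather than 2.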
+
+void HInliner::Run() {
   if (graph_->IsDebuggable()) {
     // For simplicity, we currently never inline when the graph is debuggable. This avoids
     // doing some logic in the runtime to discover if a method could have been inlined.
     return;
   }
-  const ArenaVector<HBasicBlock*>& blocks = graph_->GetReversePostOrder();
+
+  // Initialize the number of instructions for the method being compiled. Recursive calls
+  // to HInliner::Run have already updated the instruction count.
+  if (outermost_graph_ == graph_) {
+    total_number_of_instructions_ = CountNumberOfInstructions(graph_);
+  }
+
+  UpdateInliningBudget();
+  DCHECK_NE(total_number_of_instructions_, 0u);
+  DCHECK_NE(inlining_budget_, 0u);
+
+  // Keep a copy of all blocks when starting the visit.
+  ArenaVector<HBasicBlock*> blocks = graph_->GetReversePostOrder();
   DCHECK(!blocks.empty());
-  HBasicBlock* next_block = blocks[0];
-  for (size_t i = 0; i < blocks.size(); ++i) {
-    // Because we are changing the graph when inlining, we need to remember the next block.
-    // This avoids doing the inlining work again on the inlined blocks.
-    if (blocks[i] != next_block) {
-      continue;
-    }
-    HBasicBlock* block = next_block;
-    next_block = (i == blocks.size() - 1) ? nullptr : blocks[i + 1];
+  // Because we are changing the graph when inlining,
+  // we just iterate over the blocks of the outer method.
+  // This avoids doing the inlining work again on the inlined blocks.
+  for (HBasicBlock* block : blocks) {
     for (HInstruction* instruction = block->GetFirstInstruction(); instruction != nullptr;) {
       HInstruction* next = instruction->GetNext();
       HInvoke* call = instruction->AsInvoke();
       // As long as the call is not intrinsified, it is worth trying to inline.
       if (call != nullptr && call->GetIntrinsic() == Intrinsics::kNone) {
-        // We use the original invoke type to ensure the resolution of the called method
-        // works properly.
-        if (!TryInline(call)) {
-          if (kIsDebugBuild && IsCompilingWithCoreImage()) {
-            std::string callee_name =
-                PrettyMethod(call->GetDexMethodIndex(), *outer_compilation_unit_.GetDexFile());
-            bool should_inline = callee_name.find("$inline$") != std::string::npos;
-            CHECK(!should_inline) << "Could not inline " << callee_name;
+        if (kIsDebugBuild && IsCompilingWithCoreImage()) {
+          // Debugging case: directives in method names control or assert on inlining.
+          std::string callee_name = outer_compilation_unit_.GetDexFile()->PrettyMethod(
+              call->GetDexMethodIndex(), /* with_signature */ false);
+          // Tests prevent inlining by having $noinline$ in their method names.
+          if (callee_name.find("$noinline$") == std::string::npos) {
+            if (!TryInline(call)) {
+              bool should_have_inlined = (callee_name.find("$inline$") != std::string::npos);
+              CHECK(!should_have_inlined) << "Could not inline " << callee_name;
+            }
           }
         } else {
-          if (kIsDebugBuild && IsCompilingWithCoreImage()) {
-            std::string callee_name =
-                PrettyMethod(call->GetDexMethodIndex(), *outer_compilation_unit_.GetDexFile());
-            bool must_not_inline = callee_name.find("$noinline$") != std::string::npos;
-            CHECK(!must_not_inline) << "Should not have inlined " << callee_name;
-          }
+          // Normal case: try to inline.
+          TryInline(call);
         }
       }
       instruction = next;
@@ -108,7 +174,7 @@ void HInliner::Run() {
 }
 
 static bool IsMethodOrDeclaringClassFinal(ArtMethod* method)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+    REQUIRES_SHARED(Locks::mutator_lock_) {
   return method->IsFinal() || method->GetDeclaringClass()->IsFinal();
 }
 
@@ -118,7 +184,7 @@ static bool IsMethodOrDeclaringClassFinal(ArtMethod* method)
  * Return nullptr if the runtime target cannot be proven.
  */
 static ArtMethod* FindVirtualOrInterfaceTarget(HInvoke* invoke, ArtMethod* resolved_method)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+    REQUIRES_SHARED(Locks::mutator_lock_) {
   if (IsMethodOrDeclaringClassFinal(resolved_method)) {
     // No need to lookup further, the resolved method will be the target.
     return resolved_method;
@@ -151,7 +217,7 @@ static ArtMethod* FindVirtualOrInterfaceTarget(HInvoke* invoke, ArtMethod* resol
   }
 
   ClassLinker* cl = Runtime::Current()->GetClassLinker();
-  size_t pointer_size = cl->GetImagePointerSize();
+  PointerSize pointer_size = cl->GetImagePointerSize();
   if (invoke->IsInvokeInterface()) {
     resolved_method = info.GetTypeHandle()->FindVirtualMethodForInterface(
         resolved_method, pointer_size);
@@ -185,34 +251,42 @@ static ArtMethod* FindVirtualOrInterfaceTarget(HInvoke* invoke, ArtMethod* resol
   }
 }
 
-static uint32_t FindClassIndexIn(mirror::Class* cls,
-                                 const DexFile& dex_file,
-                                 Handle<mirror::DexCache> dex_cache)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  uint32_t index = DexFile::kDexNoIndex;
+static uint32_t FindMethodIndexIn(ArtMethod* method,
+                                  const DexFile& dex_file,
+                                  uint32_t name_and_signature_index)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  if (IsSameDexFile(*method->GetDexFile(), dex_file)) {
+    return method->GetDexMethodIndex();
+  } else {
+    return method->FindDexMethodIndexInOtherDexFile(dex_file, name_and_signature_index);
+  }
+}
+
+static dex::TypeIndex FindClassIndexIn(mirror::Class* cls,
+                                       const DexCompilationUnit& compilation_unit)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  const DexFile& dex_file = *compilation_unit.GetDexFile();
+  dex::TypeIndex index;
   if (cls->GetDexCache() == nullptr) {
-    DCHECK(cls->IsArrayClass()) << PrettyClass(cls);
+    DCHECK(cls->IsArrayClass()) << cls->PrettyClass();
     index = cls->FindTypeIndexInOtherDexFile(dex_file);
-  } else if (cls->GetDexTypeIndex() == DexFile::kDexNoIndex16) {
-    DCHECK(cls->IsProxyClass()) << PrettyClass(cls);
+  } else if (!cls->GetDexTypeIndex().IsValid()) {
+    DCHECK(cls->IsProxyClass()) << cls->PrettyClass();
     // TODO: deal with proxy classes.
   } else if (IsSameDexFile(cls->GetDexFile(), dex_file)) {
-    DCHECK_EQ(cls->GetDexCache(), dex_cache.Get());
+    DCHECK_EQ(cls->GetDexCache(), compilation_unit.GetDexCache().Get());
     index = cls->GetDexTypeIndex();
-    // Update the dex cache to ensure the class is in. The generated code will
-    // consider it is. We make it safe by updating the dex cache, as other
-    // dex files might also load the class, and there is no guarantee the dex
-    // cache of the dex file of the class will be updated.
-    if (dex_cache->GetResolvedType(index) == nullptr) {
-      dex_cache->SetResolvedType(index, cls);
-    }
   } else {
     index = cls->FindTypeIndexInOtherDexFile(dex_file);
-    // We cannot guarantee the entry in the dex cache will resolve to the same class,
+    // We cannot guarantee the entry will resolve to the same class,
     // as there may be different class loaders. So only return the index if it's
-    // the right class in the dex cache already.
-    if (index != DexFile::kDexNoIndex && dex_cache->GetResolvedType(index) != cls) {
-      index = DexFile::kDexNoIndex;
+    // the right class already resolved with the class loader.
+    if (index.IsValid()) {
+      ObjPtr<mirror::Class> resolved = ClassLinker::LookupResolvedType(
+          index, compilation_unit.GetDexCache().Get(), compilation_unit.GetClassLoader().Get());
+      if (resolved != cls) {
+        index = dex::TypeIndex::Invalid();
+      }
     }
   }
 
@@ -232,7 +306,7 @@ class ScopedProfilingInfoInlineUse {
   ~ScopedProfilingInfoInlineUse() {
     if (profiling_info_ != nullptr) {
-      size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+      PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
       DCHECK_EQ(profiling_info_, method_->GetProfilingInfo(pointer_size));
       Runtime::Current()->GetJit()->GetCodeCache()->DoneCompilerUse(method_, self_);
     }
@@ -246,100 +320,372 @@ class ScopedProfilingInfoInlineUse {
   ProfilingInfo* const profiling_info_;
 };
 
+HInliner::InlineCacheType HInliner::GetInlineCacheType(
+    const Handle<mirror::ObjectArray<mirror::Class>>& classes)
+  REQUIRES_SHARED(Locks::mutator_lock_) {
+  uint8_t number_of_types = 0;
+  for (; number_of_types < InlineCache::kIndividualCacheSize; ++number_of_types) {
+    if (classes->Get(number_of_types) == nullptr) {
+      break;
+    }
+  }
+
+  if (number_of_types == 0) {
+    return kInlineCacheUninitialized;
+  } else if (number_of_types == 1) {
+    return kInlineCacheMonomorphic;
+  } else if (number_of_types == InlineCache::kIndividualCacheSize) {
+    return kInlineCacheMegamorphic;
+  } else {
+    return kInlineCachePolymorphic;
+  }
+}
+
+static mirror::Class* GetMonomorphicType(Handle<mirror::ObjectArray<mirror::Class>> classes)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK(classes->Get(0) != nullptr);
+  return classes->Get(0);
+}
+
+ArtMethod* HInliner::TryCHADevirtualization(ArtMethod* resolved_method) {
+  if (!resolved_method->HasSingleImplementation()) {
+    return nullptr;
+  }
+  if (Runtime::Current()->IsAotCompiler()) {
+    // No CHA-based devirtualization for AOT compiler (yet).
+    return nullptr;
+  }
+  if (outermost_graph_->IsCompilingOsr()) {
+    // We do not support HDeoptimize in OSR methods.
+    return nullptr;
+  }
+  PointerSize pointer_size = caller_compilation_unit_.GetClassLinker()->GetImagePointerSize();
+  ArtMethod* single_impl = resolved_method->GetSingleImplementation(pointer_size);
+  if (single_impl == nullptr) {
+    return nullptr;
+  }
+  if (single_impl->IsProxyMethod()) {
+    // Proxy method is a generic invoker that's not worth
+    // devirtualizing/inlining. It also causes issues when the proxy
+    // method is in another dex file if we try to rewrite invoke-interface to
+    // invoke-virtual because a proxy method doesn't have a real dex file.
+    return nullptr;
+  }
+  if (!single_impl->GetDeclaringClass()->IsResolved()) {
+    // There's a race with the class loading, which updates the CHA info
+    // before setting the class to resolved. So we just bail for this
+    // rare occurrence.
+    return nullptr;
+  }
+  return single_impl;
+}
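The question TryCHADevirtualization() answers can be modeled with a toy class-hierarchy walker (illustrative only; ART maintains the single-implementation bit per ArtMethod incrementally at class-load time rather than scanning):

    #include <vector>

    struct Klass {
      const Klass* super = nullptr;
      bool overrides_target = false;  // Does this class declare its own override?
    };

    // A virtual call resolved in `root` can be treated as direct if no loaded
    // subclass of `root` overrides the target, i.e. there is a single
    // implementation of the method in the current hierarchy.
    bool HasSingleImplementation(const Klass& root, const std::vector<Klass>& loaded) {
      for (const Klass& k : loaded) {
        for (const Klass* s = k.super; s != nullptr; s = s->super) {
          if (s == &root && k.overrides_target) {
            return false;
          }
        }
      }
      return true;
    }

Because a later class load can invalidate the answer, the inlined body is protected by the guard built in AddCHAGuard() further down, and the dependency is recorded via AddCHASingleImplementationDependency().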
 
 bool HInliner::TryInline(HInvoke* invoke_instruction) {
-  if (invoke_instruction->IsInvokeUnresolved()) {
-    return false;  // Don't bother to move further if we know the method is unresolved.
+  if (invoke_instruction->IsInvokeUnresolved() ||
+      invoke_instruction->IsInvokePolymorphic()) {
+    return false;  // Don't bother to move further if we know the method is unresolved or an
+                   // invoke-polymorphic.
   }
 
-  uint32_t method_index = invoke_instruction->GetDexMethodIndex();
   ScopedObjectAccess soa(Thread::Current());
+  uint32_t method_index = invoke_instruction->GetDexMethodIndex();
   const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
-  VLOG(compiler) << "Try inlining " << PrettyMethod(method_index, caller_dex_file);
+  LOG_TRY() << caller_dex_file.PrettyMethod(method_index);
 
-  ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
-  // We can query the dex cache directly. The verifier has populated it already.
-  ArtMethod* resolved_method;
+  ArtMethod* resolved_method = invoke_instruction->GetResolvedMethod();
+  if (resolved_method == nullptr) {
+    DCHECK(invoke_instruction->IsInvokeStaticOrDirect());
+    DCHECK(invoke_instruction->AsInvokeStaticOrDirect()->IsStringInit());
+    LOG_FAIL_NO_STAT() << "Not inlining a String.<init> method";
+    return false;
+  }
   ArtMethod* actual_method = nullptr;
+
   if (invoke_instruction->IsInvokeStaticOrDirect()) {
-    if (invoke_instruction->AsInvokeStaticOrDirect()->IsStringInit()) {
-      VLOG(compiler) << "Not inlining a String.<init> method";
-      return false;
-    }
-    MethodReference ref = invoke_instruction->AsInvokeStaticOrDirect()->GetTargetMethod();
-    mirror::DexCache* const dex_cache = IsSameDexFile(caller_dex_file, *ref.dex_file)
-        ? caller_compilation_unit_.GetDexCache().Get()
-        : class_linker->FindDexCache(soa.Self(), *ref.dex_file);
-    resolved_method = dex_cache->GetResolvedMethod(
-        ref.dex_method_index, class_linker->GetImagePointerSize());
-    // actual_method == resolved_method for direct or static calls.
     actual_method = resolved_method;
   } else {
-    resolved_method = caller_compilation_unit_.GetDexCache().Get()->GetResolvedMethod(
-        method_index, class_linker->GetImagePointerSize());
-    if (resolved_method != nullptr) {
-      // Check if we can statically find the method.
-      actual_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method);
-    }
+    // Check if we can statically find the method.
+    actual_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method);
   }
 
-  if (resolved_method == nullptr) {
-    // TODO: Can this still happen?
-    // Method cannot be resolved if it is in another dex file we do not have access to.
-    VLOG(compiler) << "Method cannot be resolved " << PrettyMethod(method_index, caller_dex_file);
-    return false;
+  bool cha_devirtualize = false;
+  if (actual_method == nullptr) {
+    ArtMethod* method = TryCHADevirtualization(resolved_method);
+    if (method != nullptr) {
+      cha_devirtualize = true;
+      actual_method = method;
+      LOG_NOTE() << "Try CHA-based inlining of " << actual_method->PrettyMethod();
+    }
   }
 
   if (actual_method != nullptr) {
-    bool result = TryInlineAndReplace(invoke_instruction, actual_method, /* do_rtp */ true);
+    bool result = TryInlineAndReplace(invoke_instruction,
+                                      actual_method,
+                                      ReferenceTypeInfo::CreateInvalid(),
+                                      /* do_rtp */ true,
+                                      cha_devirtualize);
     if (result && !invoke_instruction->IsInvokeStaticOrDirect()) {
-      MaybeRecordStat(kInlinedInvokeVirtualOrInterface);
+      if (cha_devirtualize) {
+        // Add dependency due to devirtualization. We've assumed resolved_method
+        // has single implementation.
+        outermost_graph_->AddCHASingleImplementationDependency(resolved_method);
+        MaybeRecordStat(kCHAInline);
+      } else {
+        MaybeRecordStat(kInlinedInvokeVirtualOrInterface);
+      }
     }
     return result;
   }
-  DCHECK(!invoke_instruction->IsInvokeStaticOrDirect());
 
-  // Check if we can use an inline cache.
-  ArtMethod* caller = graph_->GetArtMethod();
-  if (Runtime::Current()->UseJitCompilation()) {
-    // Under JIT, we should always know the caller.
-    DCHECK(caller != nullptr);
-    ScopedProfilingInfoInlineUse spiis(caller, soa.Self());
-    ProfilingInfo* profiling_info = spiis.GetProfilingInfo();
-    if (profiling_info != nullptr) {
-      const InlineCache& ic = *profiling_info->GetInlineCache(invoke_instruction->GetDexPc());
-      if (ic.IsUninitialized()) {
-        VLOG(compiler) << "Interface or virtual call to "
-                       << PrettyMethod(method_index, caller_dex_file)
-                       << " is not hit and not inlined";
-        return false;
-      } else if (ic.IsMonomorphic()) {
-        MaybeRecordStat(kMonomorphicCall);
-        if (outermost_graph_->IsCompilingOsr()) {
-          // If we are compiling OSR, we pretend this call is polymorphic, as we may come from the
-          // interpreter and it may have seen different receiver types.
-          return TryInlinePolymorphicCall(invoke_instruction, resolved_method, ic);
-        } else {
-          return TryInlineMonomorphicCall(invoke_instruction, resolved_method, ic);
-        }
-      } else if (ic.IsPolymorphic()) {
-        MaybeRecordStat(kPolymorphicCall);
-        return TryInlinePolymorphicCall(invoke_instruction, resolved_method, ic);
-      } else {
-        DCHECK(ic.IsMegamorphic());
-        VLOG(compiler) << "Interface or virtual call to "
-                       << PrettyMethod(method_index, caller_dex_file)
-                       << " is megamorphic and not inlined";
-        MaybeRecordStat(kMegamorphicCall);
-        return false;
-      }
-    }
-  }
+  // Try using inline caches.
+  return TryInlineFromInlineCache(caller_dex_file, invoke_instruction, resolved_method);
+}
+
+static Handle<mirror::ObjectArray<mirror::Class>> AllocateInlineCacheHolder(
+    const DexCompilationUnit& compilation_unit,
+    StackHandleScope<1>* hs)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  Thread* self = Thread::Current();
+  ClassLinker* class_linker = compilation_unit.GetClassLinker();
+  Handle<mirror::ObjectArray<mirror::Class>> inline_cache = hs->NewHandle(
+      mirror::ObjectArray<mirror::Class>::Alloc(
+          self,
+          class_linker->GetClassRoot(ClassLinker::kClassArrayClass),
+          InlineCache::kIndividualCacheSize));
+  if (inline_cache == nullptr) {
+    // We got an OOME. Just clear the exception, and don't inline.
+    DCHECK(self->IsExceptionPending());
+    self->ClearException();
+    VLOG(compiler) << "Out of memory in the compiler when trying to inline";
+  }
+  return inline_cache;
+}
+
+bool HInliner::UseOnlyPolymorphicInliningWithNoDeopt() {
+  // If we are compiling AOT or OSR, pretend the call using inline caches is polymorphic and
+  // do not generate a deopt.
+  //
+  // For AOT:
+  //    Generating a deopt does not ensure that we will actually capture the new types;
+  //    and the danger is that we could be stuck in a loop with "forever" deoptimizations.
+  //    Take for example the following scenario:
+  //      - we capture the inline cache in one run
+  //      - the next run, we deoptimize because we miss a type check, but the method
+  //        never becomes hot again
+  //    In this case, the inline cache will not be updated in the profile and the AOT code
+  //    will keep deoptimizing.
+  //    Another scenario is if we use profile compilation for a process which is not allowed
+  //    to JIT (e.g. system server). If we deoptimize we will run interpreted code for the
+  //    rest of the lifetime.
+  //  TODO(calin):
+  //    This is a compromise because we will most likely never update the inline cache
+  //    in the profile (unless there's another reason to deopt). So we might be stuck with
+  //    a sub-optimal inline cache.
+  //    We could be smarter when capturing inline caches to mitigate this.
+  //    (e.g. by having different thresholds for new and old methods).
+  //
+  // For OSR:
+  //     We may come from the interpreter and it may have seen different receiver types.
+  return Runtime::Current()->IsAotCompiler() || outermost_graph_->IsCompilingOsr();
+}
+
+bool HInliner::TryInlineFromInlineCache(const DexFile& caller_dex_file,
+                                        HInvoke* invoke_instruction,
+                                        ArtMethod* resolved_method)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  if (Runtime::Current()->IsAotCompiler() && !kUseAOTInlineCaches) {
+    return false;
+  }
+
+  StackHandleScope<1> hs(Thread::Current());
+  Handle<mirror::ObjectArray<mirror::Class>> inline_cache;
+  InlineCacheType inline_cache_type = Runtime::Current()->IsAotCompiler()
+      ? GetInlineCacheAOT(caller_dex_file, invoke_instruction, &hs, &inline_cache)
+      : GetInlineCacheJIT(invoke_instruction, &hs, &inline_cache);
+
+  switch (inline_cache_type) {
+    case kInlineCacheNoData: {
+      LOG_FAIL_NO_STAT()
+          << "Interface or virtual call to "
+          << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
+          << " could not be statically determined";
+      return false;
+    }
+
+    case kInlineCacheUninitialized: {
+      LOG_FAIL_NO_STAT()
+          << "Interface or virtual call to "
+          << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
+          << " is not hit and not inlined";
+      return false;
+    }
+
+    case kInlineCacheMonomorphic: {
+      MaybeRecordStat(kMonomorphicCall);
+      if (UseOnlyPolymorphicInliningWithNoDeopt()) {
+        return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
+      } else {
+        return TryInlineMonomorphicCall(invoke_instruction, resolved_method, inline_cache);
+      }
+    }
+
+    case kInlineCachePolymorphic: {
+      MaybeRecordStat(kPolymorphicCall);
+      return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
+    }
+
+    case kInlineCacheMegamorphic: {
+      LOG_FAIL_NO_STAT()
+          << "Interface or virtual call to "
+          << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
+          << " is megamorphic and not inlined";
+      MaybeRecordStat(kMegamorphicCall);
+      return false;
+    }
+
+    case kInlineCacheMissingTypes: {
+      LOG_FAIL_NO_STAT()
+          << "Interface or virtual call to "
+          << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
+          << " is missing types and not inlined";
+      return false;
+    }
  }
+  UNREACHABLE();
+}
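The classification that feeds this switch, done by GetInlineCacheType() above, reduces to counting the non-null entries of the fixed-size cache. A standalone sketch (the cache size is assumed to be 5, matching InlineCache::kIndividualCacheSize; the value itself is not stated in this diff):

    #include <array>
    #include <cstddef>

    enum class CacheType { kUninitialized, kMonomorphic, kPolymorphic, kMegamorphic };

    constexpr size_t kCacheSize = 5;  // Assumed InlineCache::kIndividualCacheSize.

    CacheType Classify(const std::array<const void*, kCacheSize>& classes) {
      size_t n = 0;
      while (n < kCacheSize && classes[n] != nullptr) {
        ++n;  // Receiver classes are stored densely; nullptr marks the end.
      }
      if (n == 0) return CacheType::kUninitialized;
      if (n == 1) return CacheType::kMonomorphic;
      if (n == kCacheSize) return CacheType::kMegamorphic;  // Cache overflowed.
      return CacheType::kPolymorphic;
    }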
 
-  VLOG(compiler) << "Interface or virtual call to "
-                 << PrettyMethod(method_index, caller_dex_file)
-                 << " could not be statically determined";
-  return false;
+HInliner::InlineCacheType HInliner::GetInlineCacheJIT(
+    HInvoke* invoke_instruction,
+    StackHandleScope<1>* hs,
+    /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK(Runtime::Current()->UseJitCompilation());
+
+  ArtMethod* caller = graph_->GetArtMethod();
+  // Under JIT, we should always know the caller.
+  DCHECK(caller != nullptr);
+  ScopedProfilingInfoInlineUse spiis(caller, Thread::Current());
+  ProfilingInfo* profiling_info = spiis.GetProfilingInfo();
+
+  if (profiling_info == nullptr) {
+    return kInlineCacheNoData;
+  }
+
+  *inline_cache = AllocateInlineCacheHolder(caller_compilation_unit_, hs);
+  if (inline_cache->Get() == nullptr) {
+    // We can't extract any data if we failed to allocate.
+    return kInlineCacheNoData;
+  } else {
+    Runtime::Current()->GetJit()->GetCodeCache()->CopyInlineCacheInto(
+        *profiling_info->GetInlineCache(invoke_instruction->GetDexPc()),
+        *inline_cache);
+    return GetInlineCacheType(*inline_cache);
+  }
+}
+
+HInliner::InlineCacheType HInliner::GetInlineCacheAOT(
+    const DexFile& caller_dex_file,
+    HInvoke* invoke_instruction,
+    StackHandleScope<1>* hs,
+    /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK(Runtime::Current()->IsAotCompiler());
+  const ProfileCompilationInfo* pci = compiler_driver_->GetProfileCompilationInfo();
+  if (pci == nullptr) {
+    return kInlineCacheNoData;
+  }
+
+  std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> offline_profile =
+      pci->GetMethod(caller_dex_file.GetLocation(),
+                     caller_dex_file.GetLocationChecksum(),
+                     caller_compilation_unit_.GetDexMethodIndex());
+  if (offline_profile == nullptr) {
+    return kInlineCacheNoData;  // no profile information for this invocation.
+  }
+
+  *inline_cache = AllocateInlineCacheHolder(caller_compilation_unit_, hs);
+  if (inline_cache == nullptr) {
+    // We can't extract any data if we failed to allocate.
+    return kInlineCacheNoData;
+  } else {
+    return ExtractClassesFromOfflineProfile(invoke_instruction,
+                                            *(offline_profile.get()),
+                                            *inline_cache);
+  }
+}
+
+HInliner::InlineCacheType HInliner::ExtractClassesFromOfflineProfile(
+    const HInvoke* invoke_instruction,
+    const ProfileCompilationInfo::OfflineProfileMethodInfo& offline_profile,
+    /*out*/Handle<mirror::ObjectArray<mirror::Class>> inline_cache)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  const auto it = offline_profile.inline_caches->find(invoke_instruction->GetDexPc());
+  if (it == offline_profile.inline_caches->end()) {
+    return kInlineCacheUninitialized;
+  }
+
+  const ProfileCompilationInfo::DexPcData& dex_pc_data = it->second;
+
+  if (dex_pc_data.is_missing_types) {
+    return kInlineCacheMissingTypes;
+  }
+  if (dex_pc_data.is_megamorphic) {
+    return kInlineCacheMegamorphic;
+  }
+
+  DCHECK_LE(dex_pc_data.classes.size(), InlineCache::kIndividualCacheSize);
+  Thread* self = Thread::Current();
+  // We need to resolve the class relative to the containing dex file.
+  // So first, build a mapping from the index of dex file in the profile to
+  // its dex cache. This will avoid repeating the lookup when walking over
+  // the inline cache types.
+  std::vector<ObjPtr<mirror::DexCache>> dex_profile_index_to_dex_cache(
+      offline_profile.dex_references.size());
+  for (size_t i = 0; i < offline_profile.dex_references.size(); i++) {
+    bool found = false;
+    for (const DexFile* dex_file : compiler_driver_->GetDexFilesForOatFile()) {
+      if (offline_profile.dex_references[i].MatchesDex(dex_file)) {
+        dex_profile_index_to_dex_cache[i] =
+            caller_compilation_unit_.GetClassLinker()->FindDexCache(self, *dex_file);
+        found = true;
+      }
+    }
+    if (!found) {
+      VLOG(compiler) << "Could not find profiled dex file: "
+                     << offline_profile.dex_references[i].dex_location;
+      return kInlineCacheMissingTypes;
+    }
+  }
+
+  // Walk over the classes and resolve them. If we cannot find a type we return
+  // kInlineCacheMissingTypes.
+  int ic_index = 0;
+  for (const ProfileCompilationInfo::ClassReference& class_ref : dex_pc_data.classes) {
+    ObjPtr<mirror::DexCache> dex_cache =
+        dex_profile_index_to_dex_cache[class_ref.dex_profile_index];
+    DCHECK(dex_cache != nullptr);
+
+    if (!dex_cache->GetDexFile()->IsTypeIndexValid(class_ref.type_index)) {
+      VLOG(compiler) << "Profile data corrupt: type index " << class_ref.type_index
+                     << " is invalid in location " << dex_cache->GetDexFile()->GetLocation();
+      return kInlineCacheNoData;
+    }
+    ObjPtr<mirror::Class> clazz = ClassLinker::LookupResolvedType(
+        class_ref.type_index,
+        dex_cache,
+        caller_compilation_unit_.GetClassLoader().Get());
+    if (clazz != nullptr) {
+      inline_cache->Set(ic_index++, clazz);
+    } else {
+      VLOG(compiler) << "Could not resolve class from inline cache in AOT mode "
+                     << caller_compilation_unit_.GetDexFile()->PrettyMethod(
+                            invoke_instruction->GetDexMethodIndex()) << " : "
+                     << caller_compilation_unit_
+                            .GetDexFile()->StringByTypeIdx(class_ref.type_index);
+      return kInlineCacheMissingTypes;
+    }
+  }
+  return GetInlineCacheType(inline_cache);
+}
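The two-pass structure of ExtractClassesFromOfflineProfile() — first map each dex file referenced by the profile to its dex cache, then resolve every class against the cache of the dex file it came from — can be reduced to toy types (illustrative only, not the real ProfileCompilationInfo API):

    #include <string>
    #include <unordered_map>
    #include <vector>

    struct ClassRef { size_t dex_profile_index; int type_index; };
    using DexCache = std::unordered_map<int, std::string>;  // type index -> class name.

    // Returns false if any class is unresolved; the real code then reports
    // kInlineCacheMissingTypes and refuses to inline from this cache.
    bool ResolveAll(const std::vector<DexCache>& profile_index_to_cache,
                    const std::vector<ClassRef>& refs,
                    std::vector<std::string>* out) {
      for (const ClassRef& ref : refs) {
        const DexCache& cache = profile_index_to_cache.at(ref.dex_profile_index);
        auto it = cache.find(ref.type_index);
        if (it == cache.end()) {
          return false;
        }
        out->push_back(it->second);
      }
      return true;
    }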
 
@@ -349,68 +695,98 @@ HInstanceFieldGet* HInliner::BuildGetReceiverClass(ClassLinker* class_linker,
   DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_");
   HInstanceFieldGet* result = new (graph_->GetArena()) HInstanceFieldGet(
       receiver,
+      field,
       Primitive::kPrimNot,
       field->GetOffset(),
       field->IsVolatile(),
       field->GetDexFieldIndex(),
       field->GetDeclaringClass()->GetDexClassDefIndex(),
       *field->GetDexFile(),
-      handles_->NewHandle(field->GetDexCache()),
       dex_pc);
   // The class of a field is effectively final, and does not have any memory dependencies.
   result->SetSideEffects(SideEffects::None());
   return result;
 }
 
+static ArtMethod* ResolveMethodFromInlineCache(Handle<mirror::Class> klass,
+                                               ArtMethod* resolved_method,
+                                               HInstruction* invoke_instruction,
+                                               PointerSize pointer_size)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  if (Runtime::Current()->IsAotCompiler()) {
+    // We can get unrelated types when working with profiles (corruption,
+    // system updates, or anyone can write to it). So first check if the class
+    // actually implements the declaring class of the method that is being
+    // called in bytecode.
+    // Note: the lookup methods used below require to have assignable types.
+    if (!resolved_method->GetDeclaringClass()->IsAssignableFrom(klass.Get())) {
+      return nullptr;
+    }
+  }
+
+  if (invoke_instruction->IsInvokeInterface()) {
+    resolved_method = klass->FindVirtualMethodForInterface(resolved_method, pointer_size);
+  } else {
+    DCHECK(invoke_instruction->IsInvokeVirtual());
+    resolved_method = klass->FindVirtualMethodForVirtual(resolved_method, pointer_size);
+  }
+  DCHECK(resolved_method != nullptr);
+  return resolved_method;
+}
+
 bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
                                         ArtMethod* resolved_method,
-                                        const InlineCache& ic) {
+                                        Handle<mirror::ObjectArray<mirror::Class>> classes) {
   DCHECK(invoke_instruction->IsInvokeVirtual() || invoke_instruction->IsInvokeInterface())
       << invoke_instruction->DebugName();
 
-  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
-  uint32_t class_index = FindClassIndexIn(
-      ic.GetMonomorphicType(), caller_dex_file, caller_compilation_unit_.GetDexCache());
-  if (class_index == DexFile::kDexNoIndex) {
-    VLOG(compiler) << "Call to " << PrettyMethod(resolved_method)
-                   << " from inline cache is not inlined because its class is not"
-                   << " accessible to the caller";
+  dex::TypeIndex class_index = FindClassIndexIn(
+      GetMonomorphicType(classes), caller_compilation_unit_);
+  if (!class_index.IsValid()) {
+    LOG_FAIL(kNotInlinedDexCache)
+        << "Call to " << ArtMethod::PrettyMethod(resolved_method)
+        << " from inline cache is not inlined because its class is not"
+        << " accessible to the caller";
     return false;
   }
 
   ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
-  size_t pointer_size = class_linker->GetImagePointerSize();
-  if (invoke_instruction->IsInvokeInterface()) {
-    resolved_method = ic.GetMonomorphicType()->FindVirtualMethodForInterface(
-        resolved_method, pointer_size);
-  } else {
-    DCHECK(invoke_instruction->IsInvokeVirtual());
-    resolved_method = ic.GetMonomorphicType()->FindVirtualMethodForVirtual(
-        resolved_method, pointer_size);
+  PointerSize pointer_size = class_linker->GetImagePointerSize();
+  Handle<mirror::Class> monomorphic_type = handles_->NewHandle(GetMonomorphicType(classes));
+  resolved_method = ResolveMethodFromInlineCache(
+      monomorphic_type, resolved_method, invoke_instruction, pointer_size);
+
+  LOG_NOTE() << "Try inline monomorphic call to " << resolved_method->PrettyMethod();
+  if (resolved_method == nullptr) {
+    // Bogus AOT profile, bail.
+    DCHECK(Runtime::Current()->IsAotCompiler());
+    return false;
   }
-  DCHECK(resolved_method != nullptr);
+
   HInstruction* receiver = invoke_instruction->InputAt(0);
   HInstruction* cursor = invoke_instruction->GetPrevious();
   HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
-
-  if (!TryInlineAndReplace(invoke_instruction, resolved_method, /* do_rtp */ false)) {
+  if (!TryInlineAndReplace(invoke_instruction,
+                           resolved_method,
+                           ReferenceTypeInfo::Create(monomorphic_type, /* is_exact */ true),
+                           /* do_rtp */ false,
+                           /* cha_devirtualize */ false)) {
     return false;
   }
 
   // We successfully inlined, now add a guard.
-  bool is_referrer =
-      (ic.GetMonomorphicType() == outermost_graph_->GetArtMethod()->GetDeclaringClass());
   AddTypeGuard(receiver,
                cursor,
                bb_cursor,
                class_index,
-               is_referrer,
+               monomorphic_type,
                invoke_instruction,
                /* with_deoptimization */ true);
 
   // Run type propagation to get the guard typed, and eventually propagate the
   // type of the receiver.
   ReferenceTypePropagation rtp_fixup(graph_,
+                                     outer_compilation_unit_.GetClassLoader(),
                                      outer_compilation_unit_.GetDexCache(),
                                      handles_,
                                      /* is_first_run */ false);
@@ -420,109 +796,177 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
   return true;
 }
 
+void HInliner::AddCHAGuard(HInstruction* invoke_instruction,
+                           uint32_t dex_pc,
+                           HInstruction* cursor,
+                           HBasicBlock* bb_cursor) {
+  HShouldDeoptimizeFlag* deopt_flag = new (graph_->GetArena())
+      HShouldDeoptimizeFlag(graph_->GetArena(), dex_pc);
+  HInstruction* compare = new (graph_->GetArena()) HNotEqual(
+      deopt_flag, graph_->GetIntConstant(0, dex_pc));
+  HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(
+      graph_->GetArena(), compare, DeoptimizationKind::kCHA, dex_pc);
+
+  if (cursor != nullptr) {
+    bb_cursor->InsertInstructionAfter(deopt_flag, cursor);
+  } else {
+    bb_cursor->InsertInstructionBefore(deopt_flag, bb_cursor->GetFirstInstruction());
+  }
+  bb_cursor->InsertInstructionAfter(compare, deopt_flag);
+  bb_cursor->InsertInstructionAfter(deopt, compare);
+
+  // Add receiver as input to aid CHA guard optimization later.
+  deopt_flag->AddInput(invoke_instruction->InputAt(0));
+  DCHECK_EQ(deopt_flag->InputCount(), 1u);
+  deopt->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+  outermost_graph_->IncrementNumberOfCHAGuards();
+}
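Lowered to plain control flow, the guard built by AddCHAGuard() amounts to the following hand-written equivalent (a sketch, not compiler output):

    // The runtime sets a per-frame "should deoptimize" flag when a class load
    // invalidates the single-implementation assumption made at compile time;
    // the compiled code tests the flag and bails out to the interpreter.
    void ChaGuard(int should_deoptimize_flag, void (*deoptimize)()) {
      if (should_deoptimize_flag != 0) {  // HNotEqual(deopt_flag, 0)
        deoptimize();                     // HDeoptimize(DeoptimizationKind::kCHA)
      }
    }

The receiver is added as an extra input to the flag only so later passes can optimize the guard; it is not tested at run time.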
 
 HInstruction* HInliner::AddTypeGuard(HInstruction* receiver,
                                      HInstruction* cursor,
                                      HBasicBlock* bb_cursor,
-                                     uint32_t class_index,
-                                     bool is_referrer,
+                                     dex::TypeIndex class_index,
+                                     Handle<mirror::Class> klass,
                                      HInstruction* invoke_instruction,
                                      bool with_deoptimization) {
   ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
   HInstanceFieldGet* receiver_class = BuildGetReceiverClass(
       class_linker, receiver, invoke_instruction->GetDexPc());
+  if (cursor != nullptr) {
+    bb_cursor->InsertInstructionAfter(receiver_class, cursor);
+  } else {
+    bb_cursor->InsertInstructionBefore(receiver_class, bb_cursor->GetFirstInstruction());
+  }
 
   const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+  bool is_referrer;
+  ArtMethod* outermost_art_method = outermost_graph_->GetArtMethod();
+  if (outermost_art_method == nullptr) {
+    DCHECK(Runtime::Current()->IsAotCompiler());
+    // We are in AOT mode and we don't have an ART method to determine
+    // if the inlined method belongs to the referrer. Assume it doesn't.
+    is_referrer = false;
+  } else {
+    is_referrer = klass.Get() == outermost_art_method->GetDeclaringClass();
+  }
+
   // Note that we will just compare the classes, so we don't need Java semantics access checks.
-  // Also, the caller of `AddTypeGuard` must have guaranteed that the class is in the dex cache.
+  // Note that the type index and the dex file are relative to the method this type guard is
+  // inlined into.
   HLoadClass* load_class = new (graph_->GetArena()) HLoadClass(graph_->GetCurrentMethod(),
                                                                class_index,
                                                                caller_dex_file,
+                                                               klass,
                                                                is_referrer,
                                                                invoke_instruction->GetDexPc(),
-                                                               /* needs_access_check */ false,
-                                                               /* is_in_dex_cache */ true);
+                                                               /* needs_access_check */ false);
+  HLoadClass::LoadKind kind = HSharpening::ComputeLoadClassKind(
+      load_class, codegen_, compiler_driver_, caller_compilation_unit_);
+  DCHECK(kind != HLoadClass::LoadKind::kInvalid)
+      << "We should always be able to reference a class for inline caches";
+  // Insert before setting the kind, as setting the kind affects the inputs.
+  bb_cursor->InsertInstructionAfter(load_class, receiver_class);
+  load_class->SetLoadKind(kind);
+  // In AOT mode, we will most likely load the class from BSS, which will involve a call
+  // to the runtime. In this case, the load instruction will need an environment so copy
+  // it from the invoke instruction.
+  if (load_class->NeedsEnvironment()) {
+    DCHECK(Runtime::Current()->IsAotCompiler());
+    load_class->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+  }
 
   HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class);
-  // TODO: Extend reference type propagation to understand the guard.
-  if (cursor != nullptr) {
-    bb_cursor->InsertInstructionAfter(receiver_class, cursor);
-  } else {
-    bb_cursor->InsertInstructionBefore(receiver_class, bb_cursor->GetFirstInstruction());
-  }
-  bb_cursor->InsertInstructionAfter(load_class, receiver_class);
   bb_cursor->InsertInstructionAfter(compare, load_class);
   if (with_deoptimization) {
     HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
-        compare, invoke_instruction->GetDexPc());
+        graph_->GetArena(),
+        compare,
+        receiver,
+        Runtime::Current()->IsAotCompiler()
+            ? DeoptimizationKind::kAotInlineCache
+            : DeoptimizationKind::kJitInlineCache,
+        invoke_instruction->GetDexPc());
     bb_cursor->InsertInstructionAfter(deoptimize, compare);
     deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+    DCHECK_EQ(invoke_instruction->InputAt(0), receiver);
+    receiver->ReplaceUsesDominatedBy(deoptimize, deoptimize);
+    deoptimize->SetReferenceTypeInfo(receiver->GetReferenceTypeInfo());
   }
   return compare;
 }
 
 bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
                                         ArtMethod* resolved_method,
-                                        const InlineCache& ic) {
+                                        Handle<mirror::ObjectArray<mirror::Class>> classes) {
   DCHECK(invoke_instruction->IsInvokeVirtual() || invoke_instruction->IsInvokeInterface())
       << invoke_instruction->DebugName();
 
-  if (TryInlinePolymorphicCallToSameTarget(invoke_instruction, resolved_method, ic)) {
+  if (TryInlinePolymorphicCallToSameTarget(invoke_instruction, resolved_method, classes)) {
     return true;
   }
 
   ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
-  size_t pointer_size = class_linker->GetImagePointerSize();
-  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+  PointerSize pointer_size = class_linker->GetImagePointerSize();
 
   bool all_targets_inlined = true;
   bool one_target_inlined = false;
   for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
-    if (ic.GetTypeAt(i) == nullptr) {
+    if (classes->Get(i) == nullptr) {
       break;
     }
     ArtMethod* method = nullptr;
-    if (invoke_instruction->IsInvokeInterface()) {
-      method = ic.GetTypeAt(i)->FindVirtualMethodForInterface(
-          resolved_method, pointer_size);
-    } else {
-      DCHECK(invoke_instruction->IsInvokeVirtual());
-      method = ic.GetTypeAt(i)->FindVirtualMethodForVirtual(
-          resolved_method, pointer_size);
+
+    Handle<mirror::Class> handle = handles_->NewHandle(classes->Get(i));
+    method = ResolveMethodFromInlineCache(
+        handle, resolved_method, invoke_instruction, pointer_size);
+    if (method == nullptr) {
+      DCHECK(Runtime::Current()->IsAotCompiler());
+      // AOT profile is bogus. This loop expects to iterate over all entries,
+      // so just continue.
+      all_targets_inlined = false;
+      continue;
     }
 
     HInstruction* receiver = invoke_instruction->InputAt(0);
     HInstruction* cursor = invoke_instruction->GetPrevious();
     HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
 
-    uint32_t class_index = FindClassIndexIn(
-        ic.GetTypeAt(i), caller_dex_file, caller_compilation_unit_.GetDexCache());
+    dex::TypeIndex class_index = FindClassIndexIn(handle.Get(), caller_compilation_unit_);
     HInstruction* return_replacement = nullptr;
-    if (class_index == DexFile::kDexNoIndex ||
-        !TryBuildAndInline(invoke_instruction, method, &return_replacement)) {
+    LOG_NOTE() << "Try inline polymorphic call to " << method->PrettyMethod();
+    if (!class_index.IsValid() ||
+        !TryBuildAndInline(invoke_instruction,
+                           method,
+                           ReferenceTypeInfo::Create(handle, /* is_exact */ true),
+                           &return_replacement)) {
      all_targets_inlined = false;
    } else {
      one_target_inlined = true;
+
+      LOG_SUCCESS() << "Polymorphic call to " << ArtMethod::PrettyMethod(resolved_method)
+                    << " has inlined " << ArtMethod::PrettyMethod(method);
 
       // If we have inlined all targets before, and this receiver is the last seen,
       // we deoptimize instead of keeping the original invoke instruction.
-      bool deoptimize = all_targets_inlined &&
+      bool deoptimize = !UseOnlyPolymorphicInliningWithNoDeopt() &&
+          all_targets_inlined &&
           (i != InlineCache::kIndividualCacheSize - 1) &&
-          (ic.GetTypeAt(i + 1) == nullptr);
-
-      if (outermost_graph_->IsCompilingOsr()) {
-        // We do not support HDeoptimize in OSR methods.
-        deoptimize = false;
-      }
-      HInstruction* compare = AddTypeGuard(
-          receiver, cursor, bb_cursor, class_index, is_referrer, invoke_instruction, deoptimize);
+          (classes->Get(i + 1) == nullptr);
+
+      HInstruction* compare = AddTypeGuard(receiver,
+                                           cursor,
+                                           bb_cursor,
+                                           class_index,
+                                           handle,
+                                           invoke_instruction,
+                                           deoptimize);
       if (deoptimize) {
         if (return_replacement != nullptr) {
           invoke_instruction->ReplaceWith(return_replacement);
         }
         invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
         // Because the inline cache data can be populated concurrently, we force the end of the
-        // iteration. Otherhwise, we could see a new receiver type.
+        // iteration. Otherwise, we could see a new receiver type.
         break;
       } else {
         CreateDiamondPatternForPolymorphicInline(compare, return_replacement, invoke_instruction);
@@ -531,15 +975,18 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
   }
 
   if (!one_target_inlined) {
-    VLOG(compiler) << "Call to " << PrettyMethod(resolved_method)
-                   << " from inline cache is not inlined because none"
-                   << " of its targets could be inlined";
+    LOG_FAIL_NO_STAT()
+        << "Call to " << ArtMethod::PrettyMethod(resolved_method)
+        << " from inline cache is not inlined because none"
+        << " of its targets could be inlined";
     return false;
   }
+
   MaybeRecordStat(kInlinedPolymorphicCall);
 
   // Run type propagation to get the guards typed.
   ReferenceTypePropagation rtp_fixup(graph_,
+                                     outer_compilation_unit_.GetClassLoader(),
                                      outer_compilation_unit_.GetDexCache(),
                                      handles_,
                                      /* is_first_run */ false);
@@ -623,17 +1070,17 @@ void HInliner::CreateDiamondPatternForPolymorphicInline(HInstruction* compare,
                merge, original_invoke_block, /* replace_if_back_edge */ true);
 }
 
-bool HInliner::TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction,
-                                                    ArtMethod* resolved_method,
-                                                    const InlineCache& ic) {
+bool HInliner::TryInlinePolymorphicCallToSameTarget(
+    HInvoke* invoke_instruction,
+    ArtMethod* resolved_method,
+    Handle<mirror::ObjectArray<mirror::Class>> classes) {
   // This optimization only works under JIT for now.
-  DCHECK(Runtime::Current()->UseJitCompilation());
-  if (graph_->GetInstructionSet() == kMips64) {
-    // TODO: Support HClassTableGet for mips64.
+  if (!Runtime::Current()->UseJitCompilation()) {
     return false;
   }
+
   ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
-  size_t pointer_size = class_linker->GetImagePointerSize();
+  PointerSize pointer_size = class_linker->GetImagePointerSize();
 
   DCHECK(resolved_method != nullptr);
   ArtMethod* actual_method = nullptr;
@@ -644,13 +1091,13 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction,
   // Check whether we are actually calling the same method among
   // the different types seen.
   for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
-    if (ic.GetTypeAt(i) == nullptr) {
+    if (classes->Get(i) == nullptr) {
       break;
     }
     ArtMethod* new_method = nullptr;
     if (invoke_instruction->IsInvokeInterface()) {
-      new_method = ic.GetTypeAt(i)->GetImt(pointer_size)->Get(
-          method_index % ImTable::kSize, pointer_size);
+      new_method = classes->Get(i)->GetImt(pointer_size)->Get(
+          method_index, pointer_size);
       if (new_method->IsRuntimeMethod()) {
         // Bail out as soon as we see a conflict trampoline in one of the target's
         // interface table.
@@ -658,16 +1105,13 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction,
       }
     } else {
       DCHECK(invoke_instruction->IsInvokeVirtual());
-      new_method = ic.GetTypeAt(i)->GetEmbeddedVTableEntry(method_index, pointer_size);
+      new_method = classes->Get(i)->GetEmbeddedVTableEntry(method_index, pointer_size);
     }
     DCHECK(new_method != nullptr);
     if (actual_method == nullptr) {
       actual_method = new_method;
     } else if (actual_method != new_method) {
       // Different methods, bailout.
-      VLOG(compiler) << "Call to " << PrettyMethod(resolved_method)
-                     << " from inline cache is not inlined because it resolves"
-                     << " to different methods";
       return false;
     }
   }
@@ -677,7 +1121,10 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction,
   HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
 
   HInstruction* return_replacement = nullptr;
-  if (!TryBuildAndInline(invoke_instruction, actual_method, &return_replacement)) {
+  if (!TryBuildAndInline(invoke_instruction,
+                         actual_method,
+                         ReferenceTypeInfo::CreateInvalid(),
+                         &return_replacement)) {
     return false;
  }
 
@@ -717,19 +1164,25 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction,
   if (outermost_graph_->IsCompilingOsr()) {
     CreateDiamondPatternForPolymorphicInline(compare, return_replacement, invoke_instruction);
   } else {
-    // TODO: Extend reference type propagation to understand the guard.
     HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
-        compare, invoke_instruction->GetDexPc());
+        graph_->GetArena(),
+        compare,
+        receiver,
+        DeoptimizationKind::kJitSameTarget,
+        invoke_instruction->GetDexPc());
     bb_cursor->InsertInstructionAfter(deoptimize, compare);
     deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
     if (return_replacement != nullptr) {
       invoke_instruction->ReplaceWith(return_replacement);
     }
+    receiver->ReplaceUsesDominatedBy(deoptimize, deoptimize);
    invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
+    deoptimize->SetReferenceTypeInfo(receiver->GetReferenceTypeInfo());
   }
 
   // Run type propagation to get the guard typed.
   ReferenceTypePropagation rtp_fixup(graph_,
+                                     outer_compilation_unit_.GetClassLoader(),
                                      outer_compilation_unit_.GetDexCache(),
                                      handles_,
                                      /* is_first_run */ false);
@@ -737,28 +1190,116 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction,
 
   MaybeRecordStat(kInlinedPolymorphicCall);
 
+  LOG_SUCCESS() << "Inlined same polymorphic target " << actual_method->PrettyMethod();
   return true;
 }
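The same-target loop above asks: do all receiver classes recorded in the cache resolve the given vtable (or IMT) slot to one and the same method? In toy form (illustrative only; a class is represented here just by its vtable, and the cache size of 5 is an assumption):

    #include <array>
    #include <cstddef>

    using Method = const void*;
    constexpr size_t kCacheSize = 5;

    Method FindCommonTarget(const std::array<const Method*, kCacheSize>& vtables,
                            size_t method_index) {
      Method common = nullptr;
      for (const Method* vtable : vtables) {
        if (vtable == nullptr) break;    // End of the recorded receiver classes.
        Method m = vtable[method_index];
        if (common == nullptr) {
          common = m;
        } else if (common != m) {
          return nullptr;                // Different targets: bail out.
        }
      }
      return common;
    }

When a single target is found, one guard on the table slot (HClassTableGet compared against the method) replaces per-class type checks.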
 
-bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* method, bool do_rtp) {
+bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction,
+                                   ArtMethod* method,
+                                   ReferenceTypeInfo receiver_type,
+                                   bool do_rtp,
+                                   bool cha_devirtualize) {
   HInstruction* return_replacement = nullptr;
-  if (!TryBuildAndInline(invoke_instruction, method, &return_replacement)) {
-    return false;
+  uint32_t dex_pc = invoke_instruction->GetDexPc();
+  HInstruction* cursor = invoke_instruction->GetPrevious();
+  HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
+  if (!TryBuildAndInline(invoke_instruction, method, receiver_type, &return_replacement)) {
+    if (invoke_instruction->IsInvokeInterface()) {
+      DCHECK(!method->IsProxyMethod());
+      // Turn an invoke-interface into an invoke-virtual. An invoke-virtual is always
+      // better than an invoke-interface because:
+      // 1) In the best case, the interface call has one more indirection (to fetch the IMT).
+      // 2) We will not go to the conflict trampoline with an invoke-virtual.
+      // TODO: Consider sharpening once it is not dependent on the compiler driver.
+
+      if (method->IsDefault() && !method->IsCopied()) {
+        // Changing to invoke-virtual cannot be done on an original default method
+        // since it's not in any vtable. Devirtualization by exact type/inline-cache
+        // always uses a method in the iftable which is never an original default
+        // method.
+        // On the other hand, inlining an original default method by CHA is fine.
+        DCHECK(cha_devirtualize);
+        return false;
+      }
+
+      const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+      uint32_t dex_method_index = FindMethodIndexIn(
+          method, caller_dex_file, invoke_instruction->GetDexMethodIndex());
+      if (dex_method_index == DexFile::kDexNoIndex) {
+        return false;
+      }
+      HInvokeVirtual* new_invoke = new (graph_->GetArena()) HInvokeVirtual(
+          graph_->GetArena(),
+          invoke_instruction->GetNumberOfArguments(),
+          invoke_instruction->GetType(),
+          invoke_instruction->GetDexPc(),
+          dex_method_index,
+          method,
+          method->GetMethodIndex());
+      HInputsRef inputs = invoke_instruction->GetInputs();
+      for (size_t index = 0; index != inputs.size(); ++index) {
+        new_invoke->SetArgumentAt(index, inputs[index]);
+      }
+      invoke_instruction->GetBlock()->InsertInstructionBefore(new_invoke, invoke_instruction);
+      new_invoke->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+      if (invoke_instruction->GetType() == Primitive::kPrimNot) {
+        new_invoke->SetReferenceTypeInfo(invoke_instruction->GetReferenceTypeInfo());
+      }
+      return_replacement = new_invoke;
+    } else {
+      // TODO: Consider sharpening an invoke virtual once it is not dependent on the
+      // compiler driver.
+      return false;
+    }
+  }
+  if (cha_devirtualize) {
+    AddCHAGuard(invoke_instruction, dex_pc, cursor, bb_cursor);
   }
   if (return_replacement != nullptr) {
     invoke_instruction->ReplaceWith(return_replacement);
   }
   invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
-  FixUpReturnReferenceType(invoke_instruction, method, return_replacement, do_rtp);
+  FixUpReturnReferenceType(method, return_replacement);
+  if (do_rtp && ReturnTypeMoreSpecific(invoke_instruction, return_replacement)) {
+    // Actual return value has a more specific type than the method's declared
+    // return type. Run RTP again on the outer graph to propagate it.
+    ReferenceTypePropagation(graph_,
+                             outer_compilation_unit_.GetClassLoader(),
+                             outer_compilation_unit_.GetDexCache(),
+                             handles_,
+                             /* is_first_run */ false).Run();
+  }
   return true;
 }
 
+size_t HInliner::CountRecursiveCallsOf(ArtMethod* method) const {
+  const HInliner* current = this;
+  size_t count = 0;
+  do {
+    if (current->graph_->GetArtMethod() == method) {
+      ++count;
+    }
+    current = current->parent_;
+  } while (current != nullptr);
+  return count;
+}
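CountRecursiveCallsOf() walks the chain of nested inliner passes and counts how often the method already appears as a caller. A toy equivalent (illustrative only):

    #include <cstddef>

    struct Frame {
      const void* method;   // Method being compiled at this inlining depth.
      const Frame* parent;  // Enclosing HInliner; nullptr at the outermost one.
    };

    size_t CountOccurrences(const Frame* innermost, const void* method) {
      size_t count = 0;
      for (const Frame* f = innermost; f != nullptr; f = f->parent) {
        if (f->method == method) {
          ++count;
        }
      }
      return count;
    }

TryBuildAndInline() below uses this count to stop unrolling recursive calls once kMaximumNumberOfRecursiveCalls is exceeded.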
failures un-handled by the compiler, so it cannot be inlined"; + LOG_FAIL(kNotInlinedNotVerified) + << "Method " << method->PrettyMethod() + << " has soft failures un-handled by the compiler, so it cannot be inlined"; } if (!method->GetDeclaringClass()->IsVerified()) { @@ -814,8 +1359,9 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, if (Runtime::Current()->UseJitCompilation() || !compiler_driver_->IsMethodVerifiedWithoutFailures( method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) { - VLOG(compiler) << "Method " << PrettyMethod(method) - << " couldn't be verified, so it cannot be inlined"; + LOG_FAIL(kNotInlinedNotVerified) + << "Method " << method->PrettyMethod() + << " couldn't be verified, so it cannot be inlined"; return false; } } @@ -824,24 +1370,25 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, invoke_instruction->AsInvokeStaticOrDirect()->IsStaticWithImplicitClinitCheck()) { // Case of a static method that cannot be inlined because it implicitly // requires an initialization check of its declaring class. - VLOG(compiler) << "Method " << PrettyMethod(method) - << " is not inlined because it is static and requires a clinit" - << " check that cannot be emitted due to Dex cache limitations"; + LOG_FAIL(kNotInlinedDexCache) << "Method " << method->PrettyMethod() + << " is not inlined because it is static and requires a clinit" + << " check that cannot be emitted due to Dex cache limitations"; return false; } - if (!TryBuildAndInlineHelper(invoke_instruction, method, same_dex_file, return_replacement)) { + if (!TryBuildAndInlineHelper( + invoke_instruction, method, receiver_type, same_dex_file, return_replacement)) { return false; } - VLOG(compiler) << "Successfully inlined " << PrettyMethod(method); + LOG_SUCCESS() << method->PrettyMethod(); MaybeRecordStat(kInlinedInvoke); return true; } static HInstruction* GetInvokeInputForArgVRegIndex(HInvoke* invoke_instruction, size_t arg_vreg_index) - SHARED_REQUIRES(Locks::mutator_lock_) { + REQUIRES_SHARED(Locks::mutator_lock_) { size_t input_index = 0; for (size_t i = 0; i < arg_vreg_index; ++i, ++input_index) { DCHECK_LT(input_index, invoke_instruction->GetNumberOfArguments()); @@ -886,9 +1433,8 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction, // TODO: Needs null check. return false; } - Handle<mirror::DexCache> dex_cache(handles_->NewHandle(resolved_method->GetDexCache())); HInstruction* obj = GetInvokeInputForArgVRegIndex(invoke_instruction, data.object_arg); - HInstanceFieldGet* iget = CreateInstanceFieldGet(dex_cache, data.field_idx, obj); + HInstanceFieldGet* iget = CreateInstanceFieldGet(data.field_idx, resolved_method, obj); DCHECK_EQ(iget->GetFieldOffset().Uint32Value(), data.field_offset); DCHECK_EQ(iget->IsVolatile() ? 1u : 0u, data.is_volatile); invoke_instruction->GetBlock()->InsertInstructionBefore(iget, invoke_instruction); @@ -901,10 +1447,9 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction, // TODO: Needs null check. 
return false; } - Handle<mirror::DexCache> dex_cache(handles_->NewHandle(resolved_method->GetDexCache())); HInstruction* obj = GetInvokeInputForArgVRegIndex(invoke_instruction, data.object_arg); HInstruction* value = GetInvokeInputForArgVRegIndex(invoke_instruction, data.src_arg); - HInstanceFieldSet* iput = CreateInstanceFieldSet(dex_cache, data.field_idx, obj, value); + HInstanceFieldSet* iput = CreateInstanceFieldSet(data.field_idx, resolved_method, obj, value); DCHECK_EQ(iput->GetFieldOffset().Uint32Value(), data.field_offset); DCHECK_EQ(iput->IsVolatile() ? 1u : 0u, data.is_volatile); invoke_instruction->GetBlock()->InsertInstructionBefore(iput, invoke_instruction); @@ -938,24 +1483,19 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction, [](uint16_t index) { return index != DexFile::kDexNoIndex16; })); // Create HInstanceFieldSet for each IPUT that stores non-zero data. - Handle<mirror::DexCache> dex_cache; HInstruction* obj = GetInvokeInputForArgVRegIndex(invoke_instruction, /* this */ 0u); bool needs_constructor_barrier = false; for (size_t i = 0; i != number_of_iputs; ++i) { HInstruction* value = GetInvokeInputForArgVRegIndex(invoke_instruction, iput_args[i]); if (!value->IsConstant() || !value->AsConstant()->IsZeroBitPattern()) { - if (dex_cache.GetReference() == nullptr) { - dex_cache = handles_->NewHandle(resolved_method->GetDexCache()); - } uint16_t field_index = iput_field_indexes[i]; - HInstanceFieldSet* iput = CreateInstanceFieldSet(dex_cache, field_index, obj, value); + bool is_final; + HInstanceFieldSet* iput = + CreateInstanceFieldSet(field_index, resolved_method, obj, value, &is_final); invoke_instruction->GetBlock()->InsertInstructionBefore(iput, invoke_instruction); // Check whether the field is final. If it is, we need to add a barrier. - size_t pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet()); - ArtField* resolved_field = dex_cache->GetResolvedField(field_index, pointer_size); - DCHECK(resolved_field != nullptr); - if (resolved_field->IsFinal()) { + if (is_final) { needs_constructor_barrier = true; } } @@ -974,72 +1514,99 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction, return true; } -HInstanceFieldGet* HInliner::CreateInstanceFieldGet(Handle<mirror::DexCache> dex_cache, - uint32_t field_index, +HInstanceFieldGet* HInliner::CreateInstanceFieldGet(uint32_t field_index, + ArtMethod* referrer, HInstruction* obj) - SHARED_REQUIRES(Locks::mutator_lock_) { - size_t pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet()); - ArtField* resolved_field = dex_cache->GetResolvedField(field_index, pointer_size); + REQUIRES_SHARED(Locks::mutator_lock_) { + ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); + ArtField* resolved_field = + class_linker->LookupResolvedField(field_index, referrer, /* is_static */ false); DCHECK(resolved_field != nullptr); HInstanceFieldGet* iget = new (graph_->GetArena()) HInstanceFieldGet( obj, + resolved_field, resolved_field->GetTypeAsPrimitiveType(), resolved_field->GetOffset(), resolved_field->IsVolatile(), field_index, resolved_field->GetDeclaringClass()->GetDexClassDefIndex(), - *dex_cache->GetDexFile(), - dex_cache, + *referrer->GetDexFile(), // Read barrier generates a runtime call in slow path and we need a valid // dex pc for the associated stack map. 0 is bogus but valid. Bug: 26854537. /* dex_pc */ 0); if (iget->GetType() == Primitive::kPrimNot) { // Use the same dex_cache that we used for field lookup as the hint_dex_cache. 
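The constructor pattern handled above materializes only those IPUTs that store a non-zero value, since a freshly allocated object is already zero-initialized, and it requests a constructor barrier as soon as any surviving store targets a final field. The decision, reduced to a sketch over assumed per-IPUT summaries:

#include <vector>

// Assumed summary of one IPUT in a matched constructor (not an ART type).
struct IPut {
  bool stores_zero;     // value is a zero bit pattern; the store can be elided
  bool field_is_final;  // final fields need a barrier before publication
};

// Mirrors the loop above: skip zero stores entirely, and require a barrier
// iff any store that survives writes to a final field.
bool NeedsConstructorBarrier(const std::vector<IPut>& iputs) {
  bool needs_barrier = false;
  for (const IPut& iput : iputs) {
    if (!iput.stores_zero && iput.field_is_final) {
      needs_barrier = true;
    }
  }
  return needs_barrier;
}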
- ReferenceTypePropagation rtp(graph_, dex_cache, handles_, /* is_first_run */ false); + Handle<mirror::DexCache> dex_cache = handles_->NewHandle(referrer->GetDexCache()); + ReferenceTypePropagation rtp(graph_, + outer_compilation_unit_.GetClassLoader(), + dex_cache, + handles_, + /* is_first_run */ false); rtp.Visit(iget); } return iget; } -HInstanceFieldSet* HInliner::CreateInstanceFieldSet(Handle<mirror::DexCache> dex_cache, - uint32_t field_index, +HInstanceFieldSet* HInliner::CreateInstanceFieldSet(uint32_t field_index, + ArtMethod* referrer, HInstruction* obj, - HInstruction* value) - SHARED_REQUIRES(Locks::mutator_lock_) { - size_t pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet()); - ArtField* resolved_field = dex_cache->GetResolvedField(field_index, pointer_size); + HInstruction* value, + bool* is_final) + REQUIRES_SHARED(Locks::mutator_lock_) { + ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); + ArtField* resolved_field = + class_linker->LookupResolvedField(field_index, referrer, /* is_static */ false); DCHECK(resolved_field != nullptr); + if (is_final != nullptr) { + // This information is needed only for constructors. + DCHECK(referrer->IsConstructor()); + *is_final = resolved_field->IsFinal(); + } HInstanceFieldSet* iput = new (graph_->GetArena()) HInstanceFieldSet( obj, value, + resolved_field, resolved_field->GetTypeAsPrimitiveType(), resolved_field->GetOffset(), resolved_field->IsVolatile(), field_index, resolved_field->GetDeclaringClass()->GetDexClassDefIndex(), - *dex_cache->GetDexFile(), - dex_cache, + *referrer->GetDexFile(), // Read barrier generates a runtime call in slow path and we need a valid // dex pc for the associated stack map. 0 is bogus but valid. Bug: 26854537. /* dex_pc */ 0); return iput; } +template <typename T> +static inline Handle<T> NewHandleIfDifferent(T* object, + Handle<T> hint, + VariableSizedHandleScope* handles) + REQUIRES_SHARED(Locks::mutator_lock_) { + return (object != hint.Get()) ? 
handles->NewHandle(object) : hint; +} + bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, ArtMethod* resolved_method, + ReferenceTypeInfo receiver_type, bool same_dex_file, HInstruction** return_replacement) { + DCHECK(!(resolved_method->IsStatic() && receiver_type.IsValid())); ScopedObjectAccess soa(Thread::Current()); const DexFile::CodeItem* code_item = resolved_method->GetCodeItem(); const DexFile& callee_dex_file = *resolved_method->GetDexFile(); uint32_t method_index = resolved_method->GetDexMethodIndex(); ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker(); - Handle<mirror::DexCache> dex_cache(handles_->NewHandle(resolved_method->GetDexCache())); - Handle<mirror::ClassLoader> class_loader(handles_->NewHandle( - resolved_method->GetDeclaringClass()->GetClassLoader())); + Handle<mirror::DexCache> dex_cache = NewHandleIfDifferent(resolved_method->GetDexCache(), + caller_compilation_unit_.GetDexCache(), + handles_); + Handle<mirror::ClassLoader> class_loader = + NewHandleIfDifferent(resolved_method->GetDeclaringClass()->GetClassLoader(), + caller_compilation_unit_.GetClassLoader(), + handles_); DexCompilationUnit dex_compilation_unit( - class_loader.ToJObject(), + class_loader, class_linker, callee_dex_file, code_item, @@ -1049,26 +1616,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, /* verified_method */ nullptr, dex_cache); - bool requires_ctor_barrier = false; - - if (dex_compilation_unit.IsConstructor()) { - // If it's a super invocation and we already generate a barrier there's no need - // to generate another one. - // We identify super calls by looking at the "this" pointer. If its value is the - // same as the local "this" pointer then we must have a super invocation. - bool is_super_invocation = invoke_instruction->InputAt(0)->IsParameterValue() - && invoke_instruction->InputAt(0)->AsParameterValue()->IsThis(); - if (is_super_invocation && graph_->ShouldGenerateConstructorBarrier()) { - requires_ctor_barrier = false; - } else { - Thread* self = Thread::Current(); - requires_ctor_barrier = compiler_driver_->RequiresConstructorBarrier(self, - dex_compilation_unit.GetDexFile(), - dex_compilation_unit.GetClassDefIndex()); - } - } - - InvokeType invoke_type = invoke_instruction->GetOriginalInvokeType(); + InvokeType invoke_type = invoke_instruction->GetInvokeType(); if (invoke_type == kInterface) { // We have statically resolved the dispatch. To please the class linker // at runtime, we change this call as if it was a virtual call. @@ -1080,7 +1628,6 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, graph_->GetArena(), callee_dex_file, method_index, - requires_ctor_barrier, compiler_driver_->GetInstructionSet(), invoke_type, graph_->IsDebuggable(), @@ -1088,43 +1635,54 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, caller_instruction_counter); callee_graph->SetArtMethod(resolved_method); - // When they are needed, allocate `inline_stats` on the heap instead + // When they are needed, allocate `inline_stats_` on the Arena instead // of on the stack, as Clang might produce a stack frame too large // for this function, that would not fit the requirements of the // `-Wframe-larger-than` option. - std::unique_ptr<OptimizingCompilerStats> inline_stats = - (stats_ == nullptr) ? nullptr : MakeUnique<OptimizingCompilerStats>(); + if (stats_ != nullptr) { + // Reuse one object for all inline attempts from this caller to keep Arena memory usage low. 
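NewHandleIfDifferent, introduced just above, keeps the handle scope from growing when the callee shares its dex cache or class loader with the caller. The same reuse idea with a toy handle scope (the Handle and scope types here are illustrative, not ART's):

#include <vector>

template <typename T>
struct Handle {
  T* obj = nullptr;
  T* Get() const { return obj; }
};

template <typename T>
class ToyHandleScope {
 public:
  Handle<T> NewHandle(T* object) {
    handles_.push_back(Handle<T>{object});
    return handles_.back();
  }
 private:
  std::vector<Handle<T>> handles_;
};

// Allocate a new handle only when the object differs from the hint the
// caller already holds; otherwise reuse the hint and keep the scope small.
template <typename T>
Handle<T> NewHandleIfDifferent(T* object, Handle<T> hint, ToyHandleScope<T>* scope) {
  return (object != hint.Get()) ? scope->NewHandle(object) : hint;
}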
+ if (inline_stats_ == nullptr) { + void* storage = graph_->GetArena()->Alloc<OptimizingCompilerStats>(kArenaAllocMisc); + inline_stats_ = new (storage) OptimizingCompilerStats; + } else { + inline_stats_->Reset(); + } + } HGraphBuilder builder(callee_graph, &dex_compilation_unit, &outer_compilation_unit_, resolved_method->GetDexFile(), *code_item, compiler_driver_, - inline_stats.get(), - resolved_method->GetQuickenedInfo(), + codegen_, + inline_stats_, + resolved_method->GetQuickenedInfo(class_linker->GetImagePointerSize()), dex_cache, handles_); if (builder.BuildGraph() != kAnalysisSuccess) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) - << " could not be built, so cannot be inlined"; + LOG_FAIL(kNotInlinedCannotBuild) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be built, so cannot be inlined"; return false; } if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph, compiler_driver_->GetInstructionSet())) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) - << " cannot be inlined because of the register allocator"; + LOG_FAIL(kNotInlinedRegisterAllocator) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " cannot be inlined because of the register allocator"; return false; } size_t parameter_index = 0; + bool run_rtp = false; for (HInstructionIterator instructions(callee_graph->GetEntryBlock()->GetInstructions()); !instructions.Done(); instructions.Advance()) { HInstruction* current = instructions.Current(); if (current->IsParameterValue()) { - HInstruction* argument = invoke_instruction->InputAt(parameter_index++); + HInstruction* argument = invoke_instruction->InputAt(parameter_index); if (argument->IsNullConstant()) { current->ReplaceWith(callee_graph->GetNullConstant()); } else if (argument->IsIntConstant()) { @@ -1138,112 +1696,134 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, current->ReplaceWith( callee_graph->GetDoubleConstant(argument->AsDoubleConstant()->GetValue())); } else if (argument->GetType() == Primitive::kPrimNot) { - current->SetReferenceTypeInfo(argument->GetReferenceTypeInfo()); + if (!resolved_method->IsStatic() && parameter_index == 0 && receiver_type.IsValid()) { + run_rtp = true; + current->SetReferenceTypeInfo(receiver_type); + } else { + current->SetReferenceTypeInfo(argument->GetReferenceTypeInfo()); + } current->AsParameterValue()->SetCanBeNull(argument->CanBeNull()); } + ++parameter_index; } } - size_t number_of_instructions_budget = kMaximumNumberOfHInstructions; - size_t number_of_inlined_instructions = - RunOptimizations(callee_graph, code_item, dex_compilation_unit); - number_of_instructions_budget += number_of_inlined_instructions; + // We have replaced formal arguments with actual arguments. If actual types + // are more specific than the declared ones, run RTP again on the inner graph. + if (run_rtp || ArgumentTypesMoreSpecific(invoke_instruction, resolved_method)) { + ReferenceTypePropagation(callee_graph, + outer_compilation_unit_.GetClassLoader(), + dex_compilation_unit.GetDexCache(), + handles_, + /* is_first_run */ false).Run(); + } + + RunOptimizations(callee_graph, code_item, dex_compilation_unit); - // TODO: We should abort only if all predecessors throw. However, - // HGraph::InlineInto currently does not handle an exit block with - // a throw predecessor. 
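Earlier in this hunk, the argument-substitution loop replaces each formal parameter of the callee graph with the actual value passed at the call site, folding constants in directly and tightening the receiver's type info. The constant case, modeled on a toy IR rather than ART's HInstruction hierarchy:

#include <cstddef>
#include <cstdint>
#include <optional>
#include <vector>

// Toy IR: a callee parameter that may be replaced by a call-site constant.
struct Param {
  std::optional<int64_t> constant;  // engaged when the call site passes a constant
};

// For each formal parameter, record the constant actually passed at the
// call site, in the spirit of the ReplaceWith(GetIntConstant(...)) calls above.
void SubstituteArguments(std::vector<Param>& params,
                         const std::vector<std::optional<int64_t>>& args) {
  for (size_t i = 0; i < params.size() && i < args.size(); ++i) {
    if (args[i].has_value()) {
      params[i].constant = args[i];
    }
  }
}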
HBasicBlock* exit_block = callee_graph->GetExitBlock(); if (exit_block == nullptr) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) - << " could not be inlined because it has an infinite loop"; + LOG_FAIL(kNotInlinedInfiniteLoop) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because it has an infinite loop"; return false; } - bool has_throw_predecessor = false; + bool has_one_return = false; for (HBasicBlock* predecessor : exit_block->GetPredecessors()) { if (predecessor->GetLastInstruction()->IsThrow()) { - has_throw_predecessor = true; - break; + if (invoke_instruction->GetBlock()->IsTryBlock()) { + // TODO(ngeoffray): Support adding HTryBoundary in Hgraph::InlineInto. + LOG_FAIL(kNotInlinedTryCatch) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because one branch always throws and" + << " caller is in a try/catch block"; + return false; + } else if (graph_->GetExitBlock() == nullptr) { + // TODO(ngeoffray): Support adding HExit in the caller graph. + LOG_FAIL(kNotInlinedInfiniteLoop) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because one branch always throws and" + << " caller does not have an exit block"; + return false; + } else if (graph_->HasIrreducibleLoops()) { + // TODO(ngeoffray): Support re-computing loop information to graphs with + // irreducible loops? + VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because one branch always throws and" + << " caller has irreducible loops"; + return false; + } + } else { + has_one_return = true; } } - if (has_throw_predecessor) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) - << " could not be inlined because one branch always throws"; + + if (!has_one_return) { + LOG_FAIL(kNotInlinedAlwaysThrows) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because it always throws"; return false; } - HReversePostOrderIterator it(*callee_graph); - it.Advance(); // Past the entry block, it does not contain instructions that prevent inlining. size_t number_of_instructions = 0; - - bool can_inline_environment = - total_number_of_dex_registers_ < kMaximumNumberOfCumulatedDexRegisters; - - for (; !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); - - if (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible()) { - // Don't inline methods with irreducible loops, they could prevent some - // optimizations to run. - VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) - << " could not be inlined because it contains an irreducible loop"; - return false; + // Skip the entry block, it does not contain instructions that prevent inlining. + for (HBasicBlock* block : callee_graph->GetReversePostOrderSkipEntryBlock()) { + if (block->IsLoopHeader()) { + if (block->GetLoopInformation()->IsIrreducible()) { + // Don't inline methods with irreducible loops, they could prevent some + // optimizations to run. + LOG_FAIL(kNotInlinedIrreducibleLoop) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because it contains an irreducible loop"; + return false; + } + if (!block->GetLoopInformation()->HasExitEdge()) { + // Don't inline methods with loops without exit, since they cause the + // loop information to be computed incorrectly when updating after + // inlining. 
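The return/throw analysis above admits a callee only when at least one exit predecessor is a genuine return; always-throwing callees, or throwing branches the caller context cannot absorb, are rejected. The core classification on a toy CFG (assumed block shape):

#include <vector>

// Toy CFG block; only its terminator kind matters here.
struct Block {
  bool ends_in_throw;
};

// A callee whose exit block is reached only through throwing predecessors
// "always throws" and is not inlined, matching the has_one_return logic.
bool HasOneReturn(const std::vector<const Block*>& exit_predecessors) {
  bool has_one_return = false;
  for (const Block* predecessor : exit_predecessors) {
    if (!predecessor->ends_in_throw) {
      has_one_return = true;
    }
  }
  return has_one_return;
}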
+ LOG_FAIL(kNotInlinedLoopWithoutExit) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because it contains a loop with no exit"; + return false; + } } for (HInstructionIterator instr_it(block->GetInstructions()); !instr_it.Done(); instr_it.Advance()) { - if (number_of_instructions++ == number_of_instructions_budget) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) - << " is not inlined because its caller has reached" - << " its instruction budget limit."; + if (++number_of_instructions >= inlining_budget_) { + LOG_FAIL(kNotInlinedInstructionBudget) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " is not inlined because the outer method has reached" + << " its instruction budget limit."; return false; } HInstruction* current = instr_it.Current(); - if (!can_inline_environment && current->NeedsEnvironment()) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) - << " is not inlined because its caller has reached" - << " its environment budget limit."; - return false; - } - - if (current->IsInvokeInterface()) { - // Disable inlining of interface calls. The cost in case of entering the - // resolution conflict is currently too high. - VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) - << " could not be inlined because it has an interface call."; + if (current->NeedsEnvironment() && + (total_number_of_dex_registers_ >= kMaximumNumberOfCumulatedDexRegisters)) { + LOG_FAIL(kNotInlinedEnvironmentBudget) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " is not inlined because its caller has reached" + << " its environment budget limit."; return false; } - if (!same_dex_file && current->NeedsEnvironment()) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) - << " could not be inlined because " << current->DebugName() - << " needs an environment and is in a different dex file"; + if (current->NeedsEnvironment() && + !CanEncodeInlinedMethodInStackMap(*caller_compilation_unit_.GetDexFile(), + resolved_method)) { + LOG_FAIL(kNotInlinedStackMaps) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because " << current->DebugName() + << " needs an environment, is in a different dex file" + << ", and cannot be encoded in the stack maps."; return false; } if (!same_dex_file && current->NeedsDexCacheOfDeclaringClass()) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) - << " could not be inlined because " << current->DebugName() - << " it is in a different dex file and requires access to the dex cache"; - return false; - } - - if (current->IsNewInstance() && - (current->AsNewInstance()->GetEntrypoint() == kQuickAllocObjectWithAccessCheck)) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) - << " could not be inlined because it is using an entrypoint" - << " with access checks"; - // Allocation entrypoint does not handle inlined frames. - return false; - } - - if (current->IsNewArray() && - (current->AsNewArray()->GetEntrypoint() == kQuickAllocArrayWithAccessCheck)) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) - << " could not be inlined because it is using an entrypoint" - << " with access checks"; - // Allocation entrypoint does not handle inlined frames. 
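The instruction walk above charges every callee instruction against the remaining inlining budget and gives up as soon as the budget is hit, rather than counting the whole method first. A standalone sketch of that early-exit accounting (toy block and instruction types):

#include <cstddef>
#include <vector>

// Toy shapes; the real walk iterates HGraph blocks in reverse post order,
// skipping the entry block.
struct Instruction { bool needs_environment; };
struct Block { std::vector<Instruction> instructions; };

// Return false as soon as the running count reaches the budget, so an
// oversized callee is rejected without being fully traversed.
bool FitsInliningBudget(const std::vector<Block>& blocks, size_t inlining_budget) {
  size_t number_of_instructions = 0;
  for (const Block& block : blocks) {
    for (const Instruction& instruction : block.instructions) {
      (void)instruction;  // only the count matters for the budget check
      if (++number_of_instructions >= inlining_budget) {
        return false;
      }
    }
  }
  return true;
}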
+ LOG_FAIL(kNotInlinedDexCache) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because " << current->DebugName() + << " it is in a different dex file and requires access to the dex cache"; return false; } @@ -1252,38 +1832,46 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, current->IsUnresolvedStaticFieldSet() || current->IsUnresolvedInstanceFieldSet()) { // Entrypoint for unresolved fields does not handle inlined frames. - VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) - << " could not be inlined because it is using an unresolved" - << " entrypoint"; + LOG_FAIL(kNotInlinedUnresolvedEntrypoint) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because it is using an unresolved" + << " entrypoint"; return false; } } } - number_of_inlined_instructions_ += number_of_instructions; - DCHECK_EQ(caller_instruction_counter, graph_->GetCurrentInstructionId()) << "No instructions can be added to the outer graph while inner graph is being built"; + // Inline the callee graph inside the caller graph. const int32_t callee_instruction_counter = callee_graph->GetCurrentInstructionId(); graph_->SetCurrentInstructionId(callee_instruction_counter); *return_replacement = callee_graph->InlineInto(graph_, invoke_instruction); + // Update our budget for other inlining attempts in `caller_graph`. + total_number_of_instructions_ += number_of_instructions; + UpdateInliningBudget(); DCHECK_EQ(callee_instruction_counter, callee_graph->GetCurrentInstructionId()) << "No instructions can be added to the inner graph during inlining into the outer graph"; + if (stats_ != nullptr) { + DCHECK(inline_stats_ != nullptr); + inline_stats_->AddTo(stats_); + } + return true; } -size_t HInliner::RunOptimizations(HGraph* callee_graph, - const DexFile::CodeItem* code_item, - const DexCompilationUnit& dex_compilation_unit) { +void HInliner::RunOptimizations(HGraph* callee_graph, + const DexFile::CodeItem* code_item, + const DexCompilationUnit& dex_compilation_unit) { // Note: if the outermost_graph_ is being compiled OSR, we should not run any // optimization that could lead to a HDeoptimize. The following optimizations do not. 
- HDeadCodeElimination dce(callee_graph, stats_); - HConstantFolding fold(callee_graph); - HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_); - InstructionSimplifier simplify(callee_graph, stats_); - IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_, stats_); + HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner"); + HConstantFolding fold(callee_graph, "constant_folding$inliner"); + HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_, handles_); + InstructionSimplifier simplify(callee_graph, codegen_, inline_stats_); + IntrinsicsRecognizer intrinsics(callee_graph, inline_stats_); HOptimization* optimizations[] = { &intrinsics, @@ -1298,70 +1886,133 @@ size_t HInliner::RunOptimizations(HGraph* callee_graph, optimization->Run(); } - size_t number_of_inlined_instructions = 0u; - if (depth_ + 1 < compiler_driver_->GetCompilerOptions().GetInlineDepthLimit()) { - HInliner inliner(callee_graph, - outermost_graph_, - codegen_, - outer_compilation_unit_, - dex_compilation_unit, - compiler_driver_, - handles_, - stats_, - total_number_of_dex_registers_ + code_item->registers_size_, - depth_ + 1); - inliner.Run(); - number_of_inlined_instructions += inliner.number_of_inlined_instructions_; + // Bail early for pathological cases on the environment (for example recursive calls, + // or too large environment). + if (total_number_of_dex_registers_ >= kMaximumNumberOfCumulatedDexRegisters) { + LOG_NOTE() << "Calls in " << callee_graph->GetArtMethod()->PrettyMethod() + << " will not be inlined because the outer method has reached" + << " its environment budget limit."; + return; } - return number_of_inlined_instructions; + // Bail early if we know we already are over the limit. + size_t number_of_instructions = CountNumberOfInstructions(callee_graph); + if (number_of_instructions > inlining_budget_) { + LOG_NOTE() << "Calls in " << callee_graph->GetArtMethod()->PrettyMethod() + << " will not be inlined because the outer method has reached" + << " its instruction budget limit. " << number_of_instructions; + return; + } + + HInliner inliner(callee_graph, + outermost_graph_, + codegen_, + outer_compilation_unit_, + dex_compilation_unit, + compiler_driver_, + handles_, + inline_stats_, + total_number_of_dex_registers_ + code_item->registers_size_, + total_number_of_instructions_ + number_of_instructions, + this, + depth_ + 1); + inliner.Run(); } -void HInliner::FixUpReturnReferenceType(HInvoke* invoke_instruction, - ArtMethod* resolved_method, - HInstruction* return_replacement, - bool do_rtp) { +static bool IsReferenceTypeRefinement(ReferenceTypeInfo declared_rti, + bool declared_can_be_null, + HInstruction* actual_obj) + REQUIRES_SHARED(Locks::mutator_lock_) { + if (declared_can_be_null && !actual_obj->CanBeNull()) { + return true; + } + + ReferenceTypeInfo actual_rti = actual_obj->GetReferenceTypeInfo(); + return (actual_rti.IsExact() && !declared_rti.IsExact()) || + declared_rti.IsStrictSupertypeOf(actual_rti); +} + +ReferenceTypeInfo HInliner::GetClassRTI(mirror::Class* klass) { + return ReferenceTypePropagation::IsAdmissible(klass) + ? ReferenceTypeInfo::Create(handles_->NewHandle(klass)) + : graph_->GetInexactObjectRti(); +} + +bool HInliner::ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* resolved_method) { + // If this is an instance call, test whether the type of the `this` argument + // is more specific than the class which declares the method. 
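IsReferenceTypeRefinement, defined above, is the single predicate behind every "more specific" test in this change: ruling out null, gaining exactness, or naming a strictly more derived class each count as a refinement. A toy rendering of that lattice check (assumed TypeInfo shape, not ART's ReferenceTypeInfo):

// Assumed simplified type representation; the real code compares
// ReferenceTypeInfo handles under the mutator lock.
struct TypeInfo {
  bool is_exact;
  int depth;  // toy stand-in for the class hierarchy: deeper means more derived
  bool StrictSupertypeOf(const TypeInfo& other) const { return depth < other.depth; }
};

// A value refines its declared type if it removes nullability, adds
// exactness, or is a strict subtype of the declared class.
bool IsRefinement(const TypeInfo& declared, bool declared_can_be_null,
                  const TypeInfo& actual, bool actual_can_be_null) {
  if (declared_can_be_null && !actual_can_be_null) {
    return true;
  }
  return (actual.is_exact && !declared.is_exact) || declared.StrictSupertypeOf(actual);
}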
+ if (!resolved_method->IsStatic()) { + if (IsReferenceTypeRefinement(GetClassRTI(resolved_method->GetDeclaringClass()), + /* declared_can_be_null */ false, + invoke_instruction->InputAt(0u))) { + return true; + } + } + + // Iterate over the list of parameter types and test whether any of the + // actual inputs has a more specific reference type than the type declared in + // the signature. + const DexFile::TypeList* param_list = resolved_method->GetParameterTypeList(); + for (size_t param_idx = 0, + input_idx = resolved_method->IsStatic() ? 0 : 1, + e = (param_list == nullptr ? 0 : param_list->Size()); + param_idx < e; + ++param_idx, ++input_idx) { + HInstruction* input = invoke_instruction->InputAt(input_idx); + if (input->GetType() == Primitive::kPrimNot) { + mirror::Class* param_cls = resolved_method->GetClassFromTypeIndex( + param_list->GetTypeItem(param_idx).type_idx_, + /* resolve */ false); + if (IsReferenceTypeRefinement(GetClassRTI(param_cls), + /* declared_can_be_null */ true, + input)) { + return true; + } + } + } + + return false; +} + +bool HInliner::ReturnTypeMoreSpecific(HInvoke* invoke_instruction, + HInstruction* return_replacement) { // Check the integrity of reference types and run another type propagation if needed. if (return_replacement != nullptr) { if (return_replacement->GetType() == Primitive::kPrimNot) { + // Test if the return type is a refinement of the declared return type. + if (IsReferenceTypeRefinement(invoke_instruction->GetReferenceTypeInfo(), + /* declared_can_be_null */ true, + return_replacement)) { + return true; + } else if (return_replacement->IsInstanceFieldGet()) { + HInstanceFieldGet* field_get = return_replacement->AsInstanceFieldGet(); + ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); + if (field_get->GetFieldInfo().GetField() == + class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0)) { + return true; + } + } + } else if (return_replacement->IsInstanceOf()) { + // Inlining InstanceOf into an If may put a tighter bound on reference types. + return true; + } + } + + return false; +} + +void HInliner::FixUpReturnReferenceType(ArtMethod* resolved_method, + HInstruction* return_replacement) { + if (return_replacement != nullptr) { + if (return_replacement->GetType() == Primitive::kPrimNot) { if (!return_replacement->GetReferenceTypeInfo().IsValid()) { // Make sure that we have a valid type for the return. We may get an invalid one when // we inline invokes with multiple branches and create a Phi for the result. // TODO: we could be more precise by merging the phi inputs but that requires // some functionality from the reference type propagation. DCHECK(return_replacement->IsPhi()); - size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); - mirror::Class* cls = resolved_method->GetReturnType(false /* resolve */, pointer_size); - if (cls != nullptr && !cls->IsErroneous()) { - ReferenceTypeInfo::TypeHandle return_handle = handles_->NewHandle(cls); - return_replacement->SetReferenceTypeInfo(ReferenceTypeInfo::Create( - return_handle, return_handle->CannotBeAssignedFromOtherTypes() /* is_exact */)); - } else { - // Return inexact object type on failures. - return_replacement->SetReferenceTypeInfo(graph_->GetInexactObjectRti()); - } - } - - if (do_rtp) { - // If the return type is a refinement of the declared type run the type propagation again. 
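ArgumentTypesMoreSpecific, completed above, walks the declared parameter types against the actual call-site inputs, offsetting by one for instance methods because input 0 is the receiver. The traversal shape, on the same toy type representation as the previous sketch (repeated here so the block stands alone):

#include <cstddef>
#include <vector>

struct Type {
  bool exact;
  int depth;  // toy hierarchy depth: deeper means more derived
};

// True when any actual input names a strictly tighter type than its declared
// parameter; instance methods skip input 0, the receiver.
bool ArgumentsMoreSpecific(bool is_static,
                           const std::vector<Type>& declared_params,
                           const std::vector<Type>& actual_inputs) {
  size_t input_idx = is_static ? 0u : 1u;
  for (size_t param_idx = 0;
       param_idx < declared_params.size() && input_idx < actual_inputs.size();
       ++param_idx, ++input_idx) {
    const Type& declared = declared_params[param_idx];
    const Type& actual = actual_inputs[input_idx];
    if ((actual.exact && !declared.exact) || declared.depth < actual.depth) {
      return true;
    }
  }
  return false;
}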
- ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo();
- ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo();
- if (invoke_rti.IsStrictSupertypeOf(return_rti)
- || (return_rti.IsExact() && !invoke_rti.IsExact())
- || !return_replacement->CanBeNull()) {
- ReferenceTypePropagation(graph_,
- outer_compilation_unit_.GetDexCache(),
- handles_,
- /* is_first_run */ false).Run();
- }
- }
- } else if (return_replacement->IsInstanceOf()) {
- if (do_rtp) {
- // Inlining InstanceOf into an If may put a tighter bound on reference types.
- ReferenceTypePropagation(graph_,
- outer_compilation_unit_.GetDexCache(),
- handles_,
- /* is_first_run */ false).Run();
+ mirror::Class* cls = resolved_method->GetReturnType(false /* resolve */);
+ return_replacement->SetReferenceTypeInfo(GetClassRTI(cls));
 }
 }
 }
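The fix-up at the very end covers multi-branch inlining, where the merged return Phi has no computed type yet: it falls back to the callee's declared return type, or to an inexact java.lang.Object when that class is not admissible. The fallback order, as a sketch (Rti is a toy stand-in, not ART's ReferenceTypeInfo):

struct Rti { bool valid; };

Rti FixUpReturnType(Rti current, Rti declared, Rti inexact_object) {
  if (current.valid) {
    return current;  // the inlined branches already produced a usable type
  }
  // Prefer the declared return type; otherwise the inexact Object type.
  return declared.valid ? declared : inexact_object;
}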