author     2024-02-07 11:53:09 +0000
committer  2024-02-08 15:56:21 +0000
commit     e872656585952f993eb84633a66e0aedcbdf52ac (patch)
tree       82f08a5d1dd1ca5247810b20a92c5a56a48b34a1
parent     03ca5cf9db4110962700d47b7b5bd04592cac157 (diff)
Only compile optimized if it is useful.
If profiling doesn't benefit the method, switch the baseline compilation
into an optimized one.
Reduces the number of JIT compilations on the Sheets benchmark from
~3100 (2250 baseline, 850 optimized) to ~2750 (2250 baseline, 500
optimized).
Test: test.py
Change-Id: I94760481d130d2dc168152daa94429baf201f66e
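
In other words, the baseline compilation now records on the graph whether collecting
profiling data can still pay off; when it cannot, the JIT commits the baseline output as
optimized code so the method is never recompiled. Below is a minimal, self-contained
sketch of that control flow only; Graph, InlineCallSites and ChooseCommitKind are
illustrative stand-ins, not the ART classes touched in the diff below.

    // Hypothetical, simplified sketch of the idea (not ART's real classes):
    // a baseline compilation records whether profiling data could still help,
    // and the JIT promotes the result to an optimized compilation if it could not.
    #include <cstdio>

    enum class CompilationKind { kBaseline, kOptimized };

    // Stand-in for HGraph: carries the "useful optimizing" bit added by this change.
    struct Graph {
      bool useful_optimizing = false;
      void SetUsefulOptimizing() { useful_optimizing = true; }
      bool IsUsefulOptimizing() const { return useful_optimizing; }
    };

    // Stand-in for the inliner: when it has to give up for baseline-only reasons
    // (unresolved target, baseline size/depth limits), a later optimized compile
    // with profiling data could still do better, so it flags the graph.
    void InlineCallSites(Graph* graph, bool everything_resolved_and_small) {
      if (!everything_resolved_and_small) {
        graph->SetUsefulOptimizing();
      }
    }

    // Stand-in for the JIT commit step: if baseline profiling would not change
    // anything, commit the code as optimized so the method is not compiled twice.
    CompilationKind ChooseCommitKind(const Graph& graph, CompilationKind requested) {
      if (requested == CompilationKind::kBaseline && !graph.IsUsefulOptimizing()) {
        return CompilationKind::kOptimized;
      }
      return requested;
    }

    int main() {
      Graph graph;
      InlineCallSites(&graph, /*everything_resolved_and_small=*/true);
      CompilationKind kind = ChooseCommitKind(graph, CompilationKind::kBaseline);
      std::printf("committed as %s\n",
                  kind == CompilationKind::kOptimized ? "optimized" : "baseline");
      return 0;
    }

In the actual change this bit is useful_optimizing_ on HGraph: the inliner and the
branch-profiling path set it, ProfilingInfoBuilder requires it, and
OptimizingCompiler::JitCompile flips the compilation kind to kOptimized before
committing when it was never set.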
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc     |   4
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc  |   4
-rw-r--r--  compiler/optimizing/code_generator_riscv64.cc   |   4
-rw-r--r--  compiler/optimizing/code_generator_x86.cc       |   6
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc    |   4
-rw-r--r--  compiler/optimizing/inliner.cc                  |  14
-rw-r--r--  compiler/optimizing/nodes.h                     |   8
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc      |   7
-rw-r--r--  compiler/optimizing/profiling_info_builder.cc   |   7
-rw-r--r--  runtime/jit/jit_code_cache.cc                   | 156
10 files changed, 126 insertions, 88 deletions
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 5ba26b4754..e22b24ef2f 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1306,7 +1306,9 @@ void CodeGeneratorARM64::MaybeIncrementHotness(HSuspendCheck* suspend_check, boo
     __ Bind(&done);
   }

-  if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+  if (GetGraph()->IsCompilingBaseline() &&
+      GetGraph()->IsUsefulOptimizing() &&
+      !Runtime::Current()->IsAotCompiler()) {
     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
     DCHECK(info != nullptr);
     DCHECK(!HasEmptyFrame());
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 85f61f5303..75fae4e859 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -2302,7 +2302,9 @@ void CodeGeneratorARMVIXL::MaybeIncrementHotness(HSuspendCheck* suspend_check,
     }
   }

-  if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+  if (GetGraph()->IsCompilingBaseline() &&
+      GetGraph()->IsUsefulOptimizing() &&
+      !Runtime::Current()->IsAotCompiler()) {
     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
     DCHECK(info != nullptr);
     DCHECK(!HasEmptyFrame());
diff --git a/compiler/optimizing/code_generator_riscv64.cc b/compiler/optimizing/code_generator_riscv64.cc
index ed57683e0a..93bd35b618 100644
--- a/compiler/optimizing/code_generator_riscv64.cc
+++ b/compiler/optimizing/code_generator_riscv64.cc
@@ -5763,7 +5763,9 @@ void CodeGeneratorRISCV64::MaybeIncrementHotness(HSuspendCheck* suspend_check,
     __ Bind(&done);
   }

-  if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+  if (GetGraph()->IsCompilingBaseline() &&
+      GetGraph()->IsUsefulOptimizing() &&
+      !Runtime::Current()->IsAotCompiler()) {
     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
     DCHECK(info != nullptr);
     DCHECK(!HasEmptyFrame());
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index a61dca3022..21d3492e8a 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1357,9 +1357,9 @@ void CodeGeneratorX86::MaybeIncrementHotness(HSuspendCheck* suspend_check, bool
     }
   }

-  if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
-    // Note the slow path doesn't save SIMD registers, so if we were to
-    // call it on loop back edge, we would need to fix this.
+  if (GetGraph()->IsCompilingBaseline() &&
+      GetGraph()->IsUsefulOptimizing() &&
+      !Runtime::Current()->IsAotCompiler()) {
     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
     DCHECK(info != nullptr);
     uint32_t address = reinterpret_cast32<uint32_t>(info) +
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index db4062b00d..af6c6255e5 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1788,7 +1788,9 @@ void CodeGeneratorX86_64::MaybeIncrementHotness(HSuspendCheck* suspend_check, bo
     __ Bind(&overflow);
   }

-  if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+  if (GetGraph()->IsCompilingBaseline() &&
+      GetGraph()->IsUsefulOptimizing() &&
+      !Runtime::Current()->IsAotCompiler()) {
     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
     DCHECK(info != nullptr);
     CHECK(!HasEmptyFrame());
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index fd3e787fc8..d7ca17b646 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -541,6 +541,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) {
                << " statically resolve the target";
     // For baseline compilation, we will collect inline caches, so we should not
     // try to inline using them.
+    outermost_graph_->SetUsefulOptimizing();
     return false;
   }

@@ -1552,9 +1553,7 @@ bool HInliner::IsInliningEncouraged(const HInvoke* invoke_instruction,
     return false;
   }

-  size_t inline_max_code_units = graph_->IsCompilingBaseline()
-      ? CompilerOptions::kBaselineInlineMaxCodeUnits
-      : codegen_->GetCompilerOptions().GetInlineMaxCodeUnits();
+  size_t inline_max_code_units = codegen_->GetCompilerOptions().GetInlineMaxCodeUnits();
   if (accessor.InsnsSizeInCodeUnits() > inline_max_code_units) {
     LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedCodeItem)
         << "Method " << method->PrettyMethod()
@@ -1565,6 +1564,14 @@
     return false;
   }

+  if (graph_->IsCompilingBaseline() &&
+      accessor.InsnsSizeInCodeUnits() > CompilerOptions::kBaselineInlineMaxCodeUnits) {
+    LOG_FAIL_NO_STAT() << "Reached baseline maximum code unit for inlining "
+                       << method->PrettyMethod();
+    outermost_graph_->SetUsefulOptimizing();
+    return false;
+  }
+
   if (invoke_instruction->GetBlock()->GetLastInstruction()->IsThrow()) {
     LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedEndsWithThrow)
         << "Method " << method->PrettyMethod()
@@ -2129,6 +2136,7 @@ bool HInliner::CanInlineBody(const HGraph* callee_graph,
     if (depth_ + 1 > maximum_inlining_depth_for_baseline) {
       LOG_FAIL_NO_STAT() << "Reached maximum depth for inlining in baseline compilation: "
                          << depth_ << " for " << callee_graph->GetArtMethod()->PrettyMethod();
+      outermost_graph_->SetUsefulOptimizing();
       return false;
     }
   }
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index c862e31de7..367f45f3a4 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -425,6 +425,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
         cached_current_method_(nullptr),
         art_method_(nullptr),
         compilation_kind_(compilation_kind),
+        useful_optimizing_(false),
         cha_single_implementation_list_(allocator->Adapter(kArenaAllocCHA)) {
     blocks_.reserve(kDefaultNumberOfBlocks);
   }
@@ -742,6 +743,9 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
   void SetNumberOfCHAGuards(uint32_t num) { number_of_cha_guards_ = num; }
   void IncrementNumberOfCHAGuards() { number_of_cha_guards_++; }

+  void SetUsefulOptimizing() { useful_optimizing_ = true; }
+  bool IsUsefulOptimizing() const { return useful_optimizing_; }
+
  private:
   void RemoveDeadBlocksInstructionsAsUsersAndDisconnect(const ArenaBitVector& visited) const;
   void RemoveDeadBlocks(const ArenaBitVector& visited);
@@ -897,6 +901,10 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
   // directly jump to.
   const CompilationKind compilation_kind_;

+  // Whether after compiling baseline it is still useful re-optimizing this
+  // method.
+  bool useful_optimizing_;
+
   // List of methods that are assumed to have single implementation.
   ArenaSet<ArtMethod*> cha_single_implementation_list_;

diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index a1c4130bc1..65e8e51712 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -905,6 +905,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
   }

   if (compilation_kind == CompilationKind::kBaseline && compiler_options.ProfileBranches()) {
+    graph->SetUsefulOptimizing();
     // Branch profiling currently doesn't support running optimizations.
     RunRequiredPasses(graph, codegen.get(), dex_compilation_unit, &pass_observer);
   } else {
@@ -917,6 +918,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
   // this method already, do it now.
   if (jit != nullptr &&
       compilation_kind == CompilationKind::kBaseline &&
+      graph->IsUsefulOptimizing() &&
       graph->GetProfilingInfo() == nullptr) {
     ProfilingInfoBuilder(
         graph, codegen->GetCompilerOptions(), codegen.get(), compilation_stats_.get()).Run();
@@ -1448,6 +1450,11 @@ bool OptimizingCompiler::JitCompile(Thread* self,
     debug_info = GenerateJitDebugInfo(info);
   }

+  if (compilation_kind == CompilationKind::kBaseline &&
+      !codegen->GetGraph()->IsUsefulOptimizing()) {
+    compilation_kind = CompilationKind::kOptimized;
+  }
+
   if (!code_cache->Commit(self,
                           region,
                           method,
diff --git a/compiler/optimizing/profiling_info_builder.cc b/compiler/optimizing/profiling_info_builder.cc
index 19795f5466..f6cf676813 100644
--- a/compiler/optimizing/profiling_info_builder.cc
+++ b/compiler/optimizing/profiling_info_builder.cc
@@ -28,6 +28,7 @@ namespace art HIDDEN {

 void ProfilingInfoBuilder::Run() {
+  DCHECK(GetGraph()->IsUsefulOptimizing());
   DCHECK_EQ(GetGraph()->GetProfilingInfo(), nullptr);
   // Order does not matter.
   for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
@@ -122,6 +123,12 @@ bool ProfilingInfoBuilder::IsInlineCacheUseful(HInvoke* invoke, CodeGenerator* c
       return false;
     }
   }
+
+  if (!codegen->GetGraph()->IsUsefulOptimizing()) {
+    // Earlier pass knew what the calling target was. No need for an inline
+    // cache.
+    return false;
+  }
   return true;
 }

diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 7bfbe15059..3560ac17ff 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -1429,18 +1429,20 @@ void JitCodeCache::GetProfiledMethods(const std::set<std::string>& dex_base_loca
   ScopedTrace trace(__FUNCTION__);
   Thread* self = Thread::Current();
   WaitUntilInlineCacheAccessible(self);
-  std::vector<ProfilingInfo*> copies;
+  SafeMap<ArtMethod*, ProfilingInfo*> profiling_infos;
+  std::vector<ArtMethod*> copies;
   // TODO: Avoid read barriers for potentially dead methods.
   // ScopedDebugDisallowReadBarriers sddrb(self);
   {
     MutexLock mu(self, *Locks::jit_lock_);
-    copies.reserve(profiling_infos_.size());
-    for (const auto& entry : profiling_infos_) {
+    profiling_infos = profiling_infos_;
+    for (const auto& entry : method_code_map_) {
       copies.push_back(entry.second);
     }
   }
-  for (ProfilingInfo* info : copies) {
-    ArtMethod* method = info->GetMethod();
+  for (ArtMethod* method : copies) {
+    auto it = profiling_infos.find(method);
+    ProfilingInfo* info = (it == profiling_infos.end()) ? nullptr : it->second;
     const DexFile* dex_file = method->GetDexFile();
     const std::string base_location = DexFileLoader::GetBaseLocation(dex_file->GetLocation());
     if (!ContainsElement(dex_base_locations, base_location)) {
@@ -1449,74 +1451,76 @@ void JitCodeCache::GetProfiledMethods(const std::set<std::string>& dex_base_loca
     }
     std::vector<ProfileMethodInfo::ProfileInlineCache> inline_caches;
-    // If the method is still baseline compiled and doesn't meet the inline cache threshold, don't
-    // save the inline caches because they might be incomplete.
-    // Although we don't deoptimize for incomplete inline caches in AOT-compiled code, inlining
-    // leads to larger generated code.
-    // If the inline cache is empty the compiler will generate a regular invoke virtual/interface.
-    const void* entry_point = method->GetEntryPointFromQuickCompiledCode();
-    if (ContainsPc(entry_point) &&
-        CodeInfo::IsBaseline(
-            OatQuickMethodHeader::FromEntryPoint(entry_point)->GetOptimizedCodeInfoPtr()) &&
-        (ProfilingInfo::GetOptimizeThreshold() - info->GetBaselineHotnessCount()) <
-            inline_cache_threshold) {
-      methods.emplace_back(/*ProfileMethodInfo*/
-          MethodReference(dex_file, method->GetDexMethodIndex()), inline_caches);
-      continue;
-    }
-
-    for (size_t i = 0; i < info->number_of_inline_caches_; ++i) {
-      std::vector<TypeReference> profile_classes;
-      const InlineCache& cache = info->GetInlineCaches()[i];
-      ArtMethod* caller = info->GetMethod();
-      bool is_missing_types = false;
-      for (size_t k = 0; k < InlineCache::kIndividualCacheSize; k++) {
-        mirror::Class* cls = cache.classes_[k].Read();
-        if (cls == nullptr) {
-          break;
-        }
+    if (info != nullptr) {
+      // If the method is still baseline compiled and doesn't meet the inline cache threshold, don't
+      // save the inline caches because they might be incomplete.
+      // Although we don't deoptimize for incomplete inline caches in AOT-compiled code, inlining
+      // leads to larger generated code.
+      // If the inline cache is empty the compiler will generate a regular invoke virtual/interface.
+      const void* entry_point = method->GetEntryPointFromQuickCompiledCode();
+      if (ContainsPc(entry_point) &&
+          CodeInfo::IsBaseline(
+              OatQuickMethodHeader::FromEntryPoint(entry_point)->GetOptimizedCodeInfoPtr()) &&
+          (ProfilingInfo::GetOptimizeThreshold() - info->GetBaselineHotnessCount()) <
+              inline_cache_threshold) {
+        methods.emplace_back(/*ProfileMethodInfo*/
+            MethodReference(dex_file, method->GetDexMethodIndex()), inline_caches);
+        continue;
+      }

-        // Check if the receiver is in the boot class path or if it's in the
-        // same class loader as the caller. If not, skip it, as there is not
-        // much we can do during AOT.
-        if (!cls->IsBootStrapClassLoaded() &&
-            caller->GetClassLoader() != cls->GetClassLoader()) {
-          is_missing_types = true;
-          continue;
-        }
+      for (size_t i = 0; i < info->number_of_inline_caches_; ++i) {
+        std::vector<TypeReference> profile_classes;
+        const InlineCache& cache = info->GetInlineCaches()[i];
+        ArtMethod* caller = info->GetMethod();
+        bool is_missing_types = false;
+        for (size_t k = 0; k < InlineCache::kIndividualCacheSize; k++) {
+          mirror::Class* cls = cache.classes_[k].Read();
+          if (cls == nullptr) {
+            break;
+          }

-        const DexFile* class_dex_file = nullptr;
-        dex::TypeIndex type_index;
+          // Check if the receiver is in the boot class path or if it's in the
+          // same class loader as the caller. If not, skip it, as there is not
+          // much we can do during AOT.
+          if (!cls->IsBootStrapClassLoaded() &&
+              caller->GetClassLoader() != cls->GetClassLoader()) {
+            is_missing_types = true;
+            continue;
+          }

-        if (cls->GetDexCache() == nullptr) {
-          DCHECK(cls->IsArrayClass()) << cls->PrettyClass();
-          // Make a best effort to find the type index in the method's dex file.
-          // We could search all open dex files but that might turn expensive
-          // and probably not worth it.
-          class_dex_file = dex_file;
-          type_index = cls->FindTypeIndexInOtherDexFile(*dex_file);
-        } else {
-          class_dex_file = &(cls->GetDexFile());
-          type_index = cls->GetDexTypeIndex();
-        }
-        if (!type_index.IsValid()) {
-          // Could be a proxy class or an array for which we couldn't find the type index.
-          is_missing_types = true;
-          continue;
+          const DexFile* class_dex_file = nullptr;
+          dex::TypeIndex type_index;
+
+          if (cls->GetDexCache() == nullptr) {
+            DCHECK(cls->IsArrayClass()) << cls->PrettyClass();
+            // Make a best effort to find the type index in the method's dex file.
+            // We could search all open dex files but that might turn expensive
+            // and probably not worth it.
+            class_dex_file = dex_file;
+            type_index = cls->FindTypeIndexInOtherDexFile(*dex_file);
+          } else {
+            class_dex_file = &(cls->GetDexFile());
+            type_index = cls->GetDexTypeIndex();
+          }
+          if (!type_index.IsValid()) {
+            // Could be a proxy class or an array for which we couldn't find the type index.
+            is_missing_types = true;
+            continue;
+          }
+          if (ContainsElement(dex_base_locations,
+                              DexFileLoader::GetBaseLocation(class_dex_file->GetLocation()))) {
+            // Only consider classes from the same apk (including multidex).
+            profile_classes.emplace_back(/*ProfileMethodInfo::ProfileClassReference*/
+                class_dex_file, type_index);
+          } else {
+            is_missing_types = true;
+          }
         }
-        if (ContainsElement(dex_base_locations,
-                            DexFileLoader::GetBaseLocation(class_dex_file->GetLocation()))) {
-          // Only consider classes from the same apk (including multidex).
-          profile_classes.emplace_back(/*ProfileMethodInfo::ProfileClassReference*/
-              class_dex_file, type_index);
-        } else {
-          is_missing_types = true;
+        if (!profile_classes.empty()) {
+          inline_caches.emplace_back(/*ProfileMethodInfo::ProfileInlineCache*/
+              cache.dex_pc_, is_missing_types, profile_classes);
         }
       }
-      if (!profile_classes.empty()) {
-        inline_caches.emplace_back(/*ProfileMethodInfo::ProfileInlineCache*/
-            cache.dex_pc_, is_missing_types, profile_classes);
-      }
     }
     methods.emplace_back(/*ProfileMethodInfo*/
         MethodReference(dex_file, method->GetDexMethodIndex()), inline_caches);
@@ -1535,17 +1539,13 @@ bool JitCodeCache::NotifyCompilationOf(ArtMethod* method,
                                        CompilationKind compilation_kind,
                                        bool prejit) {
   const void* existing_entry_point = method->GetEntryPointFromQuickCompiledCode();
-  if (compilation_kind != CompilationKind::kOsr && ContainsPc(existing_entry_point)) {
-    OatQuickMethodHeader* method_header =
-        OatQuickMethodHeader::FromEntryPoint(existing_entry_point);
-    bool is_baseline = (compilation_kind == CompilationKind::kBaseline);
-    if (CodeInfo::IsBaseline(method_header->GetOptimizedCodeInfoPtr()) == is_baseline) {
-      VLOG(jit) << "Not compiling "
-                << method->PrettyMethod()
-                << " because it has already been compiled"
-                << " kind=" << compilation_kind;
-      return false;
-    }
+  if (compilation_kind == CompilationKind::kBaseline && ContainsPc(existing_entry_point)) {
+    // The existing entry point is either already baseline, or optimized. No
+    // need to compile.
+    VLOG(jit) << "Not compiling "
+              << method->PrettyMethod()
+              << " baseline, because it has already been compiled";
+    return false;
   }

   if (method->NeedsClinitCheckBeforeCall() && !prejit) {
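
The largest hunk, in JitCodeCache::GetProfiledMethods(), follows from the same change:
a method can now reach optimized code without ever having had a ProfilingInfo, so the
profile walk is keyed off all compiled methods and treats the profiling info as optional.
Below is a rough sketch of that shape only; MethodId, ProfilingInfoStub and
CollectProfiledMethods are simplified stand-ins for illustration, not the ART types used
in the diff.

    // Rough sketch of the reshaped profile collection: iterate every compiled
    // method, and attach inline-cache data only if a ProfilingInfo still exists.
    // MethodId, ProfilingInfoStub and ProfileEntry are illustrative stand-ins.
    #include <cstdint>
    #include <map>
    #include <string>
    #include <vector>

    using MethodId = std::string;

    struct ProfilingInfoStub {
      std::vector<std::uint32_t> inline_cache_dex_pcs;
    };

    struct ProfileEntry {
      MethodId method;
      std::vector<std::uint32_t> inline_cache_dex_pcs;  // empty when no ProfilingInfo
    };

    std::vector<ProfileEntry> CollectProfiledMethods(
        const std::vector<MethodId>& compiled_methods,
        const std::map<MethodId, ProfilingInfoStub>& profiling_infos) {
      std::vector<ProfileEntry> result;
      for (const MethodId& method : compiled_methods) {
        ProfileEntry entry{method, {}};
        auto it = profiling_infos.find(method);
        if (it != profiling_infos.end()) {
          // Only methods that kept a ProfilingInfo contribute inline caches.
          entry.inline_cache_dex_pcs = it->second.inline_cache_dex_pcs;
        }
        // Methods without a ProfilingInfo are still recorded, just without
        // inline-cache data, so they stay visible to profile-guided compilation.
        result.push_back(entry);
      }
      return result;
    }

This mirrors the switch above from iterating profiling_infos_ to iterating
method_code_map_ and looking the ProfilingInfo up per method: with some baseline
compilations now committed directly as optimized code, not every hot method has a
ProfilingInfo anymore, but it should still appear in the recorded profile.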