author Nicolas Geoffray <ngeoffray@google.com> 2024-02-07 11:53:09 +0000
committer Nicolas Geoffray <ngeoffray@google.com> 2024-02-08 15:56:21 +0000
commit e872656585952f993eb84633a66e0aedcbdf52ac (patch)
tree 82f08a5d1dd1ca5247810b20a92c5a56a48b34a1
parent 03ca5cf9db4110962700d47b7b5bd04592cac157 (diff)
Only compile optimized if it is useful.
If profiling doesn't benefit the method, switch a baseline compilation
into optimized. Reduces the number of JIT compilations on the Sheets
benchmark from ~3100 (2250 baseline, 850 optimized) to ~2750
(2250 baseline, 500 optimized).

Test: test.py
Change-Id: I94760481d130d2dc168152daa94429baf201f66e
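Before the per-file hunks, a minimal standalone sketch of the decision this change introduces, assuming simplified stand-in types (Graph, CompilationKind and FinalKind below are illustrative, not the real ART classes): baseline compilation records whether profiling data would still help a later optimized compile, and if not, the finished code is committed as optimized so the JIT never recompiles the method.

// Standalone sketch only -- Graph, CompilationKind and FinalKind are
// simplified stand-ins, not the actual ART API.
#include <iostream>

enum class CompilationKind { kBaseline, kOptimized };

struct Graph {
  // Set when baseline compilation hits a limit (e.g. an inlining budget)
  // that profiling data could later help overcome.
  bool useful_optimizing = false;
  void SetUsefulOptimizing() { useful_optimizing = true; }
  bool IsUsefulOptimizing() const { return useful_optimizing; }
};

// Mirrors the idea of the JitCompile hunk below: if a baseline compilation
// found nothing that profiling would improve, commit the result as
// optimized so the method is never recompiled.
CompilationKind FinalKind(const Graph& graph, CompilationKind requested) {
  if (requested == CompilationKind::kBaseline && !graph.IsUsefulOptimizing()) {
    return CompilationKind::kOptimized;
  }
  return requested;
}

int main() {
  Graph plain;    // baseline compile that hit no limits: promoted to optimized
  Graph limited;  // baseline compile that gave up on inlining: stays baseline
  limited.SetUsefulOptimizing();
  std::cout << (FinalKind(plain, CompilationKind::kBaseline) ==
                CompilationKind::kOptimized) << "\n";  // prints 1
  std::cout << (FinalKind(limited, CompilationKind::kBaseline) ==
                CompilationKind::kBaseline) << "\n";   // prints 1
  return 0;
}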
compiler/optimizing/code_generator_arm64.cc     |   4
compiler/optimizing/code_generator_arm_vixl.cc  |   4
compiler/optimizing/code_generator_riscv64.cc   |   4
compiler/optimizing/code_generator_x86.cc       |   6
compiler/optimizing/code_generator_x86_64.cc    |   4
compiler/optimizing/inliner.cc                  |  14
compiler/optimizing/nodes.h                     |   8
compiler/optimizing/optimizing_compiler.cc      |   7
compiler/optimizing/profiling_info_builder.cc   |   7
runtime/jit/jit_code_cache.cc                   | 156
10 files changed, 126 insertions(+), 88 deletions(-)
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 5ba26b4754..e22b24ef2f 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1306,7 +1306,9 @@ void CodeGeneratorARM64::MaybeIncrementHotness(HSuspendCheck* suspend_check, boo
__ Bind(&done);
}
- if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+ if (GetGraph()->IsCompilingBaseline() &&
+ GetGraph()->IsUsefulOptimizing() &&
+ !Runtime::Current()->IsAotCompiler()) {
ProfilingInfo* info = GetGraph()->GetProfilingInfo();
DCHECK(info != nullptr);
DCHECK(!HasEmptyFrame());
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 85f61f5303..75fae4e859 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -2302,7 +2302,9 @@ void CodeGeneratorARMVIXL::MaybeIncrementHotness(HSuspendCheck* suspend_check,
}
}
- if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+ if (GetGraph()->IsCompilingBaseline() &&
+ GetGraph()->IsUsefulOptimizing() &&
+ !Runtime::Current()->IsAotCompiler()) {
ProfilingInfo* info = GetGraph()->GetProfilingInfo();
DCHECK(info != nullptr);
DCHECK(!HasEmptyFrame());
diff --git a/compiler/optimizing/code_generator_riscv64.cc b/compiler/optimizing/code_generator_riscv64.cc
index ed57683e0a..93bd35b618 100644
--- a/compiler/optimizing/code_generator_riscv64.cc
+++ b/compiler/optimizing/code_generator_riscv64.cc
@@ -5763,7 +5763,9 @@ void CodeGeneratorRISCV64::MaybeIncrementHotness(HSuspendCheck* suspend_check,
__ Bind(&done);
}
- if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+ if (GetGraph()->IsCompilingBaseline() &&
+ GetGraph()->IsUsefulOptimizing() &&
+ !Runtime::Current()->IsAotCompiler()) {
ProfilingInfo* info = GetGraph()->GetProfilingInfo();
DCHECK(info != nullptr);
DCHECK(!HasEmptyFrame());
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index a61dca3022..21d3492e8a 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1357,9 +1357,9 @@ void CodeGeneratorX86::MaybeIncrementHotness(HSuspendCheck* suspend_check, bool
}
}
- if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
- // Note the slow path doesn't save SIMD registers, so if we were to
- // call it on loop back edge, we would need to fix this.
+ if (GetGraph()->IsCompilingBaseline() &&
+ GetGraph()->IsUsefulOptimizing() &&
+ !Runtime::Current()->IsAotCompiler()) {
ProfilingInfo* info = GetGraph()->GetProfilingInfo();
DCHECK(info != nullptr);
uint32_t address = reinterpret_cast32<uint32_t>(info) +
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index db4062b00d..af6c6255e5 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1788,7 +1788,9 @@ void CodeGeneratorX86_64::MaybeIncrementHotness(HSuspendCheck* suspend_check, bo
__ Bind(&overflow);
}
- if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+ if (GetGraph()->IsCompilingBaseline() &&
+ GetGraph()->IsUsefulOptimizing() &&
+ !Runtime::Current()->IsAotCompiler()) {
ProfilingInfo* info = GetGraph()->GetProfilingInfo();
DCHECK(info != nullptr);
CHECK(!HasEmptyFrame());
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index fd3e787fc8..d7ca17b646 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -541,6 +541,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) {
<< " statically resolve the target";
// For baseline compilation, we will collect inline caches, so we should not
// try to inline using them.
+ outermost_graph_->SetUsefulOptimizing();
return false;
}
@@ -1552,9 +1553,7 @@ bool HInliner::IsInliningEncouraged(const HInvoke* invoke_instruction,
return false;
}
- size_t inline_max_code_units = graph_->IsCompilingBaseline()
- ? CompilerOptions::kBaselineInlineMaxCodeUnits
- : codegen_->GetCompilerOptions().GetInlineMaxCodeUnits();
+ size_t inline_max_code_units = codegen_->GetCompilerOptions().GetInlineMaxCodeUnits();
if (accessor.InsnsSizeInCodeUnits() > inline_max_code_units) {
LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedCodeItem)
<< "Method " << method->PrettyMethod()
@@ -1565,6 +1564,14 @@ bool HInliner::IsInliningEncouraged(const HInvoke* invoke_instruction,
return false;
}
+ if (graph_->IsCompilingBaseline() &&
+ accessor.InsnsSizeInCodeUnits() > CompilerOptions::kBaselineInlineMaxCodeUnits) {
+ LOG_FAIL_NO_STAT() << "Reached baseline maximum code unit for inlining "
+ << method->PrettyMethod();
+ outermost_graph_->SetUsefulOptimizing();
+ return false;
+ }
+
if (invoke_instruction->GetBlock()->GetLastInstruction()->IsThrow()) {
LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedEndsWithThrow)
<< "Method " << method->PrettyMethod()
@@ -2129,6 +2136,7 @@ bool HInliner::CanInlineBody(const HGraph* callee_graph,
if (depth_ + 1 > maximum_inlining_depth_for_baseline) {
LOG_FAIL_NO_STAT() << "Reached maximum depth for inlining in baseline compilation: "
<< depth_ << " for " << callee_graph->GetArtMethod()->PrettyMethod();
+ outermost_graph_->SetUsefulOptimizing();
return false;
}
}
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index c862e31de7..367f45f3a4 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -425,6 +425,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
cached_current_method_(nullptr),
art_method_(nullptr),
compilation_kind_(compilation_kind),
+ useful_optimizing_(false),
cha_single_implementation_list_(allocator->Adapter(kArenaAllocCHA)) {
blocks_.reserve(kDefaultNumberOfBlocks);
}
@@ -742,6 +743,9 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
void SetNumberOfCHAGuards(uint32_t num) { number_of_cha_guards_ = num; }
void IncrementNumberOfCHAGuards() { number_of_cha_guards_++; }
+ void SetUsefulOptimizing() { useful_optimizing_ = true; }
+ bool IsUsefulOptimizing() const { return useful_optimizing_; }
+
private:
void RemoveDeadBlocksInstructionsAsUsersAndDisconnect(const ArenaBitVector& visited) const;
void RemoveDeadBlocks(const ArenaBitVector& visited);
@@ -897,6 +901,10 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
// directly jump to.
const CompilationKind compilation_kind_;
+ // Whether, after baseline compilation, it is still useful to recompile this
+ // method with the optimizing compiler.
+ bool useful_optimizing_;
+
// List of methods that are assumed to have single implementation.
ArenaSet<ArtMethod*> cha_single_implementation_list_;
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index a1c4130bc1..65e8e51712 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -905,6 +905,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
}
if (compilation_kind == CompilationKind::kBaseline && compiler_options.ProfileBranches()) {
+ graph->SetUsefulOptimizing();
// Branch profiling currently doesn't support running optimizations.
RunRequiredPasses(graph, codegen.get(), dex_compilation_unit, &pass_observer);
} else {
@@ -917,6 +918,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
// this method already, do it now.
if (jit != nullptr &&
compilation_kind == CompilationKind::kBaseline &&
+ graph->IsUsefulOptimizing() &&
graph->GetProfilingInfo() == nullptr) {
ProfilingInfoBuilder(
graph, codegen->GetCompilerOptions(), codegen.get(), compilation_stats_.get()).Run();
@@ -1448,6 +1450,11 @@ bool OptimizingCompiler::JitCompile(Thread* self,
debug_info = GenerateJitDebugInfo(info);
}
+ if (compilation_kind == CompilationKind::kBaseline &&
+ !codegen->GetGraph()->IsUsefulOptimizing()) {
+ compilation_kind = CompilationKind::kOptimized;
+ }
+
if (!code_cache->Commit(self,
region,
method,
diff --git a/compiler/optimizing/profiling_info_builder.cc b/compiler/optimizing/profiling_info_builder.cc
index 19795f5466..f6cf676813 100644
--- a/compiler/optimizing/profiling_info_builder.cc
+++ b/compiler/optimizing/profiling_info_builder.cc
@@ -28,6 +28,7 @@
namespace art HIDDEN {
void ProfilingInfoBuilder::Run() {
+ DCHECK(GetGraph()->IsUsefulOptimizing());
DCHECK_EQ(GetGraph()->GetProfilingInfo(), nullptr);
// Order does not matter.
for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
@@ -122,6 +123,12 @@ bool ProfilingInfoBuilder::IsInlineCacheUseful(HInvoke* invoke, CodeGenerator* c
return false;
}
}
+
+ if (!codegen->GetGraph()->IsUsefulOptimizing()) {
+ // An earlier pass already knew what the call target was. No need for an
+ // inline cache.
+ return false;
+ }
return true;
}
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 7bfbe15059..3560ac17ff 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -1429,18 +1429,20 @@ void JitCodeCache::GetProfiledMethods(const std::set<std::string>& dex_base_loca
ScopedTrace trace(__FUNCTION__);
Thread* self = Thread::Current();
WaitUntilInlineCacheAccessible(self);
- std::vector<ProfilingInfo*> copies;
+ SafeMap<ArtMethod*, ProfilingInfo*> profiling_infos;
+ std::vector<ArtMethod*> copies;
// TODO: Avoid read barriers for potentially dead methods.
// ScopedDebugDisallowReadBarriers sddrb(self);
{
MutexLock mu(self, *Locks::jit_lock_);
- copies.reserve(profiling_infos_.size());
- for (const auto& entry : profiling_infos_) {
+ profiling_infos = profiling_infos_;
+ for (const auto& entry : method_code_map_) {
copies.push_back(entry.second);
}
}
- for (ProfilingInfo* info : copies) {
- ArtMethod* method = info->GetMethod();
+ for (ArtMethod* method : copies) {
+ auto it = profiling_infos.find(method);
+ ProfilingInfo* info = (it == profiling_infos.end()) ? nullptr : it->second;
const DexFile* dex_file = method->GetDexFile();
const std::string base_location = DexFileLoader::GetBaseLocation(dex_file->GetLocation());
if (!ContainsElement(dex_base_locations, base_location)) {
@@ -1449,74 +1451,76 @@ void JitCodeCache::GetProfiledMethods(const std::set<std::string>& dex_base_loca
}
std::vector<ProfileMethodInfo::ProfileInlineCache> inline_caches;
- // If the method is still baseline compiled and doesn't meet the inline cache threshold, don't
- // save the inline caches because they might be incomplete.
- // Although we don't deoptimize for incomplete inline caches in AOT-compiled code, inlining
- // leads to larger generated code.
- // If the inline cache is empty the compiler will generate a regular invoke virtual/interface.
- const void* entry_point = method->GetEntryPointFromQuickCompiledCode();
- if (ContainsPc(entry_point) &&
- CodeInfo::IsBaseline(
- OatQuickMethodHeader::FromEntryPoint(entry_point)->GetOptimizedCodeInfoPtr()) &&
- (ProfilingInfo::GetOptimizeThreshold() - info->GetBaselineHotnessCount()) <
- inline_cache_threshold) {
- methods.emplace_back(/*ProfileMethodInfo*/
- MethodReference(dex_file, method->GetDexMethodIndex()), inline_caches);
- continue;
- }
-
- for (size_t i = 0; i < info->number_of_inline_caches_; ++i) {
- std::vector<TypeReference> profile_classes;
- const InlineCache& cache = info->GetInlineCaches()[i];
- ArtMethod* caller = info->GetMethod();
- bool is_missing_types = false;
- for (size_t k = 0; k < InlineCache::kIndividualCacheSize; k++) {
- mirror::Class* cls = cache.classes_[k].Read();
- if (cls == nullptr) {
- break;
- }
+ if (info != nullptr) {
+ // If the method is still baseline compiled and doesn't meet the inline cache threshold, don't
+ // save the inline caches because they might be incomplete.
+ // Although we don't deoptimize for incomplete inline caches in AOT-compiled code, inlining
+ // leads to larger generated code.
+ // If the inline cache is empty the compiler will generate a regular invoke virtual/interface.
+ const void* entry_point = method->GetEntryPointFromQuickCompiledCode();
+ if (ContainsPc(entry_point) &&
+ CodeInfo::IsBaseline(
+ OatQuickMethodHeader::FromEntryPoint(entry_point)->GetOptimizedCodeInfoPtr()) &&
+ (ProfilingInfo::GetOptimizeThreshold() - info->GetBaselineHotnessCount()) <
+ inline_cache_threshold) {
+ methods.emplace_back(/*ProfileMethodInfo*/
+ MethodReference(dex_file, method->GetDexMethodIndex()), inline_caches);
+ continue;
+ }
- // Check if the receiver is in the boot class path or if it's in the
- // same class loader as the caller. If not, skip it, as there is not
- // much we can do during AOT.
- if (!cls->IsBootStrapClassLoaded() &&
- caller->GetClassLoader() != cls->GetClassLoader()) {
- is_missing_types = true;
- continue;
- }
+ for (size_t i = 0; i < info->number_of_inline_caches_; ++i) {
+ std::vector<TypeReference> profile_classes;
+ const InlineCache& cache = info->GetInlineCaches()[i];
+ ArtMethod* caller = info->GetMethod();
+ bool is_missing_types = false;
+ for (size_t k = 0; k < InlineCache::kIndividualCacheSize; k++) {
+ mirror::Class* cls = cache.classes_[k].Read();
+ if (cls == nullptr) {
+ break;
+ }
- const DexFile* class_dex_file = nullptr;
- dex::TypeIndex type_index;
+ // Check if the receiver is in the boot class path or if it's in the
+ // same class loader as the caller. If not, skip it, as there is not
+ // much we can do during AOT.
+ if (!cls->IsBootStrapClassLoaded() &&
+ caller->GetClassLoader() != cls->GetClassLoader()) {
+ is_missing_types = true;
+ continue;
+ }
- if (cls->GetDexCache() == nullptr) {
- DCHECK(cls->IsArrayClass()) << cls->PrettyClass();
- // Make a best effort to find the type index in the method's dex file.
- // We could search all open dex files but that might turn expensive
- // and probably not worth it.
- class_dex_file = dex_file;
- type_index = cls->FindTypeIndexInOtherDexFile(*dex_file);
- } else {
- class_dex_file = &(cls->GetDexFile());
- type_index = cls->GetDexTypeIndex();
- }
- if (!type_index.IsValid()) {
- // Could be a proxy class or an array for which we couldn't find the type index.
- is_missing_types = true;
- continue;
+ const DexFile* class_dex_file = nullptr;
+ dex::TypeIndex type_index;
+
+ if (cls->GetDexCache() == nullptr) {
+ DCHECK(cls->IsArrayClass()) << cls->PrettyClass();
+ // Make a best effort to find the type index in the method's dex file.
+ // We could search all open dex files but that might turn expensive
+ // and probably not worth it.
+ class_dex_file = dex_file;
+ type_index = cls->FindTypeIndexInOtherDexFile(*dex_file);
+ } else {
+ class_dex_file = &(cls->GetDexFile());
+ type_index = cls->GetDexTypeIndex();
+ }
+ if (!type_index.IsValid()) {
+ // Could be a proxy class or an array for which we couldn't find the type index.
+ is_missing_types = true;
+ continue;
+ }
+ if (ContainsElement(dex_base_locations,
+ DexFileLoader::GetBaseLocation(class_dex_file->GetLocation()))) {
+ // Only consider classes from the same apk (including multidex).
+ profile_classes.emplace_back(/*ProfileMethodInfo::ProfileClassReference*/
+ class_dex_file, type_index);
+ } else {
+ is_missing_types = true;
+ }
}
- if (ContainsElement(dex_base_locations,
- DexFileLoader::GetBaseLocation(class_dex_file->GetLocation()))) {
- // Only consider classes from the same apk (including multidex).
- profile_classes.emplace_back(/*ProfileMethodInfo::ProfileClassReference*/
- class_dex_file, type_index);
- } else {
- is_missing_types = true;
+ if (!profile_classes.empty()) {
+ inline_caches.emplace_back(/*ProfileMethodInfo::ProfileInlineCache*/
+ cache.dex_pc_, is_missing_types, profile_classes);
}
}
- if (!profile_classes.empty()) {
- inline_caches.emplace_back(/*ProfileMethodInfo::ProfileInlineCache*/
- cache.dex_pc_, is_missing_types, profile_classes);
- }
}
methods.emplace_back(/*ProfileMethodInfo*/
MethodReference(dex_file, method->GetDexMethodIndex()), inline_caches);
@@ -1535,17 +1539,13 @@ bool JitCodeCache::NotifyCompilationOf(ArtMethod* method,
CompilationKind compilation_kind,
bool prejit) {
const void* existing_entry_point = method->GetEntryPointFromQuickCompiledCode();
- if (compilation_kind != CompilationKind::kOsr && ContainsPc(existing_entry_point)) {
- OatQuickMethodHeader* method_header =
- OatQuickMethodHeader::FromEntryPoint(existing_entry_point);
- bool is_baseline = (compilation_kind == CompilationKind::kBaseline);
- if (CodeInfo::IsBaseline(method_header->GetOptimizedCodeInfoPtr()) == is_baseline) {
- VLOG(jit) << "Not compiling "
- << method->PrettyMethod()
- << " because it has already been compiled"
- << " kind=" << compilation_kind;
- return false;
- }
+ if (compilation_kind == CompilationKind::kBaseline && ContainsPc(existing_entry_point)) {
+ // The existing entry point is either already baseline, or optimized. No
+ // need to compile.
+ VLOG(jit) << "Not compiling "
+ << method->PrettyMethod()
+ << " baseline, because it has already been compiled";
+ return false;
}
if (method->NeedsClinitCheckBeforeCall() && !prejit) {