author     2024-02-07 11:53:09 +0000
committer  2024-02-08 15:56:21 +0000
commit     e872656585952f993eb84633a66e0aedcbdf52ac (patch)
tree       82f08a5d1dd1ca5247810b20a92c5a56a48b34a1
parent     03ca5cf9db4110962700d47b7b5bd04592cac157 (diff)
Only compile optimized if it is useful.
If profiling doesn't benefit the method, switch the baseline compilation
into an optimized one.
Reduces the number of JIT compilations on the Sheets benchmark from
~3100 (2250 baseline, 850 optimized) to ~2750 (2250 baseline, 500
optimized).
Test: test.py
Change-Id: I94760481d130d2dc168152daa94429baf201f66e
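
In other words, the baseline compilation now records on the graph whether collecting
profiling data can still pay off; when it cannot, the JIT commits the baseline output as
optimized code so the method is never recompiled. Below is a minimal, self-contained
sketch of that control flow only; Graph, InlineCallSites and ChooseCommitKind are
illustrative stand-ins, not the ART classes touched in the diff below.

    // Hypothetical, simplified sketch of the idea (not ART's real classes):
    // a baseline compilation records whether profiling data could still help,
    // and the JIT promotes the result to an optimized compilation if it could not.
    #include <cstdio>

    enum class CompilationKind { kBaseline, kOptimized };

    // Stand-in for HGraph: carries the "useful optimizing" bit added by this change.
    struct Graph {
      bool useful_optimizing = false;
      void SetUsefulOptimizing() { useful_optimizing = true; }
      bool IsUsefulOptimizing() const { return useful_optimizing; }
    };

    // Stand-in for the inliner: when it has to give up for baseline-only reasons
    // (unresolved target, baseline size/depth limits), a later optimized compile
    // with profiling data could still do better, so it flags the graph.
    void InlineCallSites(Graph* graph, bool everything_resolved_and_small) {
      if (!everything_resolved_and_small) {
        graph->SetUsefulOptimizing();
      }
    }

    // Stand-in for the JIT commit step: if baseline profiling would not change
    // anything, commit the code as optimized so the method is not compiled twice.
    CompilationKind ChooseCommitKind(const Graph& graph, CompilationKind requested) {
      if (requested == CompilationKind::kBaseline && !graph.IsUsefulOptimizing()) {
        return CompilationKind::kOptimized;
      }
      return requested;
    }

    int main() {
      Graph graph;
      InlineCallSites(&graph, /*everything_resolved_and_small=*/true);
      CompilationKind kind = ChooseCommitKind(graph, CompilationKind::kBaseline);
      std::printf("committed as %s\n",
                  kind == CompilationKind::kOptimized ? "optimized" : "baseline");
      return 0;
    }

In the actual change this bit is useful_optimizing_ on HGraph: the inliner and the
branch-profiling path set it, ProfilingInfoBuilder requires it, and
OptimizingCompiler::JitCompile flips the compilation kind to kOptimized before
committing when it was never set.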
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc     |   4
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc  |   4
-rw-r--r--  compiler/optimizing/code_generator_riscv64.cc   |   4
-rw-r--r--  compiler/optimizing/code_generator_x86.cc       |   6
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc    |   4
-rw-r--r--  compiler/optimizing/inliner.cc                  |  14
-rw-r--r--  compiler/optimizing/nodes.h                     |   8
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc      |   7
-rw-r--r--  compiler/optimizing/profiling_info_builder.cc   |   7
-rw-r--r--  runtime/jit/jit_code_cache.cc                   | 156
10 files changed, 126 insertions, 88 deletions
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 5ba26b4754..e22b24ef2f 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1306,7 +1306,9 @@ void CodeGeneratorARM64::MaybeIncrementHotness(HSuspendCheck* suspend_check, boo
     __ Bind(&done);
   }

-  if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+  if (GetGraph()->IsCompilingBaseline() &&
+      GetGraph()->IsUsefulOptimizing() &&
+      !Runtime::Current()->IsAotCompiler()) {
     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
     DCHECK(info != nullptr);
     DCHECK(!HasEmptyFrame());
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 85f61f5303..75fae4e859 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -2302,7 +2302,9 @@ void CodeGeneratorARMVIXL::MaybeIncrementHotness(HSuspendCheck* suspend_check,
     }
   }

-  if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+  if (GetGraph()->IsCompilingBaseline() &&
+      GetGraph()->IsUsefulOptimizing() &&
+      !Runtime::Current()->IsAotCompiler()) {
     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
     DCHECK(info != nullptr);
     DCHECK(!HasEmptyFrame());
diff --git a/compiler/optimizing/code_generator_riscv64.cc b/compiler/optimizing/code_generator_riscv64.cc
index ed57683e0a..93bd35b618 100644
--- a/compiler/optimizing/code_generator_riscv64.cc
+++ b/compiler/optimizing/code_generator_riscv64.cc
@@ -5763,7 +5763,9 @@ void CodeGeneratorRISCV64::MaybeIncrementHotness(HSuspendCheck* suspend_check,
     __ Bind(&done);
   }

-  if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+  if (GetGraph()->IsCompilingBaseline() &&
+      GetGraph()->IsUsefulOptimizing() &&
+      !Runtime::Current()->IsAotCompiler()) {
     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
     DCHECK(info != nullptr);
     DCHECK(!HasEmptyFrame());
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index a61dca3022..21d3492e8a 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1357,9 +1357,9 @@ void CodeGeneratorX86::MaybeIncrementHotness(HSuspendCheck* suspend_check, bool
     }
   }

-  if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
-    // Note the slow path doesn't save SIMD registers, so if we were to
-    // call it on loop back edge, we would need to fix this.
+  if (GetGraph()->IsCompilingBaseline() &&
+      GetGraph()->IsUsefulOptimizing() &&
+      !Runtime::Current()->IsAotCompiler()) {
     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
     DCHECK(info != nullptr);
     uint32_t address = reinterpret_cast32<uint32_t>(info) +
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index db4062b00d..af6c6255e5 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1788,7 +1788,9 @@ void CodeGeneratorX86_64::MaybeIncrementHotness(HSuspendCheck* suspend_check, bo
     __ Bind(&overflow);
   }

-  if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+  if (GetGraph()->IsCompilingBaseline() &&
+      GetGraph()->IsUsefulOptimizing() &&
+      !Runtime::Current()->IsAotCompiler()) {
     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
     DCHECK(info != nullptr);
     CHECK(!HasEmptyFrame());
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index fd3e787fc8..d7ca17b646 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -541,6 +541,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) {
                << " statically resolve the target";
     // For baseline compilation, we will collect inline caches, so we should not
     // try to inline using them.
+    outermost_graph_->SetUsefulOptimizing();
     return false;
   }

@@ -1552,9 +1553,7 @@ bool HInliner::IsInliningEncouraged(const HInvoke* invoke_instruction,
     return false;
   }

-  size_t inline_max_code_units = graph_->IsCompilingBaseline()
-      ? CompilerOptions::kBaselineInlineMaxCodeUnits
-      : codegen_->GetCompilerOptions().GetInlineMaxCodeUnits();
+  size_t inline_max_code_units = codegen_->GetCompilerOptions().GetInlineMaxCodeUnits();
   if (accessor.InsnsSizeInCodeUnits() > inline_max_code_units) {
     LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedCodeItem)
         << "Method " << method->PrettyMethod()
@@ -1565,6 +1564,14 @@
     return false;
   }

+  if (graph_->IsCompilingBaseline() &&
+      accessor.InsnsSizeInCodeUnits() > CompilerOptions::kBaselineInlineMaxCodeUnits) {
+    LOG_FAIL_NO_STAT() << "Reached baseline maximum code unit for inlining "
+                       << method->PrettyMethod();
+    outermost_graph_->SetUsefulOptimizing();
+    return false;
+  }
+
   if (invoke_instruction->GetBlock()->GetLastInstruction()->IsThrow()) {
     LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedEndsWithThrow)
         << "Method " << method->PrettyMethod()
@@ -2129,6 +2136,7 @@ bool HInliner::CanInlineBody(const HGraph* callee_graph,
     if (depth_ + 1 > maximum_inlining_depth_for_baseline) {
       LOG_FAIL_NO_STAT() << "Reached maximum depth for inlining in baseline compilation: "
                          << depth_ << " for " << callee_graph->GetArtMethod()->PrettyMethod();
+      outermost_graph_->SetUsefulOptimizing();
       return false;
     }
   }
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index c862e31de7..367f45f3a4 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -425,6 +425,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
         cached_current_method_(nullptr),
         art_method_(nullptr),
         compilation_kind_(compilation_kind),
+        useful_optimizing_(false),
         cha_single_implementation_list_(allocator->Adapter(kArenaAllocCHA)) {
     blocks_.reserve(kDefaultNumberOfBlocks);
   }
@@ -742,6 +743,9 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
   void SetNumberOfCHAGuards(uint32_t num) { number_of_cha_guards_ = num; }
   void IncrementNumberOfCHAGuards() { number_of_cha_guards_++; }

+  void SetUsefulOptimizing() { useful_optimizing_ = true; }
+  bool IsUsefulOptimizing() const { return useful_optimizing_; }
+
  private:
   void RemoveDeadBlocksInstructionsAsUsersAndDisconnect(const ArenaBitVector& visited) const;
   void RemoveDeadBlocks(const ArenaBitVector& visited);
@@ -897,6 +901,10 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
   // directly jump to.
   const CompilationKind compilation_kind_;

+  // Whether after compiling baseline it is still useful re-optimizing this
+  // method.
+  bool useful_optimizing_;
+
   // List of methods that are assumed to have single implementation.
   ArenaSet<ArtMethod*> cha_single_implementation_list_;

diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index a1c4130bc1..65e8e51712 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -905,6 +905,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
   }

   if (compilation_kind == CompilationKind::kBaseline && compiler_options.ProfileBranches()) {
+    graph->SetUsefulOptimizing();
     // Branch profiling currently doesn't support running optimizations.
     RunRequiredPasses(graph, codegen.get(), dex_compilation_unit, &pass_observer);
   } else {
@@ -917,6 +918,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
   // this method already, do it now.
   if (jit != nullptr &&
       compilation_kind == CompilationKind::kBaseline &&
+      graph->IsUsefulOptimizing() &&
       graph->GetProfilingInfo() == nullptr) {
     ProfilingInfoBuilder(
         graph, codegen->GetCompilerOptions(), codegen.get(), compilation_stats_.get()).Run();
@@ -1448,6 +1450,11 @@ bool OptimizingCompiler::JitCompile(Thread* self,
     debug_info = GenerateJitDebugInfo(info);
   }

+  if (compilation_kind == CompilationKind::kBaseline &&
+      !codegen->GetGraph()->IsUsefulOptimizing()) {
+    compilation_kind = CompilationKind::kOptimized;
+  }
+
   if (!code_cache->Commit(self,
                           region,
                           method,
diff --git a/compiler/optimizing/profiling_info_builder.cc b/compiler/optimizing/profiling_info_builder.cc
index 19795f5466..f6cf676813 100644
--- a/compiler/optimizing/profiling_info_builder.cc
+++ b/compiler/optimizing/profiling_info_builder.cc
@@ -28,6 +28,7 @@ namespace art HIDDEN {

 void ProfilingInfoBuilder::Run() {
+  DCHECK(GetGraph()->IsUsefulOptimizing());
   DCHECK_EQ(GetGraph()->GetProfilingInfo(), nullptr);
   // Order does not matter.
   for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
@@ -122,6 +123,12 @@ bool ProfilingInfoBuilder::IsInlineCacheUseful(HInvoke* invoke, CodeGenerator* c
       return false;
     }
   }
+
+  if (!codegen->GetGraph()->IsUsefulOptimizing()) {
+    // Earlier pass knew what the calling target was. No need for an inline
+    // cache.
+    return false;
+  }
   return true;
 }

diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 7bfbe15059..3560ac17ff 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -1429,18 +1429,20 @@ void JitCodeCache::GetProfiledMethods(const std::set<std::string>& dex_base_loca
   ScopedTrace trace(__FUNCTION__);
   Thread* self = Thread::Current();
   WaitUntilInlineCacheAccessible(self);
-  std::vector<ProfilingInfo*> copies;
+  SafeMap<ArtMethod*, ProfilingInfo*> profiling_infos;
+  std::vector<ArtMethod*> copies;
   // TODO: Avoid read barriers for potentially dead methods.
   // ScopedDebugDisallowReadBarriers sddrb(self);
   {
     MutexLock mu(self, *Locks::jit_lock_);
-    copies.reserve(profiling_infos_.size());
-    for (const auto& entry : profiling_infos_) {
+    profiling_infos = profiling_infos_;
+    for (const auto& entry : method_code_map_) {
       copies.push_back(entry.second);
     }
   }
-  for (ProfilingInfo* info : copies) {
-    ArtMethod* method = info->GetMethod();
+  for (ArtMethod* method : copies) {
+    auto it = profiling_infos.find(method);
+    ProfilingInfo* info = (it == profiling_infos.end()) ? nullptr : it->second;
     const DexFile* dex_file = method->GetDexFile();
     const std::string base_location = DexFileLoader::GetBaseLocation(dex_file->GetLocation());
     if (!ContainsElement(dex_base_locations, base_location)) {
@@ -1449,74 +1451,76 @@ void JitCodeCache::GetProfiledMethods(const std::set<std::string>& dex_base_loca
     }
     std::vector<ProfileMethodInfo::ProfileInlineCache> inline_caches;
-    // If the method is still baseline compiled and doesn't meet the inline cache threshold, don't
-    // save the inline caches because they might be incomplete.
-    // Although we don't deoptimize for incomplete inline caches in AOT-compiled code, inlining
-    // leads to larger generated code.
-    // If the inline cache is empty the compiler will generate a regular invoke virtual/interface.
-    const void* entry_point = method->GetEntryPointFromQuickCompiledCode();
-    if (ContainsPc(entry_point) &&
-        CodeInfo::IsBaseline(
-            OatQuickMethodHeader::FromEntryPoint(entry_point)->GetOptimizedCodeInfoPtr()) &&
-        (ProfilingInfo::GetOptimizeThreshold() - info->GetBaselineHotnessCount()) <
-            inline_cache_threshold) {
-      methods.emplace_back(/*ProfileMethodInfo*/
-          MethodReference(dex_file, method->GetDexMethodIndex()), inline_caches);
-      continue;
-    }
-
-    for (size_t i = 0; i < info->number_of_inline_caches_; ++i) {
-      std::vector<TypeReference> profile_classes;
-      const InlineCache& cache = info->GetInlineCaches()[i];
-      ArtMethod* caller = info->GetMethod();
-      bool is_missing_types = false;
-      for (size_t k = 0; k < InlineCache::kIndividualCacheSize; k++) {
-        mirror::Class* cls = cache.classes_[k].Read();
-        if (cls == nullptr) {
-          break;
-        }
+    if (info != nullptr) {
+      // If the method is still baseline compiled and doesn't meet the inline cache threshold, don't
+      // save the inline caches because they might be incomplete.
+      // Although we don't deoptimize for incomplete inline caches in AOT-compiled code, inlining
+      // leads to larger generated code.
+      // If the inline cache is empty the compiler will generate a regular invoke virtual/interface.
+      const void* entry_point = method->GetEntryPointFromQuickCompiledCode();
+      if (ContainsPc(entry_point) &&
+          CodeInfo::IsBaseline(
+              OatQuickMethodHeader::FromEntryPoint(entry_point)->GetOptimizedCodeInfoPtr()) &&
+          (ProfilingInfo::GetOptimizeThreshold() - info->GetBaselineHotnessCount()) <
+              inline_cache_threshold) {
+        methods.emplace_back(/*ProfileMethodInfo*/
+            MethodReference(dex_file, method->GetDexMethodIndex()), inline_caches);
+        continue;
+      }

-        // Check if the receiver is in the boot class path or if it's in the
-        // same class loader as the caller. If not, skip it, as there is not
-        // much we can do during AOT.
-        if (!cls->IsBootStrapClassLoaded() &&
-            caller->GetClassLoader() != cls->GetClassLoader()) {
-          is_missing_types = true;
-          continue;
-        }
+      for (size_t i = 0; i < info->number_of_inline_caches_; ++i) {
+        std::vector<TypeReference> profile_classes;
+        const InlineCache& cache = info->GetInlineCaches()[i];
+        ArtMethod* caller = info->GetMethod();
+        bool is_missing_types = false;
+        for (size_t k = 0; k < InlineCache::kIndividualCacheSize; k++) {
+          mirror::Class* cls = cache.classes_[k].Read();
+          if (cls == nullptr) {
+            break;
+          }

-        const DexFile* class_dex_file = nullptr;
-        dex::TypeIndex type_index;
+          // Check if the receiver is in the boot class path or if it's in the
+          // same class loader as the caller. If not, skip it, as there is not
+          // much we can do during AOT.
+          if (!cls->IsBootStrapClassLoaded() &&
+              caller->GetClassLoader() != cls->GetClassLoader()) {
+            is_missing_types = true;
+            continue;
+          }

-        if (cls->GetDexCache() == nullptr) {
-          DCHECK(cls->IsArrayClass()) << cls->PrettyClass();
-          // Make a best effort to find the type index in the method's dex file.
-          // We could search all open dex files but that might turn expensive
-          // and probably not worth it.
-          class_dex_file = dex_file;
-          type_index = cls->FindTypeIndexInOtherDexFile(*dex_file);
-        } else {
-          class_dex_file = &(cls->GetDexFile());
-          type_index = cls->GetDexTypeIndex();
-        }
-        if (!type_index.IsValid()) {
-          // Could be a proxy class or an array for which we couldn't find the type index.
-          is_missing_types = true;
-          continue;
+          const DexFile* class_dex_file = nullptr;
+          dex::TypeIndex type_index;
+
+          if (cls->GetDexCache() == nullptr) {
+            DCHECK(cls->IsArrayClass()) << cls->PrettyClass();
+            // Make a best effort to find the type index in the method's dex file.
+            // We could search all open dex files but that might turn expensive
+            // and probably not worth it.
+            class_dex_file = dex_file;
+            type_index = cls->FindTypeIndexInOtherDexFile(*dex_file);
+          } else {
+            class_dex_file = &(cls->GetDexFile());
+            type_index = cls->GetDexTypeIndex();
+          }
+          if (!type_index.IsValid()) {
+            // Could be a proxy class or an array for which we couldn't find the type index.
+            is_missing_types = true;
+            continue;
+          }
+          if (ContainsElement(dex_base_locations,
+                              DexFileLoader::GetBaseLocation(class_dex_file->GetLocation()))) {
+            // Only consider classes from the same apk (including multidex).
+            profile_classes.emplace_back(/*ProfileMethodInfo::ProfileClassReference*/
+                class_dex_file, type_index);
+          } else {
+            is_missing_types = true;
+          }
         }
-        if (ContainsElement(dex_base_locations,
-                            DexFileLoader::GetBaseLocation(class_dex_file->GetLocation()))) {
-          // Only consider classes from the same apk (including multidex).
-          profile_classes.emplace_back(/*ProfileMethodInfo::ProfileClassReference*/
-              class_dex_file, type_index);
-        } else {
-          is_missing_types = true;
+        if (!profile_classes.empty()) {
+          inline_caches.emplace_back(/*ProfileMethodInfo::ProfileInlineCache*/
+              cache.dex_pc_, is_missing_types, profile_classes);
         }
       }
-      if (!profile_classes.empty()) {
-        inline_caches.emplace_back(/*ProfileMethodInfo::ProfileInlineCache*/
-            cache.dex_pc_, is_missing_types, profile_classes);
-      }
     }
     methods.emplace_back(/*ProfileMethodInfo*/
         MethodReference(dex_file, method->GetDexMethodIndex()), inline_caches);
@@ -1535,17 +1539,13 @@ bool JitCodeCache::NotifyCompilationOf(ArtMethod* method,
                                        CompilationKind compilation_kind,
                                        bool prejit) {
   const void* existing_entry_point = method->GetEntryPointFromQuickCompiledCode();
-  if (compilation_kind != CompilationKind::kOsr && ContainsPc(existing_entry_point)) {
-    OatQuickMethodHeader* method_header =
-        OatQuickMethodHeader::FromEntryPoint(existing_entry_point);
-    bool is_baseline = (compilation_kind == CompilationKind::kBaseline);
-    if (CodeInfo::IsBaseline(method_header->GetOptimizedCodeInfoPtr()) == is_baseline) {
-      VLOG(jit) << "Not compiling "
-                << method->PrettyMethod()
-                << " because it has already been compiled"
-                << " kind=" << compilation_kind;
-      return false;
-    }
+  if (compilation_kind == CompilationKind::kBaseline && ContainsPc(existing_entry_point)) {
+    // The existing entry point is either already baseline, or optimized. No
+    // need to compile.
+    VLOG(jit) << "Not compiling "
+              << method->PrettyMethod()
+              << " baseline, because it has already been compiled";
+    return false;
   }

   if (method->NeedsClinitCheckBeforeCall() && !prejit) {
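
The largest hunk, in JitCodeCache::GetProfiledMethods(), follows from the same change:
a method can now reach optimized code without ever having had a ProfilingInfo, so the
profile walk is keyed off all compiled methods and treats the profiling info as optional.
Below is a rough sketch of that shape only; MethodId, ProfilingInfoStub and
CollectProfiledMethods are simplified stand-ins for illustration, not the ART types used
in the diff.

    // Rough sketch of the reshaped profile collection: iterate every compiled
    // method, and attach inline-cache data only if a ProfilingInfo still exists.
    // MethodId, ProfilingInfoStub and ProfileEntry are illustrative stand-ins.
    #include <cstdint>
    #include <map>
    #include <string>
    #include <vector>

    using MethodId = std::string;

    struct ProfilingInfoStub {
      std::vector<std::uint32_t> inline_cache_dex_pcs;
    };

    struct ProfileEntry {
      MethodId method;
      std::vector<std::uint32_t> inline_cache_dex_pcs;  // empty when no ProfilingInfo
    };

    std::vector<ProfileEntry> CollectProfiledMethods(
        const std::vector<MethodId>& compiled_methods,
        const std::map<MethodId, ProfilingInfoStub>& profiling_infos) {
      std::vector<ProfileEntry> result;
      for (const MethodId& method : compiled_methods) {
        ProfileEntry entry{method, {}};
        auto it = profiling_infos.find(method);
        if (it != profiling_infos.end()) {
          // Only methods that kept a ProfilingInfo contribute inline caches.
          entry.inline_cache_dex_pcs = it->second.inline_cache_dex_pcs;
        }
        // Methods without a ProfilingInfo are still recorded, just without
        // inline-cache data, so they stay visible to profile-guided compilation.
        result.push_back(entry);
      }
      return result;
    }

This mirrors the switch above from iterating profiling_infos_ to iterating
method_code_map_ and looking the ProfilingInfo up per method: with some baseline
compilations now committed directly as optimized code, not every hot method has a
ProfilingInfo anymore, but it should still appear in the recorded profile.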