author Nicolas Geoffray <ngeoffray@google.com> 2024-01-29 14:24:31 +0000
committer Nicolas Geoffray <ngeoffray@google.com> 2024-01-30 17:22:07 +0000
commit 33e9f1a70d5f58639b524f40bf39a8e233c04ba8 (patch)
tree f78d2949cac297400c0854ef57a4190f77d28e1b /compiler
parent 516020a3fbfe3db43f7faf0ac3daf5a45dbeeb6b (diff)
Reland^2 "Run optimizations with baseline compilation."
This reverts commit 3dccb13f4e92db37a13359e126c5ddc12cb674b5.

Also includes the fix for incrementing hotness that got reverted:
aosp/2906378

Bug: 313040662

Reduces jank on compose view scrolling for 4 iterations:
- For Go Mokey:
  - Before: ~698 frames drawn / ~13.87% janky frames
  - After: ~937 frames drawn / ~5.52% janky frames
- For Pixel 8 pro:
  - Before: ~2440 frames drawn / ~0.90% janky frames
  - After: ~2450 frames drawn / ~0.55% janky frames

Reason for revert: Reduce inlining threshold for baseline.

Change-Id: Iee5cd4c3ceb7715caf9299b56551aae6f0259769
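For reference, a minimal standalone sketch of the threshold selection this change introduces. The constants and the size check mirror the compiler_options.h and inliner.cc hunks below; the real HInliner::IsInliningEncouraged() applies several other checks and reads the configured inline-max-code-units value from CompilerOptions, so this is only an illustration of the baseline gating:

    #include <cstddef>
    #include <iostream>

    // Thresholds mirrored from the compiler_options.h hunk below.
    constexpr size_t kDefaultInlineMaxCodeUnits = 32;
    constexpr size_t kBaselineMaxCodeUnits = 8;

    // Condensed from the inliner.cc hunk: baseline compilation keeps the
    // inliner enabled, but with a much smaller budget so that tiny callees
    // (getters, setters) still inline while larger ones are skipped, keeping
    // code size and JIT compile time low.
    bool IsInliningEncouraged(bool compiling_baseline, size_t callee_code_units) {
      size_t inline_max_code_units =
          compiling_baseline ? kBaselineMaxCodeUnits : kDefaultInlineMaxCodeUnits;
      return callee_code_units <= inline_max_code_units;
    }

    int main() {
      std::cout << IsInliningEncouraged(/*compiling_baseline=*/true, 12)    // 0
                << IsInliningEncouraged(/*compiling_baseline=*/false, 12)   // 1
                << '\n';
    }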
Diffstat (limited to 'compiler')
-rw-r--r--  compiler/driver/compiler_options.h             |  2
-rw-r--r--  compiler/jit/jit_compiler.cc                   |  4
-rw-r--r--  compiler/jit/jit_compiler.h                    |  2
-rw-r--r--  compiler/optimizing/code_generator.cc          |  5
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc    | 26
-rw-r--r--  compiler/optimizing/code_generator_arm64.h     |  2
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc | 21
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.h  |  2
-rw-r--r--  compiler/optimizing/code_generator_riscv64.cc  | 24
-rw-r--r--  compiler/optimizing/code_generator_riscv64.h   |  2
-rw-r--r--  compiler/optimizing/code_generator_x86.cc      | 26
-rw-r--r--  compiler/optimizing/code_generator_x86.h       |  2
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc   | 24
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h    |  2
-rw-r--r--  compiler/optimizing/graph_visualizer.cc        |  3
-rw-r--r--  compiler/optimizing/inliner.cc                 | 76
-rw-r--r--  compiler/optimizing/inliner.h                  | 12
-rw-r--r--  compiler/optimizing/optimization.cc            |  1
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc     | 36
-rw-r--r--  compiler/optimizing/profiling_info_builder.cc  | 59
-rw-r--r--  compiler/optimizing/profiling_info_builder.h   | 10
21 files changed, 259 insertions, 82 deletions
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 5890227c36..f659b12959 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -65,6 +65,8 @@ class CompilerOptions final {
static const bool kDefaultGenerateDebugInfo = false;
static const bool kDefaultGenerateMiniDebugInfo = true;
static const size_t kDefaultInlineMaxCodeUnits = 32;
+ // We set a lower inlining threshold for baseline to reduce code size and compilation time.
+ static const size_t kBaselineMaxCodeUnits = 8;
static constexpr size_t kUnsetInlineMaxCodeUnits = -1;
enum class CompilerType : uint8_t {
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 523a666f8a..c14d5d37e8 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -226,5 +226,9 @@ bool JitCompiler::IsBaselineCompiler() const {
return compiler_options_->IsBaseline();
}
+uint32_t JitCompiler::GetInlineMaxCodeUnits() const {
+ return compiler_options_->GetInlineMaxCodeUnits();
+}
+
} // namespace jit
} // namespace art
diff --git a/compiler/jit/jit_compiler.h b/compiler/jit/jit_compiler.h
index 5a919fb612..66aa545dd7 100644
--- a/compiler/jit/jit_compiler.h
+++ b/compiler/jit/jit_compiler.h
@@ -64,6 +64,8 @@ class JitCompiler : public JitCompilerInterface {
bool compress,
/*out*/ size_t* num_symbols) override;
+ uint32_t GetInlineMaxCodeUnits() const override;
+
private:
std::unique_ptr<CompilerOptions> compiler_options_;
std::unique_ptr<Compiler> compiler_;
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 441a93c38f..c734922268 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -172,7 +172,8 @@ bool CodeGenerator::ShouldCheckGCCard(DataType::Type type,
CodeGenerator::StoreNeedsWriteBarrier(type, value);
DCHECK_IMPLIES(result, write_barrier_kind == WriteBarrierKind::kDontEmit);
- DCHECK_IMPLIES(result, !GetGraph()->IsCompilingBaseline());
+ DCHECK_IMPLIES(
+ result, !(GetGraph()->IsCompilingBaseline() && compiler_options_.ProfileBranches()));
return result;
}
@@ -1633,7 +1634,7 @@ bool CodeGenerator::StoreNeedsWriteBarrier(DataType::Type type,
// Check that null value is not represented as an integer constant.
DCHECK_IMPLIES(type == DataType::Type::kReference, !value->IsIntConstant());
// Branch profiling currently doesn't support running optimizations.
- return GetGraph()->IsCompilingBaseline()
+ return (GetGraph()->IsCompilingBaseline() && compiler_options_.ProfileBranches())
? CodeGenerator::StoreNeedsWriteBarrier(type, value)
: write_barrier_kind != WriteBarrierKind::kDontEmit;
}
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index f3b20ea117..1e3835dab6 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -847,8 +847,8 @@ class MethodEntryExitHooksSlowPathARM64 : public SlowPathCodeARM64 {
class CompileOptimizedSlowPathARM64 : public SlowPathCodeARM64 {
public:
- explicit CompileOptimizedSlowPathARM64(Register profiling_info)
- : SlowPathCodeARM64(/* instruction= */ nullptr),
+ CompileOptimizedSlowPathARM64(HSuspendCheck* check, Register profiling_info)
+ : SlowPathCodeARM64(check),
profiling_info_(profiling_info) {}
void EmitNativeCode(CodeGenerator* codegen) override {
@@ -861,10 +861,18 @@ class CompileOptimizedSlowPathARM64 : public SlowPathCodeARM64 {
__ Mov(counter, ProfilingInfo::GetOptimizeThreshold());
__ Strh(counter,
MemOperand(profiling_info_, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
+ if (instruction_ != nullptr) {
+ // Only saves live vector regs for SIMD.
+ SaveLiveRegisters(codegen, instruction_->GetLocations());
+ }
__ Ldr(lr, MemOperand(tr, entrypoint_offset));
// Note: we don't record the call here (and therefore don't generate a stack
// map), as the entrypoint should never be suspended.
__ Blr(lr);
+ if (instruction_ != nullptr) {
+ // Only restores live vector regs for SIMD.
+ RestoreLiveRegisters(codegen, instruction_->GetLocations());
+ }
__ B(GetExitLabel());
}
@@ -1280,7 +1288,7 @@ void InstructionCodeGeneratorARM64::VisitMethodEntryHook(HMethodEntryHook* instr
GenerateMethodEntryExitHook(instruction);
}
-void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) {
+void CodeGeneratorARM64::MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry) {
MacroAssembler* masm = GetVIXLAssembler();
if (GetCompilerOptions().CountHotnessInCompiledCode()) {
UseScratchRegisterScope temps(masm);
@@ -1303,11 +1311,10 @@ void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) {
DCHECK(info != nullptr);
DCHECK(!HasEmptyFrame());
uint64_t address = reinterpret_cast64<uint64_t>(info);
- vixl::aarch64::Label done;
UseScratchRegisterScope temps(masm);
Register counter = temps.AcquireW();
- SlowPathCodeARM64* slow_path =
- new (GetScopedAllocator()) CompileOptimizedSlowPathARM64(/* profiling_info= */ lr);
+ SlowPathCodeARM64* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathARM64(
+ suspend_check, /* profiling_info= */ lr);
AddSlowPath(slow_path);
__ Ldr(lr, jit_patches_.DeduplicateUint64Literal(address));
__ Ldrh(counter, MemOperand(lr, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
@@ -1431,7 +1438,7 @@ void CodeGeneratorARM64::GenerateFrameEntry() {
__ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag()));
}
}
- MaybeIncrementHotness(/* is_frame_entry= */ true);
+ MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
@@ -3741,7 +3748,7 @@ void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* s
HLoopInformation* info = block->GetLoopInformation();
if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
- codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
+ codegen_->MaybeIncrementHotness(info->GetSuspendCheck(), /* is_frame_entry= */ false);
GenerateSuspendCheck(info->GetSuspendCheck(), successor);
return; // `GenerateSuspendCheck()` emitted the jump.
}
@@ -4646,7 +4653,8 @@ void CodeGeneratorARM64::MaybeGenerateInlineCacheCheck(HInstruction* instruction
if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) {
ProfilingInfo* info = GetGraph()->GetProfilingInfo();
DCHECK(info != nullptr);
- InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(info, instruction->AsInvoke());
+ InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(
+ info, GetCompilerOptions(), instruction->AsInvoke());
if (cache != nullptr) {
uint64_t address = reinterpret_cast64<uint64_t>(cache);
vixl::aarch64::Label done;
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index c78137b6ed..d10fb3018b 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -1027,7 +1027,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
}
void MaybeGenerateInlineCacheCheck(HInstruction* instruction, vixl::aarch64::Register klass);
- void MaybeIncrementHotness(bool is_frame_entry);
+ void MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry);
bool CanUseImplicitSuspendCheck() const;
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 544d35c206..c5c5f1b5ff 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -972,8 +972,9 @@ class MethodEntryExitHooksSlowPathARMVIXL : public SlowPathCodeARMVIXL {
class CompileOptimizedSlowPathARMVIXL : public SlowPathCodeARMVIXL {
public:
- explicit CompileOptimizedSlowPathARMVIXL(vixl32::Register profiling_info)
- : SlowPathCodeARMVIXL(/* instruction= */ nullptr),
+ CompileOptimizedSlowPathARMVIXL(HSuspendCheck* suspend_check,
+ vixl32::Register profiling_info)
+ : SlowPathCodeARMVIXL(suspend_check),
profiling_info_(profiling_info) {}
void EmitNativeCode(CodeGenerator* codegen) override {
@@ -2276,7 +2277,8 @@ void InstructionCodeGeneratorARMVIXL::VisitMethodEntryHook(HMethodEntryHook* ins
GenerateMethodEntryExitHook(instruction);
}
-void CodeGeneratorARMVIXL::MaybeIncrementHotness(bool is_frame_entry) {
+void CodeGeneratorARMVIXL::MaybeIncrementHotness(HSuspendCheck* suspend_check,
+ bool is_frame_entry) {
if (GetCompilerOptions().CountHotnessInCompiledCode()) {
UseScratchRegisterScope temps(GetVIXLAssembler());
vixl32::Register temp = temps.Acquire();
@@ -2307,8 +2309,8 @@ void CodeGeneratorARMVIXL::MaybeIncrementHotness(bool is_frame_entry) {
uint32_t address = reinterpret_cast32<uint32_t>(info);
UseScratchRegisterScope temps(GetVIXLAssembler());
vixl32::Register tmp = temps.Acquire();
- SlowPathCodeARMVIXL* slow_path =
- new (GetScopedAllocator()) CompileOptimizedSlowPathARMVIXL(/* profiling_info= */ lr);
+ SlowPathCodeARMVIXL* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathARMVIXL(
+ suspend_check, /* profiling_info= */ lr);
AddSlowPath(slow_path);
__ Mov(lr, address);
__ Ldrh(tmp, MemOperand(lr, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
@@ -2383,7 +2385,7 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() {
if (HasEmptyFrame()) {
// Ensure that the CFI opcode list is not empty.
GetAssembler()->cfi().Nop();
- MaybeIncrementHotness(/* is_frame_entry= */ true);
+ MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
return;
}
@@ -2483,7 +2485,7 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() {
GetAssembler()->StoreToOffset(kStoreWord, temp, sp, GetStackOffsetOfShouldDeoptimizeFlag());
}
- MaybeIncrementHotness(/* is_frame_entry= */ true);
+ MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
MaybeGenerateMarkingRegisterCheck(/* code= */ 1);
}
@@ -2828,7 +2830,7 @@ void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock*
HLoopInformation* info = block->GetLoopInformation();
if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
- codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
+ codegen_->MaybeIncrementHotness(info->GetSuspendCheck(), /* is_frame_entry= */ false);
GenerateSuspendCheck(info->GetSuspendCheck(), successor);
return;
}
@@ -3688,7 +3690,8 @@ void CodeGeneratorARMVIXL::MaybeGenerateInlineCacheCheck(HInstruction* instructi
DCHECK_EQ(r0.GetCode(), klass.GetCode());
if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) {
ProfilingInfo* info = GetGraph()->GetProfilingInfo();
- InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(info, instruction->AsInvoke());
+ InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(
+ info, GetCompilerOptions(), instruction->AsInvoke());
if (cache != nullptr) {
uint32_t address = reinterpret_cast32<uint32_t>(cache);
vixl32::Label done;
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index c5b24470bf..35686dfa50 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -899,7 +899,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
}
void MaybeGenerateInlineCacheCheck(HInstruction* instruction, vixl32::Register klass);
- void MaybeIncrementHotness(bool is_frame_entry);
+ void MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry);
private:
// Encoding of thunk type and data for link-time generated thunks for Baker read barriers.
diff --git a/compiler/optimizing/code_generator_riscv64.cc b/compiler/optimizing/code_generator_riscv64.cc
index 492c77be05..4c5234b0f7 100644
--- a/compiler/optimizing/code_generator_riscv64.cc
+++ b/compiler/optimizing/code_generator_riscv64.cc
@@ -266,8 +266,8 @@ void LocationsBuilderRISCV64::HandleInvoke(HInvoke* instruction) {
class CompileOptimizedSlowPathRISCV64 : public SlowPathCodeRISCV64 {
public:
- CompileOptimizedSlowPathRISCV64(XRegister base, int32_t imm12)
- : SlowPathCodeRISCV64(/*instruction=*/ nullptr),
+ CompileOptimizedSlowPathRISCV64(HSuspendCheck* suspend_check, XRegister base, int32_t imm12)
+ : SlowPathCodeRISCV64(suspend_check),
base_(base),
imm12_(imm12) {}
@@ -280,10 +280,18 @@ class CompileOptimizedSlowPathRISCV64 : public SlowPathCodeRISCV64 {
XRegister counter = srs.AllocateXRegister();
__ LoadConst32(counter, ProfilingInfo::GetOptimizeThreshold());
__ Sh(counter, base_, imm12_);
+ if (instruction_ != nullptr) {
+ // Only saves live vector regs for SIMD.
+ SaveLiveRegisters(codegen, instruction_->GetLocations());
+ }
__ Loadd(RA, TR, entrypoint_offset);
// Note: we don't record the call here (and therefore don't generate a stack
// map), as the entrypoint should never be suspended.
__ Jalr(RA);
+ if (instruction_ != nullptr) {
+ // Only restores live vector regs for SIMD.
+ RestoreLiveRegisters(codegen, instruction_->GetLocations());
+ }
__ J(GetExitLabel());
}
@@ -2009,7 +2017,7 @@ void InstructionCodeGeneratorRISCV64::HandleGoto(HInstruction* instruction,
HLoopInformation* info = block->GetLoopInformation();
if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
- codegen_->MaybeIncrementHotness(/*is_frame_entry=*/ false);
+ codegen_->MaybeIncrementHotness(info->GetSuspendCheck(), /*is_frame_entry=*/ false);
GenerateSuspendCheck(info->GetSuspendCheck(), successor);
return; // `GenerateSuspendCheck()` emitted the jump.
}
@@ -5734,7 +5742,8 @@ CodeGeneratorRISCV64::CodeGeneratorRISCV64(HGraph* graph,
AddAllocatedRegister(Location::RegisterLocation(RA));
}
-void CodeGeneratorRISCV64::MaybeIncrementHotness(bool is_frame_entry) {
+void CodeGeneratorRISCV64::MaybeIncrementHotness(HSuspendCheck* suspend_check,
+ bool is_frame_entry) {
if (GetCompilerOptions().CountHotnessInCompiledCode()) {
ScratchRegisterScope srs(GetAssembler());
XRegister method = is_frame_entry ? kArtMethodRegister : srs.AllocateXRegister();
@@ -5766,7 +5775,7 @@ void CodeGeneratorRISCV64::MaybeIncrementHotness(bool is_frame_entry) {
XRegister tmp = RA;
__ LoadConst64(tmp, base_address);
SlowPathCodeRISCV64* slow_path =
- new (GetScopedAllocator()) CompileOptimizedSlowPathRISCV64(tmp, imm12);
+ new (GetScopedAllocator()) CompileOptimizedSlowPathRISCV64(suspend_check, tmp, imm12);
AddSlowPath(slow_path);
__ Lhu(counter, tmp, imm12);
__ Beqz(counter, slow_path->GetEntryLabel()); // Can clobber `TMP` if taken.
@@ -5911,7 +5920,7 @@ void CodeGeneratorRISCV64::GenerateFrameEntry() {
__ Storew(Zero, SP, GetStackOffsetOfShouldDeoptimizeFlag());
}
}
- MaybeIncrementHotness(/*is_frame_entry=*/ true);
+ MaybeIncrementHotness(/* suspend_check= */ nullptr, /*is_frame_entry=*/ true);
}
void CodeGeneratorRISCV64::GenerateFrameExit() {
@@ -6774,7 +6783,8 @@ void CodeGeneratorRISCV64::MaybeGenerateInlineCacheCheck(HInstruction* instructi
if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) {
ProfilingInfo* info = GetGraph()->GetProfilingInfo();
DCHECK(info != nullptr);
- InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(info, instruction->AsInvoke());
+ InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(
+ info, GetCompilerOptions(), instruction->AsInvoke());
if (cache != nullptr) {
uint64_t address = reinterpret_cast64<uint64_t>(cache);
Riscv64Label done;
diff --git a/compiler/optimizing/code_generator_riscv64.h b/compiler/optimizing/code_generator_riscv64.h
index ba43090590..653e83dd57 100644
--- a/compiler/optimizing/code_generator_riscv64.h
+++ b/compiler/optimizing/code_generator_riscv64.h
@@ -638,7 +638,7 @@ class CodeGeneratorRISCV64 : public CodeGenerator {
void GenerateMemoryBarrier(MemBarrierKind kind);
- void MaybeIncrementHotness(bool is_frame_entry);
+ void MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry);
bool CanUseImplicitSuspendCheck() const;
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 058ecf7242..4a0235e919 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -991,16 +991,24 @@ class MethodEntryExitHooksSlowPathX86 : public SlowPathCode {
class CompileOptimizedSlowPathX86 : public SlowPathCode {
public:
- explicit CompileOptimizedSlowPathX86(uint32_t counter_address)
- : SlowPathCode(/* instruction= */ nullptr),
+ CompileOptimizedSlowPathX86(HSuspendCheck* suspend_check, uint32_t counter_address)
+ : SlowPathCode(suspend_check),
counter_address_(counter_address) {}
void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
__ Bind(GetEntryLabel());
__ movw(Address::Absolute(counter_address_), Immediate(ProfilingInfo::GetOptimizeThreshold()));
+ if (instruction_ != nullptr) {
+ // Only saves full width XMM for SIMD.
+ SaveLiveRegisters(codegen, instruction_->GetLocations());
+ }
x86_codegen->GenerateInvokeRuntime(
GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
+ if (instruction_ != nullptr) {
+ // Only restores full width XMM for SIMD.
+ RestoreLiveRegisters(codegen, instruction_->GetLocations());
+ }
__ jmp(GetExitLabel());
}
@@ -1327,7 +1335,7 @@ void InstructionCodeGeneratorX86::VisitMethodEntryHook(HMethodEntryHook* instruc
GenerateMethodEntryExitHook(instruction);
}
-void CodeGeneratorX86::MaybeIncrementHotness(bool is_frame_entry) {
+void CodeGeneratorX86::MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry) {
if (GetCompilerOptions().CountHotnessInCompiledCode()) {
Register reg = EAX;
if (is_frame_entry) {
@@ -1350,12 +1358,15 @@ void CodeGeneratorX86::MaybeIncrementHotness(bool is_frame_entry) {
}
if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+ // Note the slow path doesn't save SIMD registers, so if we were to
+ // call it on loop back edge, we would need to fix this.
ProfilingInfo* info = GetGraph()->GetProfilingInfo();
DCHECK(info != nullptr);
uint32_t address = reinterpret_cast32<uint32_t>(info) +
ProfilingInfo::BaselineHotnessCountOffset().Int32Value();
DCHECK(!HasEmptyFrame());
- SlowPathCode* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathX86(address);
+ SlowPathCode* slow_path =
+ new (GetScopedAllocator()) CompileOptimizedSlowPathX86(suspend_check, address);
AddSlowPath(slow_path);
// With multiple threads, this can overflow. This is OK, we will eventually get to see
// it reaching 0. Also, at this point we have no register available to look
@@ -1442,7 +1453,7 @@ void CodeGeneratorX86::GenerateFrameEntry() {
}
}
- MaybeIncrementHotness(/* is_frame_entry= */ true);
+ MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
}
void CodeGeneratorX86::GenerateFrameExit() {
@@ -1893,7 +1904,7 @@ void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* suc
HLoopInformation* info = block->GetLoopInformation();
if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
- codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
+ codegen_->MaybeIncrementHotness(info->GetSuspendCheck(), /* is_frame_entry= */ false);
GenerateSuspendCheck(info->GetSuspendCheck(), successor);
return;
}
@@ -2849,7 +2860,8 @@ void CodeGeneratorX86::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) {
ProfilingInfo* info = GetGraph()->GetProfilingInfo();
DCHECK(info != nullptr);
- InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(info, instruction->AsInvoke());
+ InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(
+ info, GetCompilerOptions(), instruction->AsInvoke());
if (cache != nullptr) {
uint32_t address = reinterpret_cast32<uint32_t>(cache);
if (kIsDebugBuild) {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 8a4718143b..007ec81142 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -738,7 +738,7 @@ class CodeGeneratorX86 : public CodeGenerator {
void GenerateExplicitNullCheck(HNullCheck* instruction) override;
void MaybeGenerateInlineCacheCheck(HInstruction* instruction, Register klass);
- void MaybeIncrementHotness(bool is_frame_entry);
+ void MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry);
// When we don't know the proper offset for the value, we use kPlaceholder32BitOffset.
// The correct value will be inserted when processing Assembler fixups.
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 705c14c009..8f56677f21 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1043,8 +1043,8 @@ class MethodEntryExitHooksSlowPathX86_64 : public SlowPathCode {
class CompileOptimizedSlowPathX86_64 : public SlowPathCode {
public:
- explicit CompileOptimizedSlowPathX86_64(uint64_t counter_address)
- : SlowPathCode(/* instruction= */ nullptr),
+ CompileOptimizedSlowPathX86_64(HSuspendCheck* suspend_check, uint64_t counter_address)
+ : SlowPathCode(suspend_check),
counter_address_(counter_address) {}
void EmitNativeCode(CodeGenerator* codegen) override {
@@ -1052,8 +1052,16 @@ class CompileOptimizedSlowPathX86_64 : public SlowPathCode {
__ Bind(GetEntryLabel());
__ movq(CpuRegister(TMP), Immediate(counter_address_));
__ movw(Address(CpuRegister(TMP), 0), Immediate(ProfilingInfo::GetOptimizeThreshold()));
+ if (instruction_ != nullptr) {
+ // Only saves full width XMM for SIMD.
+ SaveLiveRegisters(codegen, instruction_->GetLocations());
+ }
x86_64_codegen->GenerateInvokeRuntime(
GetThreadOffset<kX86_64PointerSize>(kQuickCompileOptimized).Int32Value());
+ if (instruction_ != nullptr) {
+ // Only restores full width XMM for SIMD.
+ RestoreLiveRegisters(codegen, instruction_->GetLocations());
+ }
__ jmp(GetExitLabel());
}
@@ -1763,7 +1771,7 @@ void InstructionCodeGeneratorX86_64::VisitMethodExitHook(HMethodExitHook* instru
GenerateMethodEntryExitHook(instruction);
}
-void CodeGeneratorX86_64::MaybeIncrementHotness(bool is_frame_entry) {
+void CodeGeneratorX86_64::MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry) {
if (GetCompilerOptions().CountHotnessInCompiledCode()) {
NearLabel overflow;
Register method = kMethodRegisterArgument;
@@ -1786,7 +1794,8 @@ void CodeGeneratorX86_64::MaybeIncrementHotness(bool is_frame_entry) {
CHECK(!HasEmptyFrame());
uint64_t address = reinterpret_cast64<uint64_t>(info) +
ProfilingInfo::BaselineHotnessCountOffset().Int32Value();
- SlowPathCode* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathX86_64(address);
+ SlowPathCode* slow_path =
+ new (GetScopedAllocator()) CompileOptimizedSlowPathX86_64(suspend_check, address);
AddSlowPath(slow_path);
// Note: if the address was in the 32bit range, we could use
// Address::Absolute and avoid this movq.
@@ -1891,7 +1900,7 @@ void CodeGeneratorX86_64::GenerateFrameEntry() {
}
}
- MaybeIncrementHotness(/* is_frame_entry= */ true);
+ MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
}
void CodeGeneratorX86_64::GenerateFrameExit() {
@@ -2078,7 +2087,7 @@ void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock*
HLoopInformation* info = block->GetLoopInformation();
if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
- codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
+ codegen_->MaybeIncrementHotness(info->GetSuspendCheck(), /* is_frame_entry= */ false);
GenerateSuspendCheck(info->GetSuspendCheck(), successor);
return;
}
@@ -3141,7 +3150,8 @@ void CodeGeneratorX86_64::MaybeGenerateInlineCacheCheck(HInstruction* instructio
if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) {
ProfilingInfo* info = GetGraph()->GetProfilingInfo();
DCHECK(info != nullptr);
- InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(info, instruction->AsInvoke());
+ InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(
+ info, GetCompilerOptions(), instruction->AsInvoke());
if (cache != nullptr) {
uint64_t address = reinterpret_cast64<uint64_t>(cache);
NearLabel done;
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index b9467f9f10..b8e2456381 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -707,7 +707,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
void GenerateExplicitNullCheck(HNullCheck* instruction) override;
void MaybeGenerateInlineCacheCheck(HInstruction* instruction, CpuRegister cls);
- void MaybeIncrementHotness(bool is_frame_entry);
+ void MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry);
static void BlockNonVolatileXmmRegisters(LocationSummary* locations);
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index b7f7a0f550..afbf941355 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -867,7 +867,8 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
std::ostringstream oss;
oss << pass_name_;
if (!IsDebugDump()) {
- oss << " (" << (is_after_pass_ ? "after" : "before")
+ oss << " (" << (GetGraph()->IsCompilingBaseline() ? "baseline " : "")
+ << (is_after_pass_ ? "after" : "before")
<< (graph_in_bad_state_ ? ", bad_state" : "") << ")";
}
PrintProperty("name", oss.str().c_str());
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 16abf0b0de..91f49c8ed8 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -37,6 +37,7 @@
#include "mirror/object_array-alloc-inl.h"
#include "mirror/object_array-inl.h"
#include "nodes.h"
+#include "profiling_info_builder.h"
#include "reference_type_propagation.h"
#include "register_allocator_linear_scan.h"
#include "scoped_thread_state_change-inl.h"
@@ -164,7 +165,9 @@ bool HInliner::Run() {
// depending on the state of classes at runtime.
const bool honor_noinline_directives = codegen_->GetCompilerOptions().CompileArtTest();
const bool honor_inline_directives =
- honor_noinline_directives && Runtime::Current()->IsAotCompiler();
+ honor_noinline_directives &&
+ Runtime::Current()->IsAotCompiler() &&
+ !graph_->IsCompilingBaseline();
// Keep a copy of all blocks when starting the visit.
ArenaVector<HBasicBlock*> blocks = graph_->GetReversePostOrder();
@@ -532,6 +535,15 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) {
return result;
}
+ if (graph_->IsCompilingBaseline()) {
+ LOG_FAIL_NO_STAT() << "Call to " << invoke_instruction->GetMethodReference().PrettyMethod()
+ << " not inlined because we are compiling baseline and we could not"
+ << " statically resolve the target";
+ // For baseline compilation, we will collect inline caches, so we should not
+ // try to inline using them.
+ return false;
+ }
+
DCHECK(!invoke_instruction->IsInvokeStaticOrDirect());
// No try catch inlining allowed here, or recursively. For try catch inlining we are banking on
@@ -682,17 +694,36 @@ HInliner::InlineCacheType HInliner::GetInlineCacheJIT(
ArtMethod* caller = graph_->GetArtMethod();
// Under JIT, we should always know the caller.
DCHECK(caller != nullptr);
- ProfilingInfo* profiling_info = graph_->GetProfilingInfo();
- if (profiling_info == nullptr) {
- return kInlineCacheNoData;
+
+ InlineCache* cache = nullptr;
+ // Start with the outer graph profiling info.
+ ProfilingInfo* profiling_info = outermost_graph_->GetProfilingInfo();
+ if (profiling_info != nullptr) {
+ if (depth_ == 0) {
+ cache = profiling_info->GetInlineCache(invoke_instruction->GetDexPc());
+ } else {
+ uint32_t dex_pc = ProfilingInfoBuilder::EncodeInlinedDexPc(
+ this, codegen_->GetCompilerOptions(), invoke_instruction);
+ if (dex_pc != kNoDexPc) {
+ cache = profiling_info->GetInlineCache(dex_pc);
+ }
+ }
+ }
+
+ if (cache == nullptr) {
+ // Check the current graph profiling info.
+ profiling_info = graph_->GetProfilingInfo();
+ if (profiling_info == nullptr) {
+ return kInlineCacheNoData;
+ }
+
+ cache = profiling_info->GetInlineCache(invoke_instruction->GetDexPc());
}
- InlineCache* cache = profiling_info->GetInlineCache(invoke_instruction->GetDexPc());
if (cache == nullptr) {
- // This shouldn't happen, but we don't guarantee that method resolution
- // between baseline compilation and optimizing compilation is identical. Be robust,
- // warn about it, and return that we don't have any inline cache data.
- LOG(WARNING) << "No inline cache found for " << caller->PrettyMethod();
+ // Either we never hit this invoke and we never compiled the callee,
+ // or the method wasn't resolved when we performed baseline compilation.
+ // Bail for now.
return kInlineCacheNoData;
}
Runtime::Current()->GetJit()->GetCodeCache()->CopyInlineCacheInto(*cache, classes);
@@ -718,6 +749,12 @@ HInliner::InlineCacheType HInliner::GetInlineCacheAOT(
const ProfileCompilationInfo::InlineCacheMap* inline_caches = hotness.GetInlineCacheMap();
DCHECK(inline_caches != nullptr);
+
+ // Inlined inline caches are not supported in AOT, so we use the dex pc directly, and don't
+ // call `InlineCache::EncodeDexPc`.
+ // To support it, we would need to ensure `inline_max_code_units` remain the
+ // same between dex2oat and runtime, for example by adding it to the boot
+ // image oat header.
const auto it = inline_caches->find(invoke_instruction->GetDexPc());
if (it == inline_caches->end()) {
return kInlineCacheUninitialized;
@@ -1515,7 +1552,9 @@ bool HInliner::IsInliningEncouraged(const HInvoke* invoke_instruction,
return false;
}
- size_t inline_max_code_units = codegen_->GetCompilerOptions().GetInlineMaxCodeUnits();
+ size_t inline_max_code_units = graph_->IsCompilingBaseline()
+ ? CompilerOptions::kBaselineMaxCodeUnits
+ : codegen_->GetCompilerOptions().GetInlineMaxCodeUnits();
if (accessor.InsnsSizeInCodeUnits() > inline_max_code_units) {
LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedCodeItem)
<< "Method " << method->PrettyMethod()
@@ -2079,6 +2118,20 @@ bool HInliner::CanInlineBody(const HGraph* callee_graph,
<< " could not be inlined because it needs a BSS check";
return false;
}
+
+ if (outermost_graph_->IsCompilingBaseline() &&
+ (current->IsInvokeVirtual() || current->IsInvokeInterface()) &&
+ ProfilingInfoBuilder::IsInlineCacheUseful(current->AsInvoke(), codegen_)) {
+ uint32_t maximum_inlining_depth_for_baseline =
+ InlineCache::MaxDexPcEncodingDepth(
+ outermost_graph_->GetArtMethod(),
+ codegen_->GetCompilerOptions().GetInlineMaxCodeUnits());
+ if (depth_ + 1 > maximum_inlining_depth_for_baseline) {
+ LOG_FAIL_NO_STAT() << "Reached maximum depth for inlining in baseline compilation: "
+ << depth_ << " for " << callee_graph->GetArtMethod()->PrettyMethod();
+ return false;
+ }
+ }
}
}
@@ -2190,6 +2243,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
// The current invoke is not a try block.
!invoke_instruction->GetBlock()->IsTryBlock();
RunOptimizations(callee_graph,
+ invoke_instruction->GetEnvironment(),
code_item,
dex_compilation_unit,
try_catch_inlining_allowed_for_recursive_inline);
@@ -2229,6 +2283,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
}
void HInliner::RunOptimizations(HGraph* callee_graph,
+ HEnvironment* caller_environment,
const dex::CodeItem* code_item,
const DexCompilationUnit& dex_compilation_unit,
bool try_catch_inlining_allowed_for_recursive_inline) {
@@ -2277,6 +2332,7 @@ void HInliner::RunOptimizations(HGraph* callee_graph,
total_number_of_dex_registers_ + accessor.RegistersSize(),
total_number_of_instructions_ + number_of_instructions,
this,
+ caller_environment,
depth_ + 1,
try_catch_inlining_allowed_for_recursive_inline);
inliner.Run();
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 5e68dd866e..48600543c6 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -43,6 +43,7 @@ class HInliner : public HOptimization {
size_t total_number_of_dex_registers,
size_t total_number_of_instructions,
HInliner* parent,
+ HEnvironment* caller_environment,
size_t depth,
bool try_catch_inlining_allowed,
const char* name = kInlinerPassName)
@@ -54,6 +55,7 @@ class HInliner : public HOptimization {
total_number_of_dex_registers_(total_number_of_dex_registers),
total_number_of_instructions_(total_number_of_instructions),
parent_(parent),
+ caller_environment_(caller_environment),
depth_(depth),
inlining_budget_(0),
try_catch_inlining_allowed_(try_catch_inlining_allowed),
@@ -64,6 +66,12 @@ class HInliner : public HOptimization {
static constexpr const char* kInlinerPassName = "inliner";
+ const HInliner* GetParent() const { return parent_; }
+ const HEnvironment* GetCallerEnvironment() const { return caller_environment_; }
+
+ const HGraph* GetOutermostGraph() const { return outermost_graph_; }
+ const HGraph* GetGraph() const { return graph_; }
+
private:
enum InlineCacheType {
kInlineCacheNoData = 0,
@@ -109,6 +117,7 @@ class HInliner : public HOptimization {
// Run simple optimizations on `callee_graph`.
void RunOptimizations(HGraph* callee_graph,
+ HEnvironment* caller_environment,
const dex::CodeItem* code_item,
const DexCompilationUnit& dex_compilation_unit,
bool try_catch_inlining_allowed_for_recursive_inline)
@@ -321,9 +330,10 @@ class HInliner : public HOptimization {
const size_t total_number_of_dex_registers_;
size_t total_number_of_instructions_;
- // The 'parent' inliner, that means the inlinigng optimization that requested
+ // The 'parent' inliner, that means the inlining optimization that requested
// `graph_` to be inlined.
const HInliner* const parent_;
+ const HEnvironment* const caller_environment_;
const size_t depth_;
// The budget left for inlining, in number of instructions.
diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc
index 9c1506afa5..dd57100d88 100644
--- a/compiler/optimizing/optimization.cc
+++ b/compiler/optimizing/optimization.cc
@@ -252,6 +252,7 @@ ArenaVector<HOptimization*> ConstructOptimizations(
accessor.RegistersSize(),
/* total_number_of_instructions= */ 0,
/* parent= */ nullptr,
+ /* caller_environment= */ nullptr,
/* depth= */ 0,
/* try_catch_inlining_allowed= */ true,
pass_name);
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 2f795e6e70..70d9013f7d 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -368,10 +368,10 @@ class OptimizingCompiler final : public Compiler {
const DexCompilationUnit& dex_compilation_unit,
PassObserver* pass_observer) const;
- bool RunBaselineOptimizations(HGraph* graph,
- CodeGenerator* codegen,
- const DexCompilationUnit& dex_compilation_unit,
- PassObserver* pass_observer) const;
+ bool RunRequiredPasses(HGraph* graph,
+ CodeGenerator* codegen,
+ const DexCompilationUnit& dex_compilation_unit,
+ PassObserver* pass_observer) const;
std::vector<uint8_t> GenerateJitDebugInfo(const debug::MethodDebugInfo& method_debug_info);
@@ -444,10 +444,10 @@ static bool IsInstructionSetSupported(InstructionSet instruction_set) {
instruction_set == InstructionSet::kX86_64;
}
-bool OptimizingCompiler::RunBaselineOptimizations(HGraph* graph,
- CodeGenerator* codegen,
- const DexCompilationUnit& dex_compilation_unit,
- PassObserver* pass_observer) const {
+bool OptimizingCompiler::RunRequiredPasses(HGraph* graph,
+ CodeGenerator* codegen,
+ const DexCompilationUnit& dex_compilation_unit,
+ PassObserver* pass_observer) const {
switch (codegen->GetCompilerOptions().GetInstructionSet()) {
#if defined(ART_ENABLE_CODEGEN_arm)
case InstructionSet::kThumb2:
@@ -904,21 +904,15 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
}
}
- if (compilation_kind == CompilationKind::kBaseline) {
- RunBaselineOptimizations(graph, codegen.get(), dex_compilation_unit, &pass_observer);
+ if (compilation_kind == CompilationKind::kBaseline && compiler_options.ProfileBranches()) {
+ // Branch profiling currently doesn't support running optimizations.
+ RunRequiredPasses(graph, codegen.get(), dex_compilation_unit, &pass_observer);
} else {
RunOptimizations(graph, codegen.get(), dex_compilation_unit, &pass_observer);
PassScope scope(WriteBarrierElimination::kWBEPassName, &pass_observer);
WriteBarrierElimination(graph, compilation_stats_.get()).Run();
}
- RegisterAllocator::Strategy regalloc_strategy =
- compiler_options.GetRegisterAllocationStrategy();
- AllocateRegisters(graph,
- codegen.get(),
- &pass_observer,
- regalloc_strategy,
- compilation_stats_.get());
// If we are compiling baseline and we haven't created a profiling info for
// this method already, do it now.
if (jit != nullptr &&
@@ -935,6 +929,14 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
}
}
+ RegisterAllocator::Strategy regalloc_strategy =
+ compiler_options.GetRegisterAllocationStrategy();
+ AllocateRegisters(graph,
+ codegen.get(),
+ &pass_observer,
+ regalloc_strategy,
+ compilation_stats_.get());
+
codegen->Compile();
pass_observer.DumpDisassembly();
diff --git a/compiler/optimizing/profiling_info_builder.cc b/compiler/optimizing/profiling_info_builder.cc
index 7888753830..19795f5466 100644
--- a/compiler/optimizing/profiling_info_builder.cc
+++ b/compiler/optimizing/profiling_info_builder.cc
@@ -20,6 +20,7 @@
#include "code_generator.h"
#include "driver/compiler_options.h"
#include "dex/code_item_accessors-inl.h"
+#include "inliner.h"
#include "jit/profiling_info.h"
#include "optimizing_compiler_stats.h"
#include "scoped_thread_state_change-inl.h"
@@ -42,10 +43,53 @@ void ProfilingInfoBuilder::Run() {
ProfilingInfo::Create(soa.Self(), GetGraph()->GetArtMethod(), inline_caches_));
}
+
+uint32_t ProfilingInfoBuilder::EncodeInlinedDexPc(const HInliner* inliner,
+ const CompilerOptions& compiler_options,
+ HInvoke* invoke) {
+ DCHECK(inliner->GetCallerEnvironment() != nullptr);
+ DCHECK(inliner->GetParent() != nullptr);
+ std::vector<uint32_t> temp_vector;
+ temp_vector.push_back(invoke->GetDexPc());
+ while (inliner->GetCallerEnvironment() != nullptr) {
+ temp_vector.push_back(inliner->GetCallerEnvironment()->GetDexPc());
+ inliner = inliner->GetParent();
+ }
+
+ DCHECK_EQ(inliner->GetOutermostGraph(), inliner->GetGraph());
+ return InlineCache::EncodeDexPc(
+ inliner->GetOutermostGraph()->GetArtMethod(),
+ temp_vector,
+ compiler_options.GetInlineMaxCodeUnits());
+}
+
+static uint32_t EncodeDexPc(HInvoke* invoke, const CompilerOptions& compiler_options) {
+ std::vector<uint32_t> dex_pcs;
+ ArtMethod* outer_method = nullptr;
+ for (HEnvironment* environment = invoke->GetEnvironment();
+ environment != nullptr;
+ environment = environment->GetParent()) {
+ outer_method = environment->GetMethod();
+ dex_pcs.push_back(environment->GetDexPc());
+ }
+
+ ScopedObjectAccess soa(Thread::Current());
+ return InlineCache::EncodeDexPc(
+ outer_method,
+ dex_pcs,
+ compiler_options.GetInlineMaxCodeUnits());
+}
+
void ProfilingInfoBuilder::HandleInvoke(HInvoke* invoke) {
- DCHECK(!invoke->GetEnvironment()->IsFromInlinedInvoke());
if (IsInlineCacheUseful(invoke, codegen_)) {
- inline_caches_.push_back(invoke->GetDexPc());
+ uint32_t dex_pc = EncodeDexPc(invoke, compiler_options_);
+ if (dex_pc != kNoDexPc) {
+ inline_caches_.push_back(dex_pc);
+ } else {
+ ScopedObjectAccess soa(Thread::Current());
+ LOG(WARNING) << "Could not encode dex pc for "
+ << invoke->GetResolvedMethod()->PrettyMethod();
+ }
}
}
@@ -81,10 +125,15 @@ bool ProfilingInfoBuilder::IsInlineCacheUseful(HInvoke* invoke, CodeGenerator* c
return true;
}
-InlineCache* ProfilingInfoBuilder::GetInlineCache(ProfilingInfo* info, HInvoke* instruction) {
- DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
+InlineCache* ProfilingInfoBuilder::GetInlineCache(ProfilingInfo* info,
+ const CompilerOptions& compiler_options,
+ HInvoke* instruction) {
ScopedObjectAccess soa(Thread::Current());
- return info->GetInlineCache(instruction->GetDexPc());
+ uint32_t dex_pc = EncodeDexPc(instruction, compiler_options);
+ if (dex_pc == kNoDexPc) {
+ return nullptr;
+ }
+ return info->GetInlineCache(dex_pc);
}
} // namespace art
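For intuition, a standalone toy of the dex-pc chain walk that the EncodeDexPc() helper above performs for invokes sitting inside inlined callers. The Environment struct, kNoDexPc value, and pack_chain() below are simplified stand-ins, not ART's types; the real packing lives in InlineCache::EncodeDexPc and is not reproduced here:

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Stand-in sentinel; ART defines its own kNoDexPc constant.
    static constexpr uint32_t kNoDexPc = 0xFFFFFFFFu;

    // Simplified stand-in for ART's HEnvironment chain: each frame records the
    // dex pc of its call site, and parent points at the caller's frame.
    struct Environment {
      uint32_t dex_pc;
      const Environment* parent;  // nullptr for the outermost method
    };

    // Hypothetical stand-in for InlineCache::EncodeDexPc(): this toy only
    // accepts single-frame chains; the real encoder packs nested dex pcs into
    // one 32-bit value when they fit and returns kNoDexPc otherwise.
    uint32_t pack_chain(const std::vector<uint32_t>& dex_pcs) {
      return dex_pcs.size() == 1 ? dex_pcs[0] : kNoDexPc;
    }

    // Mirrors the walk in EncodeDexPc() above: collect dex pcs from the
    // invoke's innermost environment out to the outermost method, then try to
    // pack them. A kNoDexPc result means "no inline cache slot for this invoke".
    uint32_t EncodeInvokeDexPc(const Environment* env) {
      std::vector<uint32_t> dex_pcs;
      for (; env != nullptr; env = env->parent) {
        dex_pcs.push_back(env->dex_pc);
      }
      return pack_chain(dex_pcs);
    }

    int main() {
      Environment outer{/*dex_pc=*/0x20, /*parent=*/nullptr};
      Environment inlined{/*dex_pc=*/0x04, /*parent=*/&outer};
      // Prints "32 1": the single-frame chain encodes, the nested one falls
      // back to kNoDexPc in this toy packer.
      std::cout << EncodeInvokeDexPc(&outer) << ' '
                << (EncodeInvokeDexPc(&inlined) == kNoDexPc) << '\n';
    }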
diff --git a/compiler/optimizing/profiling_info_builder.h b/compiler/optimizing/profiling_info_builder.h
index 2185b0eed3..c8dc59a03c 100644
--- a/compiler/optimizing/profiling_info_builder.h
+++ b/compiler/optimizing/profiling_info_builder.h
@@ -24,6 +24,7 @@ namespace art HIDDEN {
class CodeGenerator;
class CompilerOptions;
+class HInliner;
class InlineCache;
class ProfilingInfo;
@@ -42,8 +43,13 @@ class ProfilingInfoBuilder : public HGraphDelegateVisitor {
static constexpr const char* kProfilingInfoBuilderPassName =
"profiling_info_builder";
- static InlineCache* GetInlineCache(ProfilingInfo* info, HInvoke* invoke);
+ static InlineCache* GetInlineCache(ProfilingInfo* info,
+ const CompilerOptions& compiler_options,
+ HInvoke* invoke);
static bool IsInlineCacheUseful(HInvoke* invoke, CodeGenerator* codegen);
+ static uint32_t EncodeInlinedDexPc(
+ const HInliner* inliner, const CompilerOptions& compiler_options, HInvoke* invoke)
+ REQUIRES_SHARED(Locks::mutator_lock_);
private:
void VisitInvokeVirtual(HInvokeVirtual* invoke) override;
@@ -52,7 +58,7 @@ class ProfilingInfoBuilder : public HGraphDelegateVisitor {
void HandleInvoke(HInvoke* invoke);
CodeGenerator* codegen_;
- [[maybe_unused]] const CompilerOptions& compiler_options_;
+ const CompilerOptions& compiler_options_;
std::vector<uint32_t> inline_caches_;
DISALLOW_COPY_AND_ASSIGN(ProfilingInfoBuilder);