Update method exit hooks from JITed code to not use a stack visitor

Using NthCallerVisitor is expensive, since it involves decoding the
method header and other reasonably costly work, and that cost is paid
on every method exit. When method exit hooks are called from JITed
code, much of this information (such as the frame size and the calling
method) is already known and can be passed directly to the method exit
hook instead of being recomputed.

Locally, this change improves performance by 70% on the debuggable-cc
configuration of the Golem benchmarks.
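
As a sketch (mirroring the new ShouldDeoptimizeCaller overload in
runtime/instrumentation.cc below, not a verbatim excerpt), the caller
and its return PC now follow from plain pointer arithmetic on the
current quick frame:

    // sp points at the current frame's ArtMethod*. The return PC sits
    // in the frame's last slot, and the caller's frame starts
    // immediately above it.
    uintptr_t caller_sp = reinterpret_cast<uintptr_t>(sp) + frame_size;
    ArtMethod* caller = *reinterpret_cast<ArtMethod**>(caller_sp);
    uintptr_t caller_pc = *reinterpret_cast<uintptr_t*>(
        reinterpret_cast<uintptr_t>(sp) + frame_size - sizeof(void*));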

Bug: 253232638
Test: art/test.py
Change-Id: I3a1d80748c6d85e5fa1d3bd4aec0b29962ba0156
diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc
index c1afdb8..3dfb741 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.cc
+++ b/compiler/jni/quick/arm/calling_convention_arm.cc
@@ -199,6 +199,10 @@
   return ArmManagedRegister::FromCoreRegister(R0);
 }
 
+ManagedRegister ArmManagedRuntimeCallingConvention::ArgumentRegisterForMethodExitHook() {
+  return ArmManagedRegister::FromCoreRegister(R2);
+}
+
 void ArmManagedRuntimeCallingConvention::ResetIterator(FrameOffset displacement) {
   ManagedRuntimeCallingConvention::ResetIterator(displacement);
   gpr_index_ = 1u;  // Skip r0 for ArtMethod*
diff --git a/compiler/jni/quick/arm/calling_convention_arm.h b/compiler/jni/quick/arm/calling_convention_arm.h
index 4526d9e..353e3cf 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.h
+++ b/compiler/jni/quick/arm/calling_convention_arm.h
@@ -39,6 +39,7 @@
   void ResetIterator(FrameOffset displacement) override;
   // Managed runtime calling convention
   ManagedRegister MethodRegister() override;
+  ManagedRegister ArgumentRegisterForMethodExitHook() override;
   void Next() override;
   bool IsCurrentParamInRegister() override;
   bool IsCurrentParamOnStack() override;
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index ec77db3..3f9ed50 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -174,6 +174,10 @@
   return Arm64ManagedRegister::FromXRegister(X0);
 }
 
+ManagedRegister Arm64ManagedRuntimeCallingConvention::ArgumentRegisterForMethodExitHook() {
+  return Arm64ManagedRegister::FromXRegister(X4);
+}
+
 bool Arm64ManagedRuntimeCallingConvention::IsCurrentParamInRegister() {
   if (IsCurrentParamAFloatOrDouble()) {
     return itr_float_and_doubles_ < kMaxFloatOrDoubleRegisterArguments;
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.h b/compiler/jni/quick/arm64/calling_convention_arm64.h
index 176271e..b948bbe 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.h
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.h
@@ -35,6 +35,7 @@
   ManagedRegister ReturnRegister() const override;
   // Managed runtime calling convention
   ManagedRegister MethodRegister() override;
+  ManagedRegister ArgumentRegisterForMethodExitHook() override;
   bool IsCurrentParamInRegister() override;
   bool IsCurrentParamOnStack() override;
   ManagedRegister CurrentParamRegister() override;
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index e2f3bfb..8fe8e00 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -244,6 +244,11 @@
   // Register that holds the incoming method argument
   virtual ManagedRegister MethodRegister() = 0;
 
+  // Register used to pass the frame size to the method exit hook call. This must not
+  // be the return register, since the method exit hook also expects the return value
+  // in the return register.
+  virtual ManagedRegister ArgumentRegisterForMethodExitHook() = 0;
+
   // Iterator interface
   bool HasNext();
   virtual void Next();
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index d520daa..a1ccabf 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -689,6 +689,7 @@
     // The method exit hook is called just before tearing down the frame, so there are no live
     // registers and we can call the method exit hook directly without needing a JNI-specific
     // entrypoint.
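+    // Pass the frame size so the hook can locate the caller without a stack walk.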
+    __ Move(mr_conv->ArgumentRegisterForMethodExitHook(), managed_frame_size);
     __ CallFromThread(QUICK_ENTRYPOINT_OFFSET(kPointerSize, pMethodExitHook));
     __ Jump(method_exit_hook_return.get());
   }
diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc
index 65be92c..b56d0a1 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.cc
+++ b/compiler/jni/quick/x86/calling_convention_x86.cc
@@ -143,6 +143,10 @@
   return X86ManagedRegister::FromCpuRegister(EAX);
 }
 
+ManagedRegister X86ManagedRuntimeCallingConvention::ArgumentRegisterForMethodExitHook() {
+  return X86ManagedRegister::FromCpuRegister(EBX);
+}
+
 void X86ManagedRuntimeCallingConvention::ResetIterator(FrameOffset displacement) {
   ManagedRuntimeCallingConvention::ResetIterator(displacement);
   gpr_arg_count_ = 1u;  // Skip EAX for ArtMethod*
diff --git a/compiler/jni/quick/x86/calling_convention_x86.h b/compiler/jni/quick/x86/calling_convention_x86.h
index cd7ef5b..ebcd266 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.h
+++ b/compiler/jni/quick/x86/calling_convention_x86.h
@@ -37,6 +37,7 @@
   void ResetIterator(FrameOffset displacement) override;
   // Managed runtime calling convention
   ManagedRegister MethodRegister() override;
+  ManagedRegister ArgumentRegisterForMethodExitHook() override;
   void Next() override;
   bool IsCurrentParamInRegister() override;
   bool IsCurrentParamOnStack() override;
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
index 862ee5e..8a472b3 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
@@ -147,6 +147,10 @@
   return X86_64ManagedRegister::FromCpuRegister(RDI);
 }
 
+ManagedRegister X86_64ManagedRuntimeCallingConvention::ArgumentRegisterForMethodExitHook() {
+  return X86_64ManagedRegister::FromCpuRegister(R8);
+}
+
 bool X86_64ManagedRuntimeCallingConvention::IsCurrentParamInRegister() {
   if (IsCurrentParamAFloatOrDouble()) {
     return itr_float_and_doubles_ < kMaxFloatOrDoubleRegisterArguments;
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.h b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
index 483f1f5..67d63b8 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.h
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
@@ -35,6 +35,7 @@
   ManagedRegister ReturnRegister() const override;
   // Managed runtime calling convention
   ManagedRegister MethodRegister() override;
+  ManagedRegister ArgumentRegisterForMethodExitHook() override;
   bool IsCurrentParamInRegister() override;
   bool IsCurrentParamOnStack() override;
   ManagedRegister CurrentParamRegister() override;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 2cc367f..72ca927 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -824,6 +824,9 @@
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
+    if (instruction_->IsMethodExitHook()) {
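+      // Load the frame size to pass to the exit hook.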
+      __ Mov(vixl::aarch64::x4, arm64_codegen->GetFrameSize());
+    }
     arm64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
     RestoreLiveRegisters(codegen, locations);
     __ B(GetExitLabel());
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 2a9bc39..03a9977 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -973,6 +973,10 @@
         (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
+    if (instruction_->IsMethodExitHook()) {
+      // Load the frame size to pass to the exit hook.
+      __ Mov(vixl::aarch32::Register(R2), arm_codegen->GetFrameSize());
+    }
     arm_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
     RestoreLiveRegisters(codegen, locations);
     __ B(GetExitLabel());
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index a14ea8b..5cc7270 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -966,6 +966,9 @@
         (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
+    if (instruction_->IsMethodExitHook()) {
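+      // Load the frame size to pass to the exit hook.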
+      __ movl(EBX, Immediate(codegen->GetFrameSize()));
+    }
     x86_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
     RestoreLiveRegisters(codegen, locations);
     __ jmp(GetExitLabel());
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 3d0f35d..33c9ae4 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -985,6 +985,10 @@
         (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
+    if (instruction_->IsMethodExitHook()) {
+      // Load the frame size to pass to the exit hook.
+      __ movq(CpuRegister(R8), Immediate(codegen->GetFrameSize()));
+    }
     x86_64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
     RestoreLiveRegisters(codegen, locations);
     __ jmp(GetExitLabel());
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
index 2939c54..2fde783 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
@@ -878,6 +878,11 @@
   }
 }
 
+void ArmVIXLJNIMacroAssembler::Move(ManagedRegister mdst, size_t value) {
+  ArmManagedRegister dst = mdst.AsArm();
+  ___ Mov(AsVIXLRegister(dst), static_cast<uint32_t>(value));
+}
+
 void ArmVIXLJNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t size) {
   DCHECK(size == 4 || size == 8) << size;
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
index 5965552..2b331af 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
@@ -100,6 +100,8 @@
 
   void Move(ManagedRegister dest, ManagedRegister src, size_t size) override;
 
+  void Move(ManagedRegister dest, size_t value) override;
+
   void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset32 thr_offs) override;
 
   void CopyRawPtrToThread(ThreadOffset32 thr_offs,
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
index f0ade42..88a1b37 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -643,6 +643,12 @@
   }
 }
 
+void Arm64JNIMacroAssembler::Move(ManagedRegister m_dst, size_t value) {
+  Arm64ManagedRegister dst = m_dst.AsArm64();
+  DCHECK(dst.IsXRegister());
+  ___ Mov(reg_x(dst.AsXRegister()), value);
+}
+
 void Arm64JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset64 tr_offs) {
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
   Register scratch = temps.AcquireX();
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.h b/compiler/utils/arm64/jni_macro_assembler_arm64.h
index 9d3e821..762fe68 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.h
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.h
@@ -92,6 +92,7 @@
                      ArrayRef<ArgumentLocation> srcs,
                      ArrayRef<FrameOffset> refs) override;
   void Move(ManagedRegister dest, ManagedRegister src, size_t size) override;
+  void Move(ManagedRegister dest, size_t value) override;
   void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset64 thr_offs) override;
   void CopyRawPtrToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
       override;
diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h
index 36de012..a91176c 100644
--- a/compiler/utils/jni_macro_assembler.h
+++ b/compiler/utils/jni_macro_assembler.h
@@ -215,6 +215,8 @@
                     ManagedRegister scratch,
                     size_t size) = 0;
 
+  // Load the given immediate value into the destination register; used to pass the
+  // frame size to the method exit hook.
+  virtual void Move(ManagedRegister dst, size_t value) = 0;
+
   virtual void MemoryBarrier(ManagedRegister scratch) = 0;
 
   // Sign extension
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc
index e292c5b..a1c874e 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.cc
+++ b/compiler/utils/x86/jni_macro_assembler_x86.cc
@@ -412,6 +412,11 @@
   }
 }
 
+void X86JNIMacroAssembler::Move(ManagedRegister mdest, size_t value) {
+  X86ManagedRegister dest = mdest.AsX86();
+  __ movl(dest.AsCpuRegister(), Immediate(value));
+}
+
 void X86JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src) {
   Register scratch = GetScratchRegister();
   __ movl(scratch, Address(ESP, src));
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.h b/compiler/utils/x86/jni_macro_assembler_x86.h
index 571b213..1a1bc13 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.h
+++ b/compiler/utils/x86/jni_macro_assembler_x86.h
@@ -92,6 +92,8 @@
 
   void Move(ManagedRegister dest, ManagedRegister src, size_t size) override;
 
+  void Move(ManagedRegister dest, size_t value) override;
+
   void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset32 thr_offs) override;
 
   void CopyRawPtrToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
index 8115911..7710dfb 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
@@ -484,6 +484,12 @@
   }
 }
 
+void X86_64JNIMacroAssembler::Move(ManagedRegister mdest, size_t value) {
+  X86_64ManagedRegister dest = mdest.AsX86_64();
+  __ movq(dest.AsCpuRegister(), Immediate(value));
+}
+
 void X86_64JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src) {
   CpuRegister scratch = GetScratchRegister();
   __ movl(scratch, Address(CpuRegister(RSP), src));
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
index 04c6bfc..827e1cc 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
@@ -95,6 +95,8 @@
 
   void Move(ManagedRegister dest, ManagedRegister src, size_t size) override;
 
+  void Move(ManagedRegister dest, size_t value) override;
+
   void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset64 thr_offs) override;
 
   void CopyRawPtrToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index bd8149e..83a60ec 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1543,6 +1543,7 @@
     bx lr      // ret
 
 .Lcall_method_exit_hook:
+    mov r2, #FRAME_SIZE_SAVE_REFS_AND_ARGS    @ pass frame_size to the exit hook
     bl art_quick_method_exit_hook
     b .Lcall_method_exit_hook_done
 
@@ -2572,15 +2573,18 @@
 END art_quick_method_entry_hook
 
 ENTRY art_quick_method_exit_hook
-    SETUP_SAVE_EVERYTHING_FRAME r2
+    SETUP_SAVE_EVERYTHING_FRAME r5
 
-    add r3, sp, #8                            @ store fpr_res pointer, in kSaveEverything frame
-    add r2, sp, #136                          @ store gpr_res pointer, in kSaveEverything frame
-    ldr r1, [sp, #FRAME_SIZE_SAVE_EVERYTHING] @ pass ArtMethod*
+    sub sp, #4                                @ align stack
+    push {r2}                                 @ pass frame_size on the stack
+    add r3, sp, #(8 + 8)                      @ store fpr_res pointer, in kSaveEverything frame
+    add r2, sp, #(136 + 8)                    @ store gpr_res pointer, in kSaveEverything frame
+    add r1, sp, #(FRAME_SIZE_SAVE_EVERYTHING + 8)   @ pass ArtMethod**
     mov r0, rSELF                             @ pass Thread::Current
-    blx artMethodExitHook                     @ (Thread*, ArtMethod*, gpr_res*, fpr_res*)
+    blx artMethodExitHook                     @ (Thread*, ArtMethod**, gpr_res*, fpr_res*,
+                                              @ frame_size)
 
-    // Normal return.
+    add sp, #8                                @ pop frame_size and padding
     RESTORE_SAVE_EVERYTHING_FRAME
     REFRESH_MARKING_REGISTER
     blx lr
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index a35206f..354a3bd 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1915,6 +1915,7 @@
 
 .Lcall_method_exit_hook:
     fmov d0, x0
+    mov x4, FRAME_SIZE_SAVE_REFS_AND_ARGS    // pass frame_size to the exit hook
     bl art_quick_method_exit_hook
     b .Lcall_method_exit_hook_done
 
@@ -2677,15 +2678,16 @@
 ENTRY art_quick_method_exit_hook
     SETUP_SAVE_EVERYTHING_FRAME
 
+    // frame_size is passed from JITed code in x4
     add x3, sp, #16                           // floating-point result ptr in kSaveEverything frame
     add x2, sp, #272                          // integer result ptr in kSaveEverything frame
-    ldr x1, [sp, #FRAME_SIZE_SAVE_EVERYTHING] // ArtMethod*
+    add x1, sp, #FRAME_SIZE_SAVE_EVERYTHING   // ArtMethod**
     mov x0, xSELF                             // Thread::Current
-    bl  artMethodExitHook                     // (Thread*, ArtMethod*, gpr_res*, fpr_res*)
+    bl  artMethodExitHook                     // (Thread*, ArtMethod**, gpr_res*, fpr_res*,
+                                              // frame_size)
 
     // Normal return.
     RESTORE_SAVE_EVERYTHING_FRAME
     REFRESH_MARKING_REGISTER
     ret
 END art_quick_method_exit_hook
-
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index bc61be5..eb8582d 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1738,6 +1738,7 @@
     ret
 
 .Lcall_method_exit_hook:
+    movl LITERAL(FRAME_SIZE_SAVE_REFS_AND_ARGS), %ebx  // pass frame_size to the exit hook
     call art_quick_method_exit_hook
     jmp .Lcall_method_exit_hook_done
 
@@ -2363,25 +2364,29 @@
 END_FUNCTION art_quick_method_entry_hook
 
 DEFINE_FUNCTION art_quick_method_exit_hook
-    SETUP_SAVE_EVERYTHING_FRAME ebx
+    PUSH edi
+    SETUP_SAVE_EVERYTHING_FRAME_EDI_SAVED edi
 
-    mov FRAME_SIZE_SAVE_EVERYTHING(%esp), %ebx // Remember ArtMethod*
-    subl LITERAL(8), %esp                      // Align stack.
-    CFI_ADJUST_CFA_OFFSET(8)
+    leal FRAME_SIZE_SAVE_EVERYTHING(%esp), %edi // Remember ArtMethod**
+    subl LITERAL(4), %esp                      // Align stack.
+    CFI_ADJUST_CFA_OFFSET(4)
+
     PUSH_ARG edx                   // Save gpr return value. edx and eax need to be together
                                    // which isn't the case in kSaveEverything frame.
     PUSH_ARG eax
     movl %esp, %edx                // Get pointer to gpr_result
-    leal 32(%esp), %eax            // Get pointer to fpr_result, in kSaveEverything frame
+    leal 28(%esp), %eax            // Get pointer to fpr_result, in kSaveEverything frame
+    PUSH_ARG ebx                   // Pass frame_size
     PUSH_ARG eax                   // Pass fpr_result
     PUSH_ARG edx                   // Pass gpr_result
-    PUSH_ARG ebx                   // Pass ArtMethod*
+    PUSH_ARG edi                   // Pass ArtMethod**
     pushl %fs:THREAD_SELF_OFFSET   // Pass Thread::Current.
     CFI_ADJUST_CFA_OFFSET(4)
-    call SYMBOL(artMethodExitHook) // (Thread*, ArtMethod*, gpr_result*, fpr_result*)
+    call SYMBOL(artMethodExitHook) // (Thread*, ArtMethod**, gpr_result*, fpr_result*,
+                                   // frame_size)
 
     // Return result could have been changed if it's a reference.
-    movl 16(%esp), %ecx
+    movl 20(%esp), %ecx
     movl %ecx, (80+32)(%esp)
     addl LITERAL(32), %esp         // Pop arguments and grp_result.
     CFI_ADJUST_CFA_OFFSET(-32)
@@ -2390,6 +2395,3 @@
     RESTORE_SAVE_EVERYTHING_FRAME
     ret
 END_FUNCTION art_quick_method_exit_hook
-
-
-
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 0d79d00..396a8ab 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1614,6 +1614,7 @@
     ret
 
 .Lcall_method_exit_hook:
+   movq LITERAL(FRAME_SIZE_SAVE_REFS_AND_ARGS), %r8   // pass frame_size to the exit hook
    call art_quick_method_exit_hook
    jmp .Lcall_method_exit_hook_done
 
@@ -2177,14 +2178,16 @@
 DEFINE_FUNCTION art_quick_method_exit_hook
     SETUP_SAVE_EVERYTHING_FRAME
 
+    // R8 passed from JITed code contains frame_size
     leaq 16(%rsp), %rcx                         // floating-point result pointer in kSaveEverything
                                                 // frame
     leaq 144(%rsp), %rdx                        // integer result pointer in kSaveEverything frame
-    movq FRAME_SIZE_SAVE_EVERYTHING(%rsp), %rsi // ArtMethod
+    leaq FRAME_SIZE_SAVE_EVERYTHING(%rsp), %rsi // ArtMethod**
     movq %gs:THREAD_SELF_OFFSET, %rdi           // Thread::Current
-    call SYMBOL(artMethodExitHook)              // (Thread*, SP, gpr_res*, fpr_res*)
+    call SYMBOL(artMethodExitHook)              // (Thread*, ArtMethod**, gpr_res*, fpr_res*,
+                                                //  frame_size)
 
     // Normal return.
     RESTORE_SAVE_EVERYTHING_FRAME
     ret
-END_FUNCTION art_quick_method_entry_hook
+END_FUNCTION art_quick_method_exit_hook
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 1abdd2b..a854ea0 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -2684,10 +2684,11 @@
 }
 
 extern "C" void artMethodExitHook(Thread* self,
-                                 ArtMethod* method,
-                                 uint64_t* gpr_result,
-                                 uint64_t* fpr_result)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
+                                  ArtMethod** sp,
+                                  uint64_t* gpr_result,
+                                  uint64_t* fpr_result,
+                                  uint32_t frame_size)
+  REQUIRES_SHARED(Locks::mutator_lock_) {
   // For GenericJniTrampolines we call artMethodExitHook even for non debuggable runtimes though we
   // still install instrumentation stubs. So just return early here so we don't call method exit
   // twice. In all other cases (JITed JNI stubs / JITed code) we only call this for debuggable
@@ -2706,6 +2707,7 @@
   instrumentation::Instrumentation* instr = Runtime::Current()->GetInstrumentation();
   DCHECK(instr->AreExitStubsInstalled());
   bool is_ref;
+  ArtMethod* method = *sp;
   JValue return_value = instr->GetReturnValue(method, &is_ref, gpr_result, fpr_result);
   bool deoptimize = false;
   {
@@ -2717,12 +2719,7 @@
     }
     DCHECK(!method->IsRuntimeMethod());
 
-    // Deoptimize if the caller needs to continue execution in the interpreter. Do nothing if we get
-    // back to an upcall.
-    NthCallerVisitor visitor(self, 1, /*include_runtime_and_upcalls=*/false);
-    visitor.WalkStack(true);
-    deoptimize = instr->ShouldDeoptimizeCaller(self, visitor);
-
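+    // Deoptimize if the caller needs to continue execution in the interpreter. Do nothing
+    // if we get back to an upcall. The caller is located from sp and frame_size directly,
+    // without walking the stack.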
+    deoptimize = instr->ShouldDeoptimizeCaller(self, sp, frame_size);
     // If we need a deoptimization MethodExitEvent will be called by the interpreter when it
     // re-executes the return instruction. For native methods we have to process method exit
     // events here since deoptimization just removes the native frame.
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index ab0998b..dea36a8 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -1668,15 +1668,18 @@
   ArtMethod* runtime_method = *sp;
   DCHECK(runtime_method->IsRuntimeMethod());
   QuickMethodFrameInfo frame_info = Runtime::Current()->GetRuntimeMethodFrameInfo(runtime_method);
+  return ShouldDeoptimizeCaller(self, sp, frame_info.FrameSizeInBytes());
+}
 
-  uintptr_t caller_sp = reinterpret_cast<uintptr_t>(sp) + frame_info.FrameSizeInBytes();
+bool Instrumentation::ShouldDeoptimizeCaller(Thread* self, ArtMethod** sp, size_t frame_size) {
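+  // The caller's frame starts directly above the current frame, and the return PC is in
+  // the last slot of the current frame, so both can be computed from sp and frame_size.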
+  uintptr_t caller_sp = reinterpret_cast<uintptr_t>(sp) + frame_size;
   ArtMethod* caller = *(reinterpret_cast<ArtMethod**>(caller_sp));
-  uintptr_t caller_pc_addr = reinterpret_cast<uintptr_t>(sp) + frame_info.GetReturnPcOffset();
+  uintptr_t caller_pc_addr = reinterpret_cast<uintptr_t>(sp) + (frame_size - sizeof(void*));
   uintptr_t caller_pc = *reinterpret_cast<uintptr_t*>(caller_pc_addr);
-
   return ShouldDeoptimizeCaller(self, caller, caller_pc, caller_sp);
 }
 
 bool Instrumentation::ShouldDeoptimizeCaller(Thread* self, const NthCallerVisitor& visitor) {
   uintptr_t caller_sp = reinterpret_cast<uintptr_t>(visitor.GetCurrentQuickFrame());
   // When the caller isn't executing quick code there is no need to deoptimize.
@@ -1692,6 +1695,7 @@
                                              uintptr_t caller_sp) {
   if (caller == nullptr ||
       caller->IsNative() ||
+      caller->IsRuntimeMethod() ||
       caller_pc == reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc())) {
     // If caller_pc is QuickInstrumentationExit then deoptimization will be handled by the
     // instrumentation exit trampoline so we don't need to handle deoptimizations here.
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 634b0d2..25e787f 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -513,6 +513,8 @@
   // method requires a deopt or if this particular frame needs a deopt because of a class
   // redefinition.
   bool ShouldDeoptimizeCaller(Thread* self, ArtMethod** sp) REQUIRES_SHARED(Locks::mutator_lock_);
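+  // Variant for callers that already know the frame size (e.g. when called from JITed
+  // code), which avoids walking the stack to locate the caller.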
+  bool ShouldDeoptimizeCaller(Thread* self, ArtMethod** sp, size_t frame_size)
+      REQUIRES_SHARED(Locks::mutator_lock_);
   // This is a helper function used by the two variants of ShouldDeoptimizeCaller.
   // Remove this once ShouldDeoptimizeCaller is updated not to use NthCallerVisitor.
   bool ShouldDeoptimizeCaller(Thread* self,