Reland^2 "Don't use instrumentation stubs for native methods in debuggable" am: 5eb7ad2aac am: 0ddb9ccb82

Original change: https://android-review.googlesource.com/c/platform/art/+/2145999

Change-Id: Ieaac98286f68fcfeec11d999b9abf1c6b1d7e396
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index b88ebaf..d672500 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -36,7 +36,9 @@
 #include "dex/dex_file-inl.h"
 #include "driver/compiler_options.h"
 #include "entrypoints/quick/quick_entrypoints.h"
+#include "instrumentation.h"
 #include "jni/jni_env_ext.h"
+#include "runtime.h"
 #include "thread.h"
 #include "utils/arm/managed_register_arm.h"
 #include "utils/arm64/managed_register_arm64.h"
@@ -95,6 +97,12 @@
   const InstructionSetFeatures* instruction_set_features =
       compiler_options.GetInstructionSetFeatures();
 
+  // i.e. if the method was annotated with @FastNative
+  const bool is_fast_native = (access_flags & kAccFastNative) != 0u;
+
+  // i.e. if the method was annotated with @CriticalNative
+  const bool is_critical_native = (access_flags & kAccCriticalNative) != 0u;
+
   // When walking the stack the top frame doesn't have a pc associated with it. We then depend on
   // the invariant that we don't have JITed code when AOT code is available. In debuggable runtimes
   // this invariant doesn't hold. So we tag the SP for JITed code to identify if we are executing
@@ -102,11 +110,12 @@
   // debuggable runtimes.
   bool should_tag_sp = compiler_options.GetDebuggable() && compiler_options.IsJitCompiler();
 
-  // i.e. if the method was annotated with @FastNative
-  const bool is_fast_native = (access_flags & kAccFastNative) != 0u;
-
-  // i.e. if the method was annotated with @CriticalNative
-  const bool is_critical_native = (access_flags & kAccCriticalNative) != 0u;
+  // We don't JIT stubs for critical native methods in debuggable runtimes.
+  // TODO(mythria): Add support required for calling method entry / exit hooks from critical native
+  // methods.
+  bool needs_entry_exit_hooks = compiler_options.GetDebuggable() &&
+                                compiler_options.IsJitCompiler() &&
+                                !is_critical_native;
 
   VLOG(jni) << "JniCompile: Method :: "
               << dex_file.PrettyMethod(method_idx, /* with signature */ true)
@@ -229,6 +238,21 @@
     __ StoreStackPointerToThread(Thread::TopOfManagedStackOffset<kPointerSize>(), should_tag_sp);
   }
 
+  // 1.5. Call any method entry hooks if required.
+  // For critical native methods, we don't JIT stubs in debuggable runtimes (see
+  // OptimizingCompiler::JitCompile).
+  // TODO(mythria): Add support to call method entry / exit hooks for critical native methods too.
+  std::unique_ptr<JNIMacroLabel> method_entry_hook_slow_path;
+  std::unique_ptr<JNIMacroLabel> method_entry_hook_return;
+  if (UNLIKELY(needs_entry_exit_hooks)) {
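+    // The flag's address can be embedded directly: this code is generated by the JIT at runtime,
+    // when the Runtime and its Instrumentation instance already exist at fixed addresses.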
+    uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
+    int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value();
+    method_entry_hook_slow_path = __ CreateLabel();
+    method_entry_hook_return = __ CreateLabel();
+    __ TestByteAndJumpIfNotZero(address + offset, method_entry_hook_slow_path.get());
+    __ Bind(method_entry_hook_return.get());
+  }
+
   // 2. Lock the object (if synchronized) and transition out of Runnable (if normal native).
 
   // 2.1. Lock the synchronization object (`this` or class) for synchronized methods.
@@ -539,7 +563,21 @@
     __ Bind(suspend_check_resume.get());
   }
 
-  // 7.5. Remove activation - need to restore callee save registers since the GC
+  // 7.5. Check if method exit hooks need to be called.
+  // For critical native methods, we don't JIT stubs in debuggable runtimes.
+  // TODO(mythria): Add support to call method entry / exit hooks for critical native methods too.
+  std::unique_ptr<JNIMacroLabel> method_exit_hook_slow_path;
+  std::unique_ptr<JNIMacroLabel> method_exit_hook_return;
+  if (UNLIKELY(needs_entry_exit_hooks)) {
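+    // Mirrors the method entry hook check above.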
+    uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
+    int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value();
+    method_exit_hook_slow_path = __ CreateLabel();
+    method_exit_hook_return = __ CreateLabel();
+    __ TestByteAndJumpIfNotZero(address + offset, method_exit_hook_slow_path.get());
+    __ Bind(method_exit_hook_return.get());
+  }
+
+  // 7.6. Remove activation - need to restore callee save registers since the GC
   //      may have changed them.
   DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(current_frame_size));
   if (LIKELY(!is_critical_native) || !main_jni_conv->UseTailCall()) {
@@ -637,6 +675,24 @@
     __ DeliverPendingException();
   }
 
+  // 8.6. Method entry / exit hooks slow paths.
+  if (UNLIKELY(needs_entry_exit_hooks)) {
+    __ Bind(method_entry_hook_slow_path.get());
+    // Use the JNI-specific method entry hook that saves all the arguments. Only the callee-save
+    // registers have been saved at this point, so go through the JNI-specific stub that saves the
+    // rest of the live registers.
+    __ CallFromThread(QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEntryHook));
+    __ ExceptionPoll(exception_slow_path.get());
+    __ Jump(method_entry_hook_return.get());
+
+    __ Bind(method_exit_hook_slow_path.get());
+    // The method exit hook is called just before tearing down the frame, so there are no live
+    // registers left and we can call the method exit hook directly without a JNI-specific
+    // entrypoint.
+    __ CallFromThread(QUICK_ENTRYPOINT_OFFSET(kPointerSize, pMethodExitHook));
+    __ Jump(method_exit_hook_return.get());
+  }
+
   // 9. Finalize code generation.
   __ FinalizeCode();
   size_t cs = __ CodeSize();
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 6eb3d01..a499c55 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -1233,6 +1233,14 @@
   ArenaAllocator allocator(runtime->GetJitArenaPool());
 
   if (UNLIKELY(method->IsNative())) {
+    // Use GenericJniTrampoline for critical native methods in debuggable runtimes. We don't
+    // support calling method entry / exit hooks for critical native methods yet.
+    // TODO(mythria): Add support for calling method entry / exit hooks in JITed stubs for critical
+    // native methods too.
+    if (runtime->IsJavaDebuggable() && method->IsCriticalNative()) {
+      DCHECK(compiler_options.IsJitCompiler());
+      return false;
+    }
     JniCompiledMethod jni_compiled_method = ArtQuickJniCompileMethod(
         compiler_options, access_flags, method_idx, *dex_file, &allocator);
     std::vector<Handle<mirror::Object>> roots;
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
index 61151fe..a4fddbc 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
@@ -1220,6 +1220,14 @@
   }
 }
 
+void ArmVIXLJNIMacroAssembler::TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) {
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  vixl32::Register scratch = temps.Acquire();
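+  // Materialize the address, load the byte it points to, and branch if it is non-zero.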
+  ___ Mov(scratch, static_cast<uint32_t>(address));
+  ___ Ldrb(scratch, MemOperand(scratch, 0));
+  ___ CompareAndBranchIfNonZero(scratch, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
+}
+
 void ArmVIXLJNIMacroAssembler::Bind(JNIMacroLabel* label) {
   CHECK(label != nullptr);
   ___ Bind(ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
index 980de41..5965552 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
@@ -213,6 +213,8 @@
   void TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) override;
   // Emit a conditional jump to the label by applying a unary condition test to object's mark bit.
   void TestMarkBit(ManagedRegister ref, JNIMacroLabel* label, JNIMacroUnaryCondition cond) override;
+  // Emit a conditional jump to the label if the byte loaded from the specified address is not zero.
+  void TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) override;
   // Code at this offset will serve as the target for the Jump call.
   void Bind(JNIMacroLabel* label) override;
 
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
index 323a01e..0f6e5eb 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -1040,6 +1040,14 @@
   }
 }
 
+void Arm64JNIMacroAssembler::TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) {
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  Register scratch = temps.AcquireX();
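+  // Load the byte at `address`; CBNZ takes the branch when the loaded value is non-zero.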
+  ___ Mov(scratch, address);
+  ___ Ldrb(scratch.W(), MEM_OP(scratch, 0));
+  ___ Cbnz(scratch.W(), Arm64JNIMacroLabel::Cast(label)->AsArm64());
+}
+
 void Arm64JNIMacroAssembler::Bind(JNIMacroLabel* label) {
   CHECK(label != nullptr);
   ___ Bind(Arm64JNIMacroLabel::Cast(label)->AsArm64());
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.h b/compiler/utils/arm64/jni_macro_assembler_arm64.h
index daea95d..9d3e821 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.h
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.h
@@ -197,6 +197,8 @@
   void TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) override;
   // Emit a conditional jump to the label by applying a unary condition test to object's mark bit.
   void TestMarkBit(ManagedRegister ref, JNIMacroLabel* label, JNIMacroUnaryCondition cond) override;
+  // Emit a conditional jump to the label if the byte loaded from the specified address is not zero.
+  void TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) override;
   // Code at this offset will serve as the target for the Jump call.
   void Bind(JNIMacroLabel* label) override;
 
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index dac21ae..ae84338 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -155,7 +155,7 @@
   "     224: d9 f8 24 80   ldr.w r8, [r9, #36]\n"
   "     228: 70 47         bx lr\n"
   "     22a: d9 f8 9c 00   ldr.w r0, [r9, #156]\n"
-  "     22e: d9 f8 d0 e2   ldr.w lr, [r9, #720]\n"
+  "     22e: d9 f8 d4 e2   ldr.w lr, [r9, #724]\n"
   "     232: f0 47         blx lr\n"
 };
 
diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h
index c8c713a..36de012 100644
--- a/compiler/utils/jni_macro_assembler.h
+++ b/compiler/utils/jni_macro_assembler.h
@@ -286,6 +286,8 @@
   virtual void TestMarkBit(ManagedRegister ref,
                            JNIMacroLabel* label,
                            JNIMacroUnaryCondition cond) = 0;
+  // Emit a conditional jump to the label if the byte loaded from the specified address is not zero.
+  virtual void TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) = 0;
   // Code at this offset will serve as the target for the Jump call.
   virtual void Bind(JNIMacroLabel* label) = 0;
 
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc
index 55d5428..e292c5b 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.cc
+++ b/compiler/utils/x86/jni_macro_assembler_x86.cc
@@ -734,6 +734,12 @@
   __ j(UnaryConditionToX86Condition(cond), X86JNIMacroLabel::Cast(label)->AsX86());
 }
 
+void X86JNIMacroAssembler::TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) {
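+  // X86 can test a byte at an absolute 32-bit address directly, so no scratch register is needed.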
+  __ cmpb(Address::Absolute(address), Immediate(0));
+  __ j(kNotZero, X86JNIMacroLabel::Cast(label)->AsX86());
+}
+
 void X86JNIMacroAssembler::Bind(JNIMacroLabel* label) {
   CHECK(label != nullptr);
   __ Bind(X86JNIMacroLabel::Cast(label)->AsX86());
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.h b/compiler/utils/x86/jni_macro_assembler_x86.h
index f8ce38b..571b213 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.h
+++ b/compiler/utils/x86/jni_macro_assembler_x86.h
@@ -189,6 +189,8 @@
   void TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) override;
   // Emit a conditional jump to the label by applying a unary condition test to object's mark bit.
   void TestMarkBit(ManagedRegister ref, JNIMacroLabel* label, JNIMacroUnaryCondition cond) override;
+  // Emit a conditional jump to the label if the byte loaded from the specified address is not zero.
+  void TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) override;
   // Code at this offset will serve as the target for the Jump call.
   void Bind(JNIMacroLabel* label) override;
 
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
index adc431f..8115911 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
@@ -810,6 +810,13 @@
   __ j(UnaryConditionToX86_64Condition(cond), X86_64JNIMacroLabel::Cast(label)->AsX86_64());
 }
 
+void X86_64JNIMacroAssembler::TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) {
+  CpuRegister scratch = GetScratchRegister();
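+  // There is no cmpb form with a 64-bit absolute address, so materialize it in a scratch register.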
+  __ movq(scratch, Immediate(address));
+  __ cmpb(Address(scratch, 0), Immediate(0));
+  __ j(kNotZero, X86_64JNIMacroLabel::Cast(label)->AsX86_64());
+}
+
 void X86_64JNIMacroAssembler::Bind(JNIMacroLabel* label) {
   CHECK(label != nullptr);
   __ Bind(X86_64JNIMacroLabel::Cast(label)->AsX86_64());
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
index feaf27e..04c6bfc 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
@@ -209,6 +209,8 @@
   void TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) override;
   // Emit a conditional jump to the label by applying a unary condition test to object's mark bit.
   void TestMarkBit(ManagedRegister ref, JNIMacroLabel* label, JNIMacroUnaryCondition cond) override;
+  // Emit a conditional jump to the label if the byte loaded from the specified address is not zero.
+  void TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) override;
   // Code at this offset will serve as the target for the Jump call.
   void Bind(JNIMacroLabel* label) override;
 
diff --git a/dex2oat/linker/oat_writer_test.cc b/dex2oat/linker/oat_writer_test.cc
index d9af774..06f5066 100644
--- a/dex2oat/linker/oat_writer_test.cc
+++ b/dex2oat/linker/oat_writer_test.cc
@@ -505,7 +505,7 @@
   EXPECT_EQ(68U, sizeof(OatHeader));
   EXPECT_EQ(4U, sizeof(OatMethodOffsets));
   EXPECT_EQ(4U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(167 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
+  EXPECT_EQ(168 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
             sizeof(QuickEntryPoints));
 }
 
diff --git a/runtime/arch/arm/jni_entrypoints_arm.S b/runtime/arch/arm/jni_entrypoints_arm.S
index 7270d20..d91882c 100644
--- a/runtime/arch/arm/jni_entrypoints_arm.S
+++ b/runtime/arch/arm/jni_entrypoints_arm.S
@@ -327,6 +327,11 @@
 JNI_SAVE_MANAGED_ARGS_TRAMPOLINE art_jni_method_start, artJniMethodStart, rSELF
 
     /*
+     * Trampoline to `artJniMethodEntryHook()` that preserves all managed arguments.
+     */
+JNI_SAVE_MANAGED_ARGS_TRAMPOLINE art_jni_method_entry_hook, artJniMethodEntryHook, rSELF
+
+    /*
      * Trampoline to `artJniMonitoredMethodStart()` that preserves all managed arguments.
      */
 JNI_SAVE_MANAGED_ARGS_TRAMPOLINE art_jni_monitored_method_start, artJniMonitoredMethodStart, rSELF
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 3d39eb5..bd8149e 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1526,16 +1526,26 @@
     .cfi_remember_state
     .cfi_def_cfa_register sp
 
+    // store into fpr, for when it's a fpr return...
+    vmov d0, r0, r1
+
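+    // Call the method exit hook only when instrumentation has set the flag on the Runtime instance.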
+    LOAD_RUNTIME_INSTANCE r2
+    ldr r2, [r2, #INSTRUMENTATION_STUBS_INSTALLED_OFFSET_FROM_RUNTIME_INSTANCE]
+    cbnz r2, .Lcall_method_exit_hook
+.Lcall_method_exit_hook_done:
+
     // Tear down the callee-save frame. Skip arg registers.
     add     sp, #FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY
     .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY)
     RESTORE_SAVE_REFS_ONLY_FRAME
     REFRESH_MARKING_REGISTER
 
-    // store into fpr, for when it's a fpr return...
-    vmov d0, r0, r1
     bx lr      // ret
 
+.Lcall_method_exit_hook:
+    bl art_quick_method_exit_hook
+    b .Lcall_method_exit_hook_done
+
     // Undo the unwinding information from above since it doesn't apply below.
     CFI_RESTORE_STATE_AND_DEF_CFA r10, FRAME_SIZE_SAVE_REFS_AND_ARGS
 .Lexception_in_native:
diff --git a/runtime/arch/arm64/jni_entrypoints_arm64.S b/runtime/arch/arm64/jni_entrypoints_arm64.S
index b3ea40d..9612a7b 100644
--- a/runtime/arch/arm64/jni_entrypoints_arm64.S
+++ b/runtime/arch/arm64/jni_entrypoints_arm64.S
@@ -366,6 +366,11 @@
 JNI_SAVE_MANAGED_ARGS_TRAMPOLINE art_jni_method_start, artJniMethodStart, xSELF
 
     /*
+     * Trampoline to `artJniMethodEntryHook` that preserves all managed arguments.
+     */
+JNI_SAVE_MANAGED_ARGS_TRAMPOLINE art_jni_method_entry_hook, artJniMethodEntryHook, xSELF
+
+    /*
      * Trampoline to `artJniMonitoredMethodStart()` that preserves all managed arguments.
      */
 JNI_SAVE_MANAGED_ARGS_TRAMPOLINE art_jni_monitored_method_start, artJniMonitoredMethodStart, xSELF
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index b12945b..a35206f 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1900,6 +1900,11 @@
     .cfi_remember_state
     .cfi_def_cfa_register sp
 
+    LOAD_RUNTIME_INSTANCE x1
+    ldrb w1, [x1, #INSTRUMENTATION_STUBS_INSTALLED_OFFSET_FROM_RUNTIME_INSTANCE]
+    cbnz w1, .Lcall_method_exit_hook
+.Lcall_method_exit_hook_done:
+
     // Tear down the callee-save frame.
     RESTORE_SAVE_REFS_AND_ARGS_FRAME
     REFRESH_MARKING_REGISTER
@@ -1908,6 +1913,11 @@
     fmov d0, x0
     ret
 
+.Lcall_method_exit_hook:
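+    // Copy the potential FP result to d0, as the normal return path above does.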
+    fmov d0, x0
+    bl art_quick_method_exit_hook
+    b .Lcall_method_exit_hook_done
+
     // Undo the unwinding information from above since it doesn't apply below.
     CFI_RESTORE_STATE_AND_DEF_CFA x28, FRAME_SIZE_SAVE_REFS_AND_ARGS
 .Lexception_in_native:
diff --git a/runtime/arch/x86/jni_entrypoints_x86.S b/runtime/arch/x86/jni_entrypoints_x86.S
index 4b43814..c7cf856 100644
--- a/runtime/arch/x86/jni_entrypoints_x86.S
+++ b/runtime/arch/x86/jni_entrypoints_x86.S
@@ -286,6 +286,12 @@
 JNI_SAVE_MANAGED_ARGS_TRAMPOLINE art_jni_method_start, artJniMethodStart, fs:THREAD_SELF_OFFSET
 
     /*
+     * Trampoline to `artJniMethodEntryHook` that preserves all managed arguments.
+     */
+JNI_SAVE_MANAGED_ARGS_TRAMPOLINE \
+    art_jni_method_entry_hook, artJniMethodEntryHook, fs:THREAD_SELF_OFFSET
+
+    /*
      * Trampoline to `artJniMonitoredMethodStart()` that preserves all managed arguments.
      */
 JNI_SAVE_MANAGED_ARGS_TRAMPOLINE \
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 82aa6b2..bc61be5 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1713,6 +1713,16 @@
     CFI_REMEMBER_STATE
     CFI_DEF_CFA_REGISTER(esp)
 
+    // Quick expects the return value to be in xmm0.
+    movd %eax, %xmm0
+    movd %edx, %xmm1
+    punpckldq %xmm1, %xmm0
+
+    LOAD_RUNTIME_INSTANCE ebx
+    cmpb MACRO_LITERAL(0), INSTRUMENTATION_STUBS_INSTALLED_OFFSET_FROM_RUNTIME_INSTANCE(%ebx)
+    jne .Lcall_method_exit_hook
+.Lcall_method_exit_hook_done:
+
     // Tear down the callee-save frame.
     // Remove space for FPR args and EAX
     addl LITERAL(4 + 4 * 8), %esp
@@ -1725,12 +1735,12 @@
     POP ebp  // Restore callee saves
     POP esi
     POP edi
-    // Quick expects the return value to be in xmm0.
-    movd %eax, %xmm0
-    movd %edx, %xmm1
-    punpckldq %xmm1, %xmm0
     ret
 
+.Lcall_method_exit_hook:
+    call art_quick_method_exit_hook
+    jmp .Lcall_method_exit_hook_done
+
     // Undo the unwinding information from above since it doesn't apply below.
     CFI_RESTORE_STATE_AND_DEF_CFA ebp, 64
 .Lexception_in_native:
diff --git a/runtime/arch/x86_64/jni_entrypoints_x86_64.S b/runtime/arch/x86_64/jni_entrypoints_x86_64.S
index d2f1fe1..55f01b7 100644
--- a/runtime/arch/x86_64/jni_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/jni_entrypoints_x86_64.S
@@ -400,6 +400,12 @@
 JNI_SAVE_MANAGED_ARGS_TRAMPOLINE art_jni_method_start, artJniMethodStart, gs:THREAD_SELF_OFFSET
 
     /*
+     * Trampoline to `artJniMethodEntryHook` that preserves all managed arguments.
+     */
+JNI_SAVE_MANAGED_ARGS_TRAMPOLINE \
+    art_jni_method_entry_hook, artJniMethodEntryHook, gs:THREAD_SELF_OFFSET
+
+    /*
      * Trampoline to `artJniMonitoredMethodStart()` that preserves all managed arguments.
      */
 JNI_SAVE_MANAGED_ARGS_TRAMPOLINE \
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index fa1cd95..0d79d00 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1574,6 +1574,14 @@
     CFI_REMEMBER_STATE
     CFI_DEF_CFA_REGISTER(rsp)
 
+    // store into fpr, for when it's a fpr return...
+    movq %rax, %xmm0
+
+    LOAD_RUNTIME_INSTANCE rcx
+    cmpb MACRO_LITERAL(0), INSTRUMENTATION_STUBS_INSTALLED_OFFSET_FROM_RUNTIME_INSTANCE(%rcx)
+    jne .Lcall_method_exit_hook
+.Lcall_method_exit_hook_done:
+
     // Tear down the callee-save frame.
     // Load FPRs.
     // movq %xmm0, 16(%rsp)         // doesn't make sense!!!
@@ -1603,10 +1611,12 @@
     POP r13  // Callee save.
     POP r14  // Callee save.
     POP r15  // Callee save.
-    // store into fpr, for when it's a fpr return...
-    movq %rax, %xmm0
     ret
 
+.Lcall_method_exit_hook:
+    call art_quick_method_exit_hook
+    jmp .Lcall_method_exit_hook_done
+
     // Undo the unwinding information from above since it doesn't apply below.
     CFI_RESTORE_STATE_AND_DEF_CFA rbp, 208
 .Lexception_in_native:
diff --git a/runtime/entrypoints/quick/quick_default_externs.h b/runtime/entrypoints/quick/quick_default_externs.h
index f8856d8..cb3caac 100644
--- a/runtime/entrypoints/quick/quick_default_externs.h
+++ b/runtime/entrypoints/quick/quick_default_externs.h
@@ -122,6 +122,7 @@
 extern "C" void art_jni_monitored_method_start();
 extern "C" void art_jni_method_end();
 extern "C" void art_jni_monitored_method_end();
+extern "C" void art_jni_method_entry_hook();
 
 // JNI lock/unlock entrypoints. Note: Custom calling convention.
 extern "C" void art_jni_lock_object(art::mirror::Object*);
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
index 939feee..ea07788 100644
--- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
@@ -79,6 +79,7 @@
   qpoints->SetQuickGenericJniTrampoline(art_quick_generic_jni_trampoline);
   qpoints->SetJniDecodeReferenceResult(JniDecodeReferenceResult);
   qpoints->SetJniReadBarrier(art_jni_read_barrier);
+  qpoints->SetJniMethodEntryHook(art_jni_method_entry_hook);
 
   // Locks
   if (UNLIKELY(VLOG_IS_ON(systrace_lock_logging))) {
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index 7af1a0b..0e73c63 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -67,6 +67,7 @@
 // JNI entrypoints when monitoring entry/exit.
 extern "C" void artJniMonitoredMethodStart(Thread* self) UNLOCK_FUNCTION(Locks::mutator_lock_);
 extern "C" void artJniMonitoredMethodEnd(Thread* self) SHARED_LOCK_FUNCTION(Locks::mutator_lock_);
+extern "C" void artJniMethodEntryHook(Thread* self);
 
 // StringAppend pattern entrypoint.
 extern "C" mirror::String* artStringBuilderAppend(uint32_t format,
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index dffaa4b..4534bba 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -78,6 +78,7 @@
   V(JniLockObject, void, mirror::Object*) \
   V(JniUnlockObject, void, mirror::Object*) \
   V(QuickGenericJniTrampoline, void, ArtMethod*) \
+  V(JniMethodEntryHook, void) \
 \
   V(LockObject, void, mirror::Object*) \
   V(UnlockObject, void, mirror::Object*) \
diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
index ab13bd9..8569124 100644
--- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
@@ -38,6 +38,11 @@
 
 namespace art {
 
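+// Forward declaration; artMethodExitHook is defined in quick_trampoline_entrypoints.cc.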
+extern "C" int artMethodExitHook(Thread* self,
+                                 ArtMethod* method,
+                                 uint64_t* gpr_result,
+                                 uint64_t* fpr_result);
+
 static_assert(sizeof(IRTSegmentState) == sizeof(uint32_t), "IRTSegmentState size unexpected");
 static_assert(std::is_trivial<IRTSegmentState>::value, "IRTSegmentState not trivial");
 
@@ -174,11 +179,11 @@
     artJniUnlockObject(lock.Ptr(), self);
   }
   char return_shorty_char = called->GetShorty()[0];
+  uint64_t ret;
   if (return_shorty_char == 'L') {
-    uint64_t ret = reinterpret_cast<uint64_t>(
+    ret = reinterpret_cast<uint64_t>(
         UNLIKELY(self->IsExceptionPending()) ? nullptr : JniDecodeReferenceResult(result.l, self));
     PopLocalReferences(saved_local_ref_cookie, self);
-    return ret;
   } else {
     if (LIKELY(!critical_native)) {
       PopLocalReferences(saved_local_ref_cookie, self);
@@ -188,32 +193,43 @@
         if (kRuntimeISA == InstructionSet::kX86) {
           // Convert back the result to float.
           double d = bit_cast<double, uint64_t>(result_f);
-          return bit_cast<uint32_t, float>(static_cast<float>(d));
+          ret = bit_cast<uint32_t, float>(static_cast<float>(d));
         } else {
-          return result_f;
+          ret = result_f;
         }
       }
+      break;
       case 'D':
-        return result_f;
+        ret = result_f;
+        break;
       case 'Z':
-        return result.z;
+        ret = result.z;
+        break;
       case 'B':
-        return result.b;
+        ret = result.b;
+        break;
       case 'C':
-        return result.c;
+        ret = result.c;
+        break;
       case 'S':
-        return result.s;
+        ret = result.s;
+        break;
       case 'I':
-        return result.i;
+        ret = result.i;
+        break;
       case 'J':
-        return result.j;
+        ret = result.j;
+        break;
       case 'V':
-        return 0;
+        ret = 0;
+        break;
       default:
         LOG(FATAL) << "Unexpected return shorty character " << return_shorty_char;
         UNREACHABLE();
     }
   }
+
+  return ret;
 }
 
 extern "C" void artJniMonitoredMethodStart(Thread* self) {
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 1c93460..0c7402c 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -2110,6 +2110,14 @@
     }
   }
 
+  instrumentation::Instrumentation* instr = Runtime::Current()->GetInstrumentation();
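+  // In debuggable runtimes the GenericJni trampoline reports the method entry event itself.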
+  if (UNLIKELY(instr->AreExitStubsInstalled() && Runtime::Current()->IsJavaDebuggable())) {
+    instr->MethodEnterEvent(self, called);
+    if (self->IsExceptionPending()) {
+      return nullptr;
+    }
+  }
+
   // Skip calling `artJniMethodStart()` for @CriticalNative and @FastNative.
   if (LIKELY(normal_native)) {
     // Start JNI.
@@ -2651,6 +2659,13 @@
   return result.GetJ();
 }
 
+extern "C" void artJniMethodEntryHook(Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  instrumentation::Instrumentation* instr = Runtime::Current()->GetInstrumentation();
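+  // The top quick frame of this thread is the native method whose entry is being reported.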
+  ArtMethod* method = *self->GetManagedStack()->GetTopQuickFrame();
+  instr->MethodEnterEvent(self, method);
+}
+
 extern "C" void artMethodEntryHook(ArtMethod* method, Thread* self, ArtMethod** sp ATTRIBUTE_UNUSED)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   instrumentation::Instrumentation* instr = Runtime::Current()->GetInstrumentation();
@@ -2668,6 +2683,14 @@
                                  uint64_t* gpr_result,
                                  uint64_t* fpr_result)
     REQUIRES_SHARED(Locks::mutator_lock_) {
+  // For the GenericJni trampoline we call artMethodExitHook even in non-debuggable runtimes,
+  // where instrumentation stubs are still installed. Return early here so we don't process the
+  // method exit event twice. In all other cases (JITed JNI stubs / JITed code) this is only
+  // called in debuggable runtimes.
+  if (!Runtime::Current()->IsJavaDebuggable()) {
+    return;
+  }
+
   DCHECK_EQ(reinterpret_cast<uintptr_t>(self), reinterpret_cast<uintptr_t>(Thread::Current()));
   CHECK(gpr_result != nullptr);
   CHECK(fpr_result != nullptr);
@@ -2696,8 +2719,9 @@
     deoptimize = instr->ShouldDeoptimizeCaller(self, visitor);
 
     // If we need a deoptimization MethodExitEvent will be called by the interpreter when it
-    // re-executes the return instruction.
-    if (!deoptimize) {
+    // re-executes the return instruction. For native methods we have to process method exit
+    // events here since deoptimization just removes the native frame.
+    if (!deoptimize || method->IsNative()) {
       instr->MethodExitEvent(self,
                              method,
                              /* frame= */ {},
@@ -2720,7 +2744,11 @@
 
   if (deoptimize) {
     DeoptimizationMethodType deopt_method_type = instr->GetDeoptimizationMethodType(method);
-    self->PushDeoptimizationContext(return_value, is_ref, nullptr, false, deopt_method_type);
+    self->PushDeoptimizationContext(return_value,
+                                    is_ref,
+                                    self->GetException(),
+                                    false,
+                                    deopt_method_type);
     artDeoptimize(self);
     UNREACHABLE();
   }
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 240ecbd..2cd58db 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -225,7 +225,9 @@
                          pJniUnlockObject, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniUnlockObject,
                          pQuickGenericJniTrampoline, sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pQuickGenericJniTrampoline, pLockObject, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pQuickGenericJniTrampoline,
+                         pJniMethodEntryHook, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEntryHook, pLockObject, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pLockObject, pUnlockObject, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pUnlockObject, pCmpgDouble, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCmpgDouble, pCmpgFloat, sizeof(void*));
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index ec89ea1..39e6aff 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -230,20 +230,19 @@
     return false;
   }
 
-  // When jiting code for debuggable apps we generate the code to call method
-  // entry / exit hooks when required. Hence it is not required to update
-  // to instrumentation entry point for JITed code in debuggable mode.
   if (!Runtime::Current()->IsJavaDebuggable()) {
     return true;
   }
 
-  // Native functions can have JITed entry points but we don't include support
-  // for calling entry / exit hooks directly from the JITed code for native
-  // functions. So we still have to install entry exit stubs for such cases.
+  // Native methods don't need instrumentation stubs in debuggable runtimes. The GenericJni
+  // trampoline and JITed JNI stubs call the method entry / exit hooks themselves.
   if (method->IsNative()) {
-    return true;
+    return false;
   }
 
+  // When JITing code for debuggable apps we generate code to call the method entry / exit hooks
+  // when required. Hence there is no need to switch JITed code to the instrumentation entry
+  // point in debuggable mode.
   jit::Jit* jit = Runtime::Current()->GetJit();
   if (jit != nullptr && jit->GetCodeCache()->ContainsPc(code)) {
     return false;
@@ -520,6 +519,11 @@
           LOG(INFO) << "Ignoring already instrumented " << frame.Dump();
         }
       } else {
+        if (m->IsNative() && Runtime::Current()->IsJavaDebuggable()) {
+          // Native methods in debuggable runtimes don't use instrumentation stubs.
+          return true;
+        }
+
         // If it is a JITed frame then just set the deopt bit if required
         // otherwise continue
         const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
@@ -1688,6 +1692,11 @@
 
 bool Instrumentation::ShouldDeoptimizeCaller(Thread* self, const NthCallerVisitor& visitor) {
   bool should_deoptimize_frame = false;
+  if (visitor.caller != nullptr && visitor.caller->IsNative()) {
+    // Native methods don't need deoptimization. We don't set breakpoints / redefine native methods.
+    return false;
+  }
+
   if (visitor.caller != nullptr) {
     const OatQuickMethodHeader* header = visitor.GetCurrentOatQuickMethodHeader();
     if (header != nullptr && header->HasShouldDeoptimizeFlag()) {
diff --git a/runtime/oat.h b/runtime/oat.h
index 14b389d..341e70b 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr std::array<uint8_t, 4> kOatMagic { { 'o', 'a', 't', '\n' } };
-  // Last oat version changed reason: Update deoptimization from runtime methods.
-  static constexpr std::array<uint8_t, 4> kOatVersion { { '2', '2', '6', '\0' } };
+  // Last oat version changed reason: Don't use instrumentation stubs for native methods.
+  static constexpr std::array<uint8_t, 4> kOatVersion { { '2', '2', '7', '\0' } };
 
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
   static constexpr const char* kDebuggableKey = "debuggable";
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 8adc3b3..40a1c16 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -399,9 +399,10 @@
       return true;
     } else if (method->IsNative()) {
       // If we return from JNI with a pending exception and want to deoptimize, we need to skip
-      // the native method.
-      // The top method is a runtime method, the native method comes next.
-      CHECK_EQ(GetFrameDepth(), 1U);
+      // the native method. The top method is a runtime method, the native method comes next.
+      // We also deoptimize for method instrumentation reasons from the method entry / exit
+      // callbacks. In these cases the native method is at the top of the stack.
+      CHECK((GetFrameDepth() == 1U) || (GetFrameDepth() == 0U));
       callee_method_ = method;
       return true;
     } else if (!single_frame_deopt_ &&
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index ba3af80..d3ca77d 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -3195,14 +3195,12 @@
   // If we've already started and we are setting this runtime to debuggable,
   // we patch entry points of methods in boot image to interpreter bridge, as
   // boot image code may be AOT compiled as not debuggable.
-  if (!GetInstrumentation()->IsForcedInterpretOnly()) {
-    UpdateEntryPointsClassVisitor visitor(GetInstrumentation());
-    GetClassLinker()->VisitClasses(&visitor);
-    jit::Jit* jit = GetJit();
-    if (jit != nullptr) {
-      // Code previously compiled may not be compiled debuggable.
-      jit->GetCodeCache()->TransitionToDebuggable();
-    }
+  UpdateEntryPointsClassVisitor visitor(GetInstrumentation());
+  GetClassLinker()->VisitClasses(&visitor);
+  jit::Jit* jit = GetJit();
+  if (jit != nullptr) {
+    // Code previously compiled may not be compiled debuggable.
+    jit->GetCodeCache()->TransitionToDebuggable();
   }
 }
 
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 9c909c4..91f1644 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -505,6 +505,10 @@
     return OFFSETOF_MEMBER(Runtime, callee_save_methods_[static_cast<size_t>(type)]);
   }
 
+  static constexpr MemberOffset GetInstrumentationOffset() {
+    return MemberOffset(OFFSETOF_MEMBER(Runtime, instrumentation_));
+  }
+
   InstructionSet GetInstructionSet() const {
     return instruction_set_;
   }
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 0c53609..94c539f 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -3592,6 +3592,7 @@
   QUICK_ENTRY_POINT_INFO(pAputObject)
   QUICK_ENTRY_POINT_INFO(pJniMethodStart)
   QUICK_ENTRY_POINT_INFO(pJniMethodEnd)
+  QUICK_ENTRY_POINT_INFO(pJniMethodEntryHook)
   QUICK_ENTRY_POINT_INFO(pJniDecodeReferenceResult)
   QUICK_ENTRY_POINT_INFO(pJniLockObject)
   QUICK_ENTRY_POINT_INFO(pJniUnlockObject)
diff --git a/tools/cpp-define-generator/runtime.def b/tools/cpp-define-generator/runtime.def
index 2a2e303..4407e0c 100644
--- a/tools/cpp-define-generator/runtime.def
+++ b/tools/cpp-define-generator/runtime.def
@@ -30,3 +30,9 @@
            art::Runtime::GetCalleeSaveMethodOffset(art::CalleeSaveType::kSaveRefsAndArgs))
 ASM_DEFINE(RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET,
            art::Runtime::GetCalleeSaveMethodOffset(art::CalleeSaveType::kSaveRefsOnly))
+ASM_DEFINE(RUNTIME_INSTRUMENTATION_OFFSET, art::Runtime::GetInstrumentationOffset().Int32Value())
+ASM_DEFINE(INSTRUMENTATION_STUBS_INSTALLED_OFFSET,
+           art::instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value())
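+// Combined constant: lets assembly reach the flag with a single load relative to the Runtime
+// instance.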
+ASM_DEFINE(INSTRUMENTATION_STUBS_INSTALLED_OFFSET_FROM_RUNTIME_INSTANCE,
+           art::Runtime::GetInstrumentationOffset().Int32Value() +
+           art::instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value())