Reland^3 "Don't use AOT code for native methods for java debuggable runtime"

This reverts commit fb1b08cbb9c6ac149d75de16c14fdaa8b68baaa4.

Reason for revert: Reland after a fix. We had to update untagging in jni_dlsym_lookup_stub as well.
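
The fix relies on tagging the stack pointer stored in the thread's managed
stack when entering a JNI stub, so the stack walker can tell which kind of
stub produced the top quick frame. A rough sketch of the tag-bit layout
(illustrative only, mirroring the accessors added to managed_stack.h in this
change; SP is at least 4-byte aligned, so bits 0-1 are free):

    // bit 0: GenericJNI frame (SetTopOfStackGenericJniTagged)
    // bit 1: JITed JNI stub frame (tagged only in debuggable runtimes)
    bool GetGenericJniTag() const { return (tagged_sp_ & 1u) != 0u; }
    bool GetJitJniTag() const { return (tagged_sp_ & 2u) != 0u; }
    ArtMethod** GetSp() const {
      return reinterpret_cast<ArtMethod**>(tagged_sp_ & ~static_cast<uintptr_t>(kTaggedJniSpMask));
    }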

Change-Id: Id936e9e60f9e87e96f1a9a79cd2118631ad1616b
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index 0a1f017..d05324b 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -414,7 +414,7 @@
   CHECK(!caller->IsCriticalNative());
   CHECK(caller->IsSynchronized());
   ObjPtr<mirror::Object> lock;
-  if (self->GetManagedStack()->GetTopQuickFrameTag()) {
+  if (self->GetManagedStack()->GetTopQuickFrameGenericJniTag()) {
     // Generic JNI.
     lock = GetGenericJniSynchronizationObject(self, caller);
   } else if (caller->IsStatic()) {
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 6cb5021..b88ebaf 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -95,6 +95,13 @@
   const InstructionSetFeatures* instruction_set_features =
       compiler_options.GetInstructionSetFeatures();
 
+  // When walking the stack, the top frame doesn't have a pc associated with it. We then rely on
+  // the invariant that we don't use JITed code when AOT code is available. In debuggable runtimes
+  // this invariant doesn't hold, so we tag the SP for JITed code to identify whether we are
+  // executing JITed code or AOT code. Since tagging involves additional instructions, we tag only
+  // in debuggable runtimes.
+  bool should_tag_sp = compiler_options.GetDebuggable() && compiler_options.IsJitCompiler();
+
   // i.e. if the method was annotated with @FastNative
   const bool is_fast_native = (access_flags & kAccFastNative) != 0u;
 
@@ -219,7 +226,7 @@
   //       because garbage collections are disabled within the execution of a
   //       @CriticalNative method.
   if (LIKELY(!is_critical_native)) {
-    __ StoreStackPointerToThread(Thread::TopOfManagedStackOffset<kPointerSize>());
+    __ StoreStackPointerToThread(Thread::TopOfManagedStackOffset<kPointerSize>(), should_tag_sp);
   }
 
   // 2. Lock the object (if synchronized) and transition out of Runnable (if normal native).
@@ -605,7 +612,7 @@
     if (reference_return) {
       // Suspend check entry point overwrites top of managed stack and leaves it clobbered.
       // We need to restore the top for subsequent runtime call to `JniDecodeReferenceResult()`.
-      __ StoreStackPointerToThread(Thread::TopOfManagedStackOffset<kPointerSize>());
+      __ StoreStackPointerToThread(Thread::TopOfManagedStackOffset<kPointerSize>(), should_tag_sp);
     }
     if (reference_return && main_out_arg_size != 0) {
       __ IncreaseFrameSize(main_out_arg_size);
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
index 6e6d40d..61151fe 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
@@ -428,8 +428,15 @@
   asm_.StoreToOffset(kStoreWord, scratch, tr, thr_offs.Int32Value());
 }
 
-void ArmVIXLJNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) {
-  asm_.StoreToOffset(kStoreWord, sp, tr, thr_offs.Int32Value());
+void ArmVIXLJNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs, bool tag_sp) {
+  if (tag_sp) {
+    UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+    vixl32::Register reg = temps.Acquire();
+    ___ Orr(reg, sp, 0x2);
+    asm_.StoreToOffset(kStoreWord, reg, tr, thr_offs.Int32Value());
+  } else {
+    asm_.StoreToOffset(kStoreWord, sp, tr, thr_offs.Int32Value());
+  }
 }
 
 void ArmVIXLJNIMacroAssembler::SignExtend(ManagedRegister mreg ATTRIBUTE_UNUSED,
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
index ed453ae..980de41 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
@@ -70,7 +70,7 @@
 
   void StoreStackOffsetToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs) override;
 
-  void StoreStackPointerToThread(ThreadOffset32 thr_offs) override;
+  void StoreStackPointerToThread(ThreadOffset32 thr_offs, bool tag_sp) override;
 
   void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off) override;
 
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
index 50ca468..323a01e 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -218,10 +218,13 @@
   ___ Str(scratch, MEM_OP(reg_x(TR), tr_offs.Int32Value()));
 }
 
-void Arm64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 tr_offs) {
+void Arm64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 tr_offs, bool tag_sp) {
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
   Register scratch = temps.AcquireX();
   ___ Mov(scratch, reg_x(SP));
+  if (tag_sp) {
+    ___ Orr(scratch, scratch, 0x2);
+  }
   ___ Str(scratch, MEM_OP(reg_x(TR), tr_offs.Int32Value()));
 }
 
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.h b/compiler/utils/arm64/jni_macro_assembler_arm64.h
index 2c04184..daea95d 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.h
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.h
@@ -72,7 +72,7 @@
   void StoreRawPtr(FrameOffset dest, ManagedRegister src) override;
   void StoreImmediateToFrame(FrameOffset dest, uint32_t imm) override;
   void StoreStackOffsetToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs) override;
-  void StoreStackPointerToThread(ThreadOffset64 thr_offs) override;
+  void StoreStackPointerToThread(ThreadOffset64 thr_offs, bool tag_sp) override;
   void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off) override;
 
   // Load routines.
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index b2d4dcd..f867a05 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -159,7 +159,8 @@
   __ StoreRef(FrameOffset(48), scratch_register);
   __ StoreSpanning(FrameOffset(48), method_register, FrameOffset(48));
   __ StoreStackOffsetToThread(ThreadOffset32(512), FrameOffset(4096));
-  __ StoreStackPointerToThread(ThreadOffset32(512));
+  __ StoreStackPointerToThread(ThreadOffset32(512), false);
+  __ StoreStackPointerToThread(ThreadOffset32(512), true);
 
   // Other
   __ Call(method_register, FrameOffset(48));
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index 1775014..dac21ae 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -41,46 +41,46 @@
   "      7e: 0d f5 80 5c   add.w r12, sp, #4096\n"
   "      82: c9 f8 00 c2   str.w r12, [r9, #512]\n"
   "      86: c9 f8 00 d2   str.w sp, [r9, #512]\n"
-  "      8a: d0 f8 30 e0   ldr.w lr, [r0, #48]\n"
-  "      8e: f0 47         blx lr\n"
-  "      90: dd f8 2c c0   ldr.w r12, [sp, #44]\n"
-  "      94: cd f8 30 c0   str.w r12, [sp, #48]\n"
-  "      98: d9 f8 00 c2   ldr.w r12, [r9, #512]\n"
-  "      9c: cd f8 2c c0   str.w r12, [sp, #44]\n"
-  "      a0: dd f8 2c c0   ldr.w r12, [sp, #44]\n"
-  "      a4: cd f8 30 c0   str.w r12, [sp, #48]\n"
-  "      a8: 48 46         mov r0, r9\n"
-  "      aa: cd f8 30 90   str.w r9, [sp, #48]\n"
-  "      ae: 04 46         mov r4, r0\n"
-  "      b0: 0d f1 30 0c   add.w r12, sp, #48\n"
-  "      b4: bb f1 00 0f   cmp.w r11, #0\n"
-  "      b8: 18 bf         it ne\n"
-  "      ba: e3 46         movne r11, r12\n"
-  "      bc: 0d f1 30 0b   add.w r11, sp, #48\n"
-  "      c0: 5f ea 0b 00   movs.w r0, r11\n"
-  "      c4: 18 bf         it ne\n"
-  "      c6: 0c a8         addne r0, sp, #48\n"
-  "      c8: dd f8 40 c0   ldr.w r12, [sp, #64]\n"
-  "      cc: bc f1 00 0f   cmp.w r12, #0\n"
-  "      d0: 18 bf         it ne\n"
-  "      d2: 0d f1 40 0c   addne.w r12, sp, #64\n"
-  "      d6: cd f8 30 c0   str.w r12, [sp, #48]\n"
-  "      da: 5f ea 0b 00   movs.w r0, r11\n"
-  "      de: 18 bf         it ne\n"
-  "      e0: 00 a8         addne r0, sp, #0\n"
-  "      e2: 0d f2 04 40   addw r0, sp, #1028\n"
-  "      e6: bb f1 00 0f   cmp.w r11, #0\n"
-  "      ea: 08 bf         it eq\n"
-  "      ec: 58 46         moveq r0, r11\n"
-  "      ee: 0d f2 04 4c   addw r12, sp, #1028\n"
-  "      f2: bb f1 00 0f   cmp.w r11, #0\n"
-  "      f6: 18 bf         it ne\n"
-  "      f8: e3 46         movne r11, r12\n"
-  "      fa: d9 f8 9c c0   ldr.w r12, [r9, #156]\n"
-  "      fe: bc f1 00 0f   cmp.w r12, #0\n"
-  "     102: 71 d1         bne 0x1e8     @ imm = #226\n"
-  "     104: cd f8 ff c7   str.w r12, [sp, #2047]\n"
-  "     108: cd f8 ff c7   str.w r12, [sp, #2047]\n"
+  "      8a: 4d f0 02 0c   orr r12, sp, #2\n"
+  "      8e: c9 f8 00 c2   str.w r12, [r9, #512]\n"
+  "      92: d0 f8 30 e0   ldr.w lr, [r0, #48]\n"
+  "      96: f0 47         blx lr\n"
+  "      98: dd f8 2c c0   ldr.w r12, [sp, #44]\n"
+  "      9c: cd f8 30 c0   str.w r12, [sp, #48]\n"
+  "      a0: d9 f8 00 c2   ldr.w r12, [r9, #512]\n"
+  "      a4: cd f8 2c c0   str.w r12, [sp, #44]\n"
+  "      a8: dd f8 2c c0   ldr.w r12, [sp, #44]\n"
+  "      ac: cd f8 30 c0   str.w r12, [sp, #48]\n"
+  "      b0: 48 46         mov r0, r9\n"
+  "      b2: cd f8 30 90   str.w r9, [sp, #48]\n"
+  "      b6: 04 46         mov r4, r0\n"
+  "      b8: 0d f1 30 0c   add.w r12, sp, #48\n"
+  "      bc: bb f1 00 0f   cmp.w r11, #0\n"
+  "      c0: 18 bf         it ne\n"
+  "      c2: e3 46         movne r11, r12\n"
+  "      c4: 0d f1 30 0b   add.w r11, sp, #48\n"
+  "      c8: 5f ea 0b 00   movs.w r0, r11\n"
+  "      cc: 18 bf         it ne\n"
+  "      ce: 0c a8         addne r0, sp, #48\n"
+  "      d0: dd f8 40 c0   ldr.w r12, [sp, #64]\n"
+  "      d4: bc f1 00 0f   cmp.w r12, #0\n"
+  "      d8: 18 bf         it ne\n"
+  "      da: 0d f1 40 0c   addne.w r12, sp, #64\n"
+  "      de: cd f8 30 c0   str.w r12, [sp, #48]\n"
+  "      e2: 5f ea 0b 00   movs.w r0, r11\n"
+  "      e6: 18 bf         it ne\n"
+  "      e8: 00 a8         addne r0, sp, #0\n"
+  "      ea: 0d f2 04 40   addw r0, sp, #1028\n"
+  "      ee: bb f1 00 0f   cmp.w r11, #0\n"
+  "      f2: 08 bf         it eq\n"
+  "      f4: 58 46         moveq r0, r11\n"
+  "      f6: 0d f2 04 4c   addw r12, sp, #1028\n"
+  "      fa: bb f1 00 0f   cmp.w r11, #0\n"
+  "      fe: 18 bf         it ne\n"
+  "     100: e3 46         movne r11, r12\n"
+  "     102: d9 f8 9c c0   ldr.w r12, [r9, #156]\n"
+  "     106: bc f1 00 0f   cmp.w r12, #0\n"
+  "     10a: 71 d1         bne 0x1f0     @ imm = #226\n"
   "     10c: cd f8 ff c7   str.w r12, [sp, #2047]\n"
   "     110: cd f8 ff c7   str.w r12, [sp, #2047]\n"
   "     114: cd f8 ff c7   str.w r12, [sp, #2047]\n"
@@ -135,26 +135,28 @@
   "     1d8: cd f8 ff c7   str.w r12, [sp, #2047]\n"
   "     1dc: cd f8 ff c7   str.w r12, [sp, #2047]\n"
   "     1e0: cd f8 ff c7   str.w r12, [sp, #2047]\n"
-  "     1e4: 00 f0 02 b8   b.w 0x1ec     @ imm = #4\n"
-  "     1e8: 00 f0 1b b8   b.w 0x222     @ imm = #54\n"
-  "     1ec: cd f8 ff c7   str.w r12, [sp, #2047]\n"
-  "     1f0: cd f8 ff c7   str.w r12, [sp, #2047]\n"
+  "     1e4: cd f8 ff c7   str.w r12, [sp, #2047]\n"
+  "     1e8: cd f8 ff c7   str.w r12, [sp, #2047]\n"
+  "     1ec: 00 f0 02 b8   b.w 0x1f4     @ imm = #4\n"
+  "     1f0: 00 f0 1b b8   b.w 0x22a     @ imm = #54\n"
   "     1f4: cd f8 ff c7   str.w r12, [sp, #2047]\n"
   "     1f8: cd f8 ff c7   str.w r12, [sp, #2047]\n"
   "     1fc: cd f8 ff c7   str.w r12, [sp, #2047]\n"
   "     200: cd f8 ff c7   str.w r12, [sp, #2047]\n"
   "     204: cd f8 ff c7   str.w r12, [sp, #2047]\n"
   "     208: cd f8 ff c7   str.w r12, [sp, #2047]\n"
-  "     20c: 0d f5 80 5d   add.w sp, sp, #4096\n"
-  "     210: 08 b0         add sp, #32\n"
-  "     212: 01 b0         add sp, #4\n"
-  "     214: bd ec 10 8a   vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}\n"
-  "     218: bd e8 e0 4d   pop.w {r5, r6, r7, r8, r10, r11, lr}\n"
-  "     21c: d9 f8 24 80   ldr.w r8, [r9, #36]\n"
-  "     220: 70 47         bx lr\n"
-  "     222: d9 f8 9c 00   ldr.w r0, [r9, #156]\n"
-  "     226: d9 f8 d0 e2   ldr.w lr, [r9, #720]\n"
-  "     22a: f0 47         blx lr\n"
+  "     20c: cd f8 ff c7   str.w r12, [sp, #2047]\n"
+  "     210: cd f8 ff c7   str.w r12, [sp, #2047]\n"
+  "     214: 0d f5 80 5d   add.w sp, sp, #4096\n"
+  "     218: 08 b0         add sp, #32\n"
+  "     21a: 01 b0         add sp, #4\n"
+  "     21c: bd ec 10 8a   vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}\n"
+  "     220: bd e8 e0 4d   pop.w {r5, r6, r7, r8, r10, r11, lr}\n"
+  "     224: d9 f8 24 80   ldr.w r8, [r9, #36]\n"
+  "     228: 70 47         bx lr\n"
+  "     22a: d9 f8 9c 00   ldr.w r0, [r9, #156]\n"
+  "     22e: d9 f8 d0 e2   ldr.w lr, [r9, #720]\n"
+  "     232: f0 47         blx lr\n"
 };
 
 const char* const VixlLoadFromOffsetResults = {
diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h
index 7022e3d..c8c713a 100644
--- a/compiler/utils/jni_macro_assembler.h
+++ b/compiler/utils/jni_macro_assembler.h
@@ -126,7 +126,11 @@
   virtual void StoreStackOffsetToThread(ThreadOffset<kPointerSize> thr_offs,
                                         FrameOffset fr_offs) = 0;
 
-  virtual void StoreStackPointerToThread(ThreadOffset<kPointerSize> thr_offs) = 0;
+  // Stores the stack pointer, tagging it if required, so we can walk the stack. In debuggable
+  // runtimes we use the tag to tell whether we are executing JITed code or AOT code. In
+  // non-debuggable runtimes we never use JITed code when AOT code is present, so checking for AOT
+  // code is sufficient to detect which code is being executed. We avoid tagging in non-debuggable
+  // runtimes to reduce instructions.
+  virtual void StoreStackPointerToThread(ThreadOffset<kPointerSize> thr_offs, bool tag_sp) = 0;
 
   virtual void StoreSpanning(FrameOffset dest,
                              ManagedRegister src,
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc
index 685f5f1..55d5428 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.cc
+++ b/compiler/utils/x86/jni_macro_assembler_x86.cc
@@ -187,8 +187,18 @@
   __ fs()->movl(Address::Absolute(thr_offs), scratch);
 }
 
-void X86JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) {
-  __ fs()->movl(Address::Absolute(thr_offs), ESP);
+void X86JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs, bool tag_sp) {
+  if (tag_sp) {
+    // There is no free register; save a scratch register on the stack and restore it afterwards.
+    Register scratch = ECX;
+    __ movl(Address(ESP, -32), scratch);
+    __ movl(scratch, ESP);
+    __ orl(scratch, Immediate(0x2));
+    __ fs()->movl(Address::Absolute(thr_offs), scratch);
+    __ movl(scratch, Address(ESP, -32));
+  } else {
+    __ fs()->movl(Address::Absolute(thr_offs), ESP);
+  }
 }
 
 void X86JNIMacroAssembler::StoreSpanning(FrameOffset /*dst*/,
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.h b/compiler/utils/x86/jni_macro_assembler_x86.h
index 29fccfd..f8ce38b 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.h
+++ b/compiler/utils/x86/jni_macro_assembler_x86.h
@@ -66,7 +66,7 @@
 
   void StoreStackOffsetToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs) override;
 
-  void StoreStackPointerToThread(ThreadOffset32 thr_offs) override;
+  void StoreStackPointerToThread(ThreadOffset32 thr_offs, bool tag_sp) override;
 
   void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off) override;
 
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
index d5d1bba..adc431f 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
@@ -217,8 +217,15 @@
   __ gs()->movq(Address::Absolute(thr_offs, true), scratch);
 }
 
-void X86_64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 thr_offs) {
-  __ gs()->movq(Address::Absolute(thr_offs, true), CpuRegister(RSP));
+void X86_64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 thr_offs, bool tag_sp) {
+  if (tag_sp) {
+    CpuRegister reg = GetScratchRegister();
+    __ movq(reg, CpuRegister(RSP));
+    __ orq(reg, Immediate(0x2));
+    __ gs()->movq(Address::Absolute(thr_offs, true), reg);
+  } else {
+    __ gs()->movq(Address::Absolute(thr_offs, true), CpuRegister(RSP));
+  }
 }
 
 void X86_64JNIMacroAssembler::StoreSpanning(FrameOffset /*dst*/,
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
index e080f0b..feaf27e 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
@@ -67,7 +67,7 @@
 
   void StoreStackOffsetToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs) override;
 
-  void StoreStackPointerToThread(ThreadOffset64 thr_offs) override;
+  void StoreStackPointerToThread(ThreadOffset64 thr_offs, bool tag_sp) override;
 
   void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off) override;
 
diff --git a/runtime/arch/arm/jni_entrypoints_arm.S b/runtime/arch/arm/jni_entrypoints_arm.S
index fc57df7..7270d20 100644
--- a/runtime/arch/arm/jni_entrypoints_arm.S
+++ b/runtime/arch/arm/jni_entrypoints_arm.S
@@ -99,7 +99,7 @@
     // Call artFindNativeMethod() for normal native and artFindNativeMethodRunnable()
     // for @FastNative or @CriticalNative.
     ldr    ip, [r0, #THREAD_TOP_QUICK_FRAME_OFFSET]   // uintptr_t tagged_quick_frame
-    bic    ip, #1                                     // ArtMethod** sp
+    bic    ip, #TAGGED_JNI_SP_MASK                    // ArtMethod** sp
     ldr    ip, [ip]                                   // ArtMethod* method
     ldr    ip, [ip, #ART_METHOD_ACCESS_FLAGS_OFFSET]  // uint32_t access_flags
     tst    ip, #(ACCESS_FLAGS_METHOD_IS_FAST_NATIVE | ACCESS_FLAGS_METHOD_IS_CRITICAL_NATIVE)
diff --git a/runtime/arch/arm64/jni_entrypoints_arm64.S b/runtime/arch/arm64/jni_entrypoints_arm64.S
index 463767c..b3ea40d 100644
--- a/runtime/arch/arm64/jni_entrypoints_arm64.S
+++ b/runtime/arch/arm64/jni_entrypoints_arm64.S
@@ -103,7 +103,7 @@
     // Call artFindNativeMethod() for normal native and artFindNativeMethodRunnable()
     // for @FastNative or @CriticalNative.
     ldr   xIP0, [x0, #THREAD_TOP_QUICK_FRAME_OFFSET]      // uintptr_t tagged_quick_frame
-    bic   xIP0, xIP0, #1                                  // ArtMethod** sp
+    bic   xIP0, xIP0, #TAGGED_JNI_SP_MASK                 // ArtMethod** sp
     ldr   xIP0, [xIP0]                                    // ArtMethod* method
     ldr   xIP0, [xIP0, #ART_METHOD_ACCESS_FLAGS_OFFSET]   // uint32_t access_flags
     mov   xIP1, #(ACCESS_FLAGS_METHOD_IS_FAST_NATIVE | ACCESS_FLAGS_METHOD_IS_CRITICAL_NATIVE)
diff --git a/runtime/arch/x86/jni_entrypoints_x86.S b/runtime/arch/x86/jni_entrypoints_x86.S
index d827509..4b43814 100644
--- a/runtime/arch/x86/jni_entrypoints_x86.S
+++ b/runtime/arch/x86/jni_entrypoints_x86.S
@@ -98,7 +98,7 @@
     // for @FastNative or @CriticalNative.
     movl (%esp), %eax                                // Thread* self
     movl THREAD_TOP_QUICK_FRAME_OFFSET(%eax), %eax   // uintptr_t tagged_quick_frame
-    andl LITERAL(0xfffffffe), %eax                   // ArtMethod** sp
+    andl LITERAL(TAGGED_JNI_SP_MASK_TOGGLED32), %eax // ArtMethod** sp
     movl (%eax), %eax                                // ArtMethod* method
     testl LITERAL(ACCESS_FLAGS_METHOD_IS_FAST_NATIVE | ACCESS_FLAGS_METHOD_IS_CRITICAL_NATIVE), \
           ART_METHOD_ACCESS_FLAGS_OFFSET(%eax)
diff --git a/runtime/arch/x86_64/jni_entrypoints_x86_64.S b/runtime/arch/x86_64/jni_entrypoints_x86_64.S
index 0d5fa3f..d2f1fe1 100644
--- a/runtime/arch/x86_64/jni_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/jni_entrypoints_x86_64.S
@@ -118,7 +118,7 @@
     // Call artFindNativeMethod() for normal native and artFindNativeMethodRunnable()
     // for @FastNative or @CriticalNative.
     movq THREAD_TOP_QUICK_FRAME_OFFSET(%rdi), %rax   // uintptr_t tagged_quick_frame
-    andq LITERAL(0xfffffffffffffffe), %rax           // ArtMethod** sp
+    andq LITERAL(TAGGED_JNI_SP_MASK_TOGGLED64), %rax // ArtMethod** sp
     movq (%rax), %rax                                // ArtMethod* method
     testl LITERAL(ACCESS_FLAGS_METHOD_IS_FAST_NATIVE | ACCESS_FLAGS_METHOD_IS_CRITICAL_NATIVE), \
           ART_METHOD_ACCESS_FLAGS_OFFSET(%rax)
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index 867f75c..40b7a7b 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -617,6 +617,15 @@
   }
 
   OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromEntryPoint(oat_entry_point);
+  // We could have existing Oat code for native methods but we may not use it if the runtime is
+  // Java debuggable or when profiling the boot class path. There is no easy way to check whether
+  // the pc corresponds to QuickGenericJniStub. Since we have eliminated all the other cases, if
+  // the pc doesn't correspond to the AOT code then we must be running QuickGenericJniStub.
+  if (IsNative() && !method_header->Contains(pc)) {
+    DCHECK_NE(pc, 0u) << "PC 0 for " << PrettyMethod();
+    return nullptr;
+  }
+
   DCHECK(method_header->Contains(pc))
       << PrettyMethod()
       << " " << std::hex << pc << " " << oat_entry_point
diff --git a/runtime/entrypoints/jni/jni_entrypoints.cc b/runtime/entrypoints/jni/jni_entrypoints.cc
index c78b604..80eb89f 100644
--- a/runtime/entrypoints/jni/jni_entrypoints.cc
+++ b/runtime/entrypoints/jni/jni_entrypoints.cc
@@ -87,11 +87,11 @@
     }
 
     // Replace the runtime method on the stack with the target method.
-    DCHECK(!self->GetManagedStack()->GetTopQuickFrameTag());
+    DCHECK(!self->GetManagedStack()->GetTopQuickFrameGenericJniTag());
     ArtMethod** sp = self->GetManagedStack()->GetTopQuickFrameKnownNotTagged();
     DCHECK(*sp == Runtime::Current()->GetCalleeSaveMethod(CalleeSaveType::kSaveRefsAndArgs));
     *sp = target_method;
-    self->SetTopOfStackTagged(sp);  // Fake GenericJNI frame.
+    self->SetTopOfStackGenericJniTagged(sp);  // Fake GenericJNI frame.
 
     // Continue with the target method.
     method = target_method;
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index f1d6813..ea6501c 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -2083,7 +2083,7 @@
   }
 
   // Fix up managed-stack things in Thread. After this we can walk the stack.
-  self->SetTopOfStackTagged(managed_sp);
+  self->SetTopOfStackGenericJniTagged(managed_sp);
 
   self->VerifyStack();
 
@@ -2177,7 +2177,7 @@
   // anything that requires a mutator lock before that would cause problems as GC may have the
   // exclusive mutator lock and may be moving objects, etc.
   ArtMethod** sp = self->GetManagedStack()->GetTopQuickFrame();
-  DCHECK(self->GetManagedStack()->GetTopQuickFrameTag());
+  DCHECK(self->GetManagedStack()->GetTopQuickFrameGenericJniTag());
   uint32_t* sp32 = reinterpret_cast<uint32_t*>(sp);
   ArtMethod* called = *sp;
   uint32_t cookie = *(sp32 - 1);
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 24bc6b8..58a98c8 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -310,16 +310,11 @@
          Runtime::Current()->GetRuntimeCallbacks()->IsMethodBeingInspected(method);
 }
 
-static bool CanUseAotCode(ArtMethod* method, const void* quick_code)
+static bool CanUseAotCode(const void* quick_code)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   if (quick_code == nullptr) {
     return false;
   }
-  if (method->IsNative()) {
-    // AOT code for native methods can always be used.
-    return true;
-  }
-
   Runtime* runtime = Runtime::Current();
   // For simplicity, we never use AOT code for debuggable.
   if (runtime->IsJavaDebuggable()) {
@@ -355,7 +350,7 @@
   // In debuggable mode, we can only use AOT code for native methods.
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   const void* aot_code = method->GetOatMethodQuickCode(class_linker->GetImagePointerSize());
-  if (CanUseAotCode(method, aot_code)) {
+  if (CanUseAotCode(aot_code)) {
     return aot_code;
   }
 
@@ -414,7 +409,7 @@
   }
 
   // Use the provided AOT code if possible.
-  if (CanUseAotCode(method, aot_code)) {
+  if (CanUseAotCode(aot_code)) {
     UpdateEntryPoints(method, aot_code);
     return;
   }
diff --git a/runtime/managed_stack.h b/runtime/managed_stack.h
index 04a27fe..0e7dfe3 100644
--- a/runtime/managed_stack.h
+++ b/runtime/managed_stack.h
@@ -43,6 +43,8 @@
 // code.
 class PACKED(4) ManagedStack {
  public:
+  static size_t constexpr kTaggedJniSpMask = 0x3;
+
   ManagedStack()
       : tagged_top_quick_frame_(TaggedTopQuickFrame::CreateNotTagged(nullptr)),
         link_(nullptr),
@@ -75,8 +77,12 @@
     return tagged_top_quick_frame_.GetSp();
   }
 
-  bool GetTopQuickFrameTag() const {
-    return tagged_top_quick_frame_.GetTag();
+  bool GetTopQuickFrameGenericJniTag() const {
+    return tagged_top_quick_frame_.GetGenericJniTag();
+  }
+
+  bool GetTopQuickFrameJitJniTag() const {
+    return tagged_top_quick_frame_.GetJitJniTag();
   }
 
   bool HasTopQuickFrame() const {
@@ -89,10 +95,10 @@
     tagged_top_quick_frame_ = TaggedTopQuickFrame::CreateNotTagged(top);
   }
 
-  void SetTopQuickFrameTagged(ArtMethod** top) {
+  void SetTopQuickFrameGenericJniTagged(ArtMethod** top) {
     DCHECK(top_shadow_frame_ == nullptr);
     DCHECK_ALIGNED(top, 4u);
-    tagged_top_quick_frame_ = TaggedTopQuickFrame::CreateTagged(top);
+    tagged_top_quick_frame_ = TaggedTopQuickFrame::CreateGenericJniTagged(top);
   }
 
   static constexpr size_t TaggedTopQuickFrameOffset() {
@@ -129,26 +135,30 @@
       return TaggedTopQuickFrame(reinterpret_cast<uintptr_t>(sp));
     }
 
-    static TaggedTopQuickFrame CreateTagged(ArtMethod** sp) {
+    static TaggedTopQuickFrame CreateGenericJniTagged(ArtMethod** sp) {
       DCHECK_ALIGNED(sp, 4u);
       return TaggedTopQuickFrame(reinterpret_cast<uintptr_t>(sp) | 1u);
     }
 
     // Get SP known to be not tagged and non-null.
     ArtMethod** GetSpKnownNotTagged() const {
-      DCHECK(!GetTag());
+      DCHECK(!GetGenericJniTag() && !GetJitJniTag());
       DCHECK_NE(tagged_sp_, 0u);
       return reinterpret_cast<ArtMethod**>(tagged_sp_);
     }
 
     ArtMethod** GetSp() const {
-      return reinterpret_cast<ArtMethod**>(tagged_sp_ & ~static_cast<uintptr_t>(1u));
+      return reinterpret_cast<ArtMethod**>(tagged_sp_ & ~static_cast<uintptr_t>(kTaggedJniSpMask));
     }
 
-    bool GetTag() const {
+    bool GetGenericJniTag() const {
       return (tagged_sp_ & 1u) != 0u;
     }
 
+    bool GetJitJniTag() const {
+      return (tagged_sp_ & 2u) != 0u;
+    }
+
     uintptr_t GetTaggedSp() const {
       return tagged_sp_;
     }
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 9a6c8a5..d2eb3bd 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -3147,15 +3147,19 @@
     auto pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
     for (auto& m : klass->GetMethods(pointer_size)) {
       const void* code = m.GetEntryPointFromQuickCompiledCode();
+      // For Java debuggable runtimes we also deoptimize native methods. For other cases (boot
+      // image profiling) we don't need to deoptimize native methods. If this changes, also
+      // update Instrumentation::CanUseAotCode.
+      bool deoptimize_native_methods = Runtime::Current()->IsJavaDebuggable();
       if (Runtime::Current()->GetHeap()->IsInBootImageOatFile(code) &&
-          !m.IsNative() &&
+          (!m.IsNative() || deoptimize_native_methods) &&
           !m.IsProxyMethod()) {
         instrumentation_->InitializeMethodsCode(&m, /*aot_code=*/ nullptr);
       }
 
       if (Runtime::Current()->GetJit() != nullptr &&
           Runtime::Current()->GetJit()->GetCodeCache()->IsInZygoteExecSpace(code) &&
-          !m.IsNative()) {
+          (!m.IsNative() || deoptimize_native_methods)) {
         DCHECK(!m.IsProxyMethod());
         instrumentation_->InitializeMethodsCode(&m, /*aot_code=*/ nullptr);
       }
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 50a96d0..33d3668 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -800,10 +800,20 @@
         // between GenericJNI frame and JIT-compiled JNI stub; the entrypoint may have
         // changed since the frame was entered. The top quick frame tag indicates
         // GenericJNI here, otherwise it's either AOT-compiled or JNI-compiled JNI stub.
-        if (UNLIKELY(current_fragment->GetTopQuickFrameTag())) {
+        if (UNLIKELY(current_fragment->GetTopQuickFrameGenericJniTag())) {
           // The generic JNI does not have any method header.
           cur_oat_quick_method_header_ = nullptr;
+        } else if (UNLIKELY(current_fragment->GetTopQuickFrameJitJniTag())) {
+          // Should be JITed code.
+          Runtime* runtime = Runtime::Current();
+          const void* code = runtime->GetJit()->GetCodeCache()->GetJniStubCode(method);
+          CHECK(code != nullptr) << method->PrettyMethod();
+          cur_oat_quick_method_header_ = OatQuickMethodHeader::FromCodePointer(code);
         } else {
+          // We are sure we are not running GenericJNI here, though the entry point could still
+          // be the GenericJNI stub. The entry point is usually JITed code, AOT code, or the
+          // instrumentation stub when instrumentation is enabled. It could also be a resolution
+          // stub if the class isn't visibly initialized yet.
           const void* existing_entry_point = method->GetEntryPointFromQuickCompiledCode();
           CHECK(existing_entry_point != nullptr);
           Runtime* runtime = Runtime::Current();
@@ -819,7 +829,11 @@
             if (code != nullptr) {
               cur_oat_quick_method_header_ = OatQuickMethodHeader::FromEntryPoint(code);
             } else {
-              // This must be a JITted JNI stub frame.
+              // This must be a JITted JNI stub frame. In non-debuggable runtimes we only generate
+              // JIT stubs when there is no AOT stub for a native method; since we checked for AOT
+              // code earlier, we must be running JITed code. Debuggable runtimes might have JIT
+              // code even when AOT code is present, but there we tag the SP in JITed JNI stubs,
+              // and that case is handled earlier.
               CHECK(runtime->GetJit() != nullptr);
               code = runtime->GetJit()->GetCodeCache()->GetJniStubCode(method);
               CHECK(code != nullptr) << method->PrettyMethod();
diff --git a/runtime/thread.h b/runtime/thread.h
index b32e3c2..7ac4007 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -573,8 +573,8 @@
     tlsPtr_.managed_stack.SetTopQuickFrame(top_method);
   }
 
-  void SetTopOfStackTagged(ArtMethod** top_method) {
-    tlsPtr_.managed_stack.SetTopQuickFrameTagged(top_method);
+  void SetTopOfStackGenericJniTagged(ArtMethod** top_method) {
+    tlsPtr_.managed_stack.SetTopQuickFrameGenericJniTagged(top_method);
   }
 
   void SetTopOfShadowStack(ShadowFrame* top) {
diff --git a/tools/cpp-define-generator/globals.def b/tools/cpp-define-generator/globals.def
index 8dcc023..459e5a8 100644
--- a/tools/cpp-define-generator/globals.def
+++ b/tools/cpp-define-generator/globals.def
@@ -86,3 +86,8 @@
 ASM_DEFINE(CALLEE_SAVE_EVERYTHING_NUM_CORE_SPILLS,
            art::POPCOUNT(art::RuntimeCalleeSaveFrame::GetCoreSpills(
                art::CalleeSaveType::kSaveEverything)))
+ASM_DEFINE(TAGGED_JNI_SP_MASK, art::ManagedStack::kTaggedJniSpMask)
+ASM_DEFINE(TAGGED_JNI_SP_MASK_TOGGLED32,
+           ~static_cast<uint32_t>(art::ManagedStack::kTaggedJniSpMask))
+ASM_DEFINE(TAGGED_JNI_SP_MASK_TOGGLED64,
+           ~static_cast<uint64_t>(art::ManagedStack::kTaggedJniSpMask))