Revert "JNI: Remove `JniMethodFast{Start,End}()`."

This reverts commit 64d6e187f19ed670429652020561887e6b220216.

Reason for revert: Causes flaky failures in the no-image JIT run tests.

Bug: 172332525
Change-Id: I7813d89283eff0f6266318d3fb02d1257471798d
diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc
index 9e3bb86..7980e18 100644
--- a/compiler/jni/jni_cfi_test.cc
+++ b/compiler/jni/jni_cfi_test.cc
@@ -69,8 +69,7 @@
         JniCallingConvention::Create(&allocator,
                                      is_static,
                                      is_synchronized,
-                                     /*is_fast_native=*/ false,
-                                     /*is_critical_native=*/ false,
+                                     /*is_critical_native*/false,
                                      shorty,
                                      isa));
     std::unique_ptr<ManagedRuntimeCallingConvention> mr_conv(
diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc
index 68c7a94..4d0d813 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.cc
+++ b/compiler/jni/quick/arm/calling_convention_arm.cc
@@ -287,12 +287,10 @@
 
 ArmJniCallingConvention::ArmJniCallingConvention(bool is_static,
                                                  bool is_synchronized,
-                                                 bool is_fast_native,
                                                  bool is_critical_native,
                                                  const char* shorty)
     : JniCallingConvention(is_static,
                            is_synchronized,
-                           is_fast_native,
                            is_critical_native,
                            shorty,
                            kArmPointerSize) {
diff --git a/compiler/jni/quick/arm/calling_convention_arm.h b/compiler/jni/quick/arm/calling_convention_arm.h
index 149ba39..fad60c8 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.h
+++ b/compiler/jni/quick/arm/calling_convention_arm.h
@@ -56,7 +56,6 @@
  public:
   ArmJniCallingConvention(bool is_static,
                           bool is_synchronized,
-                          bool is_fast_native,
                           bool is_critical_native,
                           const char* shorty);
   ~ArmJniCallingConvention() override {}
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index 7b9a597..83b936a 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -215,12 +215,10 @@
 
 Arm64JniCallingConvention::Arm64JniCallingConvention(bool is_static,
                                                      bool is_synchronized,
-                                                     bool is_fast_native,
                                                      bool is_critical_native,
                                                      const char* shorty)
     : JniCallingConvention(is_static,
                            is_synchronized,
-                           is_fast_native,
                            is_critical_native,
                            shorty,
                            kArm64PointerSize) {
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.h b/compiler/jni/quick/arm64/calling_convention_arm64.h
index ade88e4..0836160 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.h
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.h
@@ -48,7 +48,6 @@
  public:
   Arm64JniCallingConvention(bool is_static,
                             bool is_synchronized,
-                            bool is_fast_native,
                             bool is_critical_native,
                             const char* shorty);
   ~Arm64JniCallingConvention() override {}
diff --git a/compiler/jni/quick/calling_convention.cc b/compiler/jni/quick/calling_convention.cc
index e7a84fd..fd05941 100644
--- a/compiler/jni/quick/calling_convention.cc
+++ b/compiler/jni/quick/calling_convention.cc
@@ -134,7 +134,6 @@
 std::unique_ptr<JniCallingConvention> JniCallingConvention::Create(ArenaAllocator* allocator,
                                                                    bool is_static,
                                                                    bool is_synchronized,
-                                                                   bool is_fast_native,
                                                                    bool is_critical_native,
                                                                    const char* shorty,
                                                                    InstructionSet instruction_set) {
@@ -144,25 +143,25 @@
     case InstructionSet::kThumb2:
       return std::unique_ptr<JniCallingConvention>(
           new (allocator) arm::ArmJniCallingConvention(
-              is_static, is_synchronized, is_fast_native, is_critical_native, shorty));
+              is_static, is_synchronized, is_critical_native, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_arm64
     case InstructionSet::kArm64:
       return std::unique_ptr<JniCallingConvention>(
           new (allocator) arm64::Arm64JniCallingConvention(
-              is_static, is_synchronized, is_fast_native, is_critical_native, shorty));
+              is_static, is_synchronized, is_critical_native, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86
     case InstructionSet::kX86:
       return std::unique_ptr<JniCallingConvention>(
           new (allocator) x86::X86JniCallingConvention(
-              is_static, is_synchronized, is_fast_native, is_critical_native, shorty));
+              is_static, is_synchronized, is_critical_native, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86_64
     case InstructionSet::kX86_64:
       return std::unique_ptr<JniCallingConvention>(
           new (allocator) x86_64::X86_64JniCallingConvention(
-              is_static, is_synchronized, is_fast_native, is_critical_native, shorty));
+              is_static, is_synchronized, is_critical_native, shorty));
 #endif
     default:
       LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index faa83da..e62fc33 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -291,7 +291,6 @@
   static std::unique_ptr<JniCallingConvention> Create(ArenaAllocator* allocator,
                                                       bool is_static,
                                                       bool is_synchronized,
-                                                      bool is_fast_native,
                                                       bool is_critical_native,
                                                       const char* shorty,
                                                       InstructionSet instruction_set);
@@ -349,10 +348,6 @@
     return 4u;
   }
 
-  bool IsFastNative() const {
-    return is_fast_native_;
-  }
-
   bool IsCriticalNative() const {
     return is_critical_native_;
   }
@@ -381,10 +376,9 @@
 
   // Does the transition back spill the return value in the stack frame?
   bool SpillsReturnValue() const {
-    // Exclude return value for @FastNative and @CriticalNative methods for optimization speed.
+    // Exclude return value for @CriticalNative methods for optimization speed.
     // References are passed directly to the "end method" and there is nothing to save for `void`.
-    return (!IsFastNative() && !IsCriticalNative()) &&
-           (!IsReturnAReference() && SizeOfReturnValue() != 0u);
+    return !IsCriticalNative() && !IsReturnAReference() && SizeOfReturnValue() != 0u;
   }
 
  protected:
@@ -396,12 +390,10 @@
 
   JniCallingConvention(bool is_static,
                        bool is_synchronized,
-                       bool is_fast_native,
                        bool is_critical_native,
                        const char* shorty,
                        PointerSize frame_pointer_size)
       : CallingConvention(is_static, is_synchronized, shorty, frame_pointer_size),
-        is_fast_native_(is_fast_native),
         is_critical_native_(is_critical_native) {}
 
  protected:
@@ -434,7 +426,6 @@
   // Is the current argument (at the iterator) an extra argument for JNI?
   bool IsCurrentArgExtraForJni() const;
 
-  const bool is_fast_native_;
   const bool is_critical_native_;
 
  private:
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 9d96372..5752c75 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -86,12 +86,15 @@
 template <PointerSize kPointerSize>
 static ThreadOffset<kPointerSize> GetJniEntrypointThreadOffset(JniEntrypoint which,
                                                                bool reference_return,
-                                                               bool is_synchronized) {
+                                                               bool is_synchronized,
+                                                               bool is_fast_native) {
   if (which == JniEntrypoint::kStart) {  // JniMethodStart
     ThreadOffset<kPointerSize> jni_start =
         is_synchronized
             ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStartSynchronized)
-            : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStart);
+            : (is_fast_native
+                   ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodFastStart)
+                   : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStart));
 
     return jni_start;
   } else {  // JniMethodEnd
@@ -100,11 +103,15 @@
       // Pass result.
       jni_end = is_synchronized
                     ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReferenceSynchronized)
-                    : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReference);
+                    : (is_fast_native
+                           ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodFastEndWithReference)
+                           : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReference));
     } else {
       jni_end = is_synchronized
                     ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndSynchronized)
-                    : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEnd);
+                    : (is_fast_native
+                           ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodFastEnd)
+                           : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEnd));
     }
 
     return jni_end;
@@ -122,7 +129,6 @@
                                                      uint32_t access_flags,
                                                      uint32_t method_idx,
                                                      const DexFile& dex_file) {
-  constexpr size_t kRawPointerSize = static_cast<size_t>(kPointerSize);
   const bool is_native = (access_flags & kAccNative) != 0;
   CHECK(is_native);
   const bool is_static = (access_flags & kAccStatic) != 0;
@@ -188,7 +194,6 @@
       JniCallingConvention::Create(&allocator,
                                    is_static,
                                    is_synchronized,
-                                   is_fast_native,
                                    is_critical_native,
                                    shorty,
                                    instruction_set);
@@ -213,7 +218,6 @@
       JniCallingConvention::Create(&allocator,
                                    is_static,
                                    is_synchronized,
-                                   is_fast_native,
                                    is_critical_native,
                                    jni_end_shorty,
                                    instruction_set));
@@ -239,7 +243,6 @@
     // Spill all register arguments.
     // TODO: Pass these in a single call to let the assembler use multi-register stores.
     // TODO: Spill native stack args straight to their stack locations (adjust SP earlier).
-    // TODO: For @FastNative, move args in registers, spill only references.
     mr_conv->ResetIterator(FrameOffset(current_frame_size));
     for (; mr_conv->HasNext(); mr_conv->Next()) {
       if (mr_conv->IsCurrentParamInRegister()) {
@@ -254,6 +257,7 @@
     // NOTE: @CriticalNative does not need to store the stack pointer to the thread
     //       because garbage collections are disabled within the execution of a
     //       @CriticalNative method.
+    //       (TODO: We could probably disable it for @FastNative too).
   }  // if (!is_critical_native)
 
   // 3. Move frame down to allow space for out going args.
@@ -286,12 +290,13 @@
   //    two pointer arguments.
   std::unique_ptr<JNIMacroLabel> monitor_enter_exception_slow_path =
       UNLIKELY(is_synchronized) ? __ CreateLabel() : nullptr;
-  if (LIKELY(!is_critical_native && !is_fast_native)) {
-    // Skip this for @CriticalNative and @FastNative methods. They do not call JniMethodStart.
+  if (LIKELY(!is_critical_native)) {
+    // Skip this for @CriticalNative methods. They do not call JniMethodStart.
     ThreadOffset<kPointerSize> jni_start =
         GetJniEntrypointThreadOffset<kPointerSize>(JniEntrypoint::kStart,
                                                    reference_return,
-                                                   is_synchronized);
+                                                   is_synchronized,
+                                                   is_fast_native);
     main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
     if (is_synchronized) {
       // Pass object for locking.
@@ -358,8 +363,9 @@
     ArenaVector<ArgumentLocation> src_args(allocator.Adapter());
     ArenaVector<ArgumentLocation> dest_args(allocator.Adapter());
     // Move the method pointer to the hidden argument register.
-    dest_args.push_back(ArgumentLocation(main_jni_conv->HiddenArgumentRegister(), kRawPointerSize));
-    src_args.push_back(ArgumentLocation(mr_conv->MethodRegister(), kRawPointerSize));
+    size_t pointer_size = static_cast<size_t>(kPointerSize);
+    dest_args.push_back(ArgumentLocation(main_jni_conv->HiddenArgumentRegister(), pointer_size));
+    src_args.push_back(ArgumentLocation(mr_conv->MethodRegister(), pointer_size));
     // Move normal arguments to their locations.
     mr_conv->ResetIterator(FrameOffset(current_frame_size));
     main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
@@ -376,26 +382,6 @@
     DCHECK(!main_jni_conv->HasNext());
     __ MoveArguments(ArrayRef<ArgumentLocation>(dest_args), ArrayRef<ArgumentLocation>(src_args));
   } else {
-    if (UNLIKELY(!method_register.IsNoRegister())) {
-      DCHECK(is_fast_native);
-      // In general, we do not know if the method register shall be clobbered by initializing
-      // some argument below. However, for most supported architectures (arm, arm64, x86_64),
-      // the `method_register` is the same as the `JNIEnv*` argument register which is
-      // initialized last, so we can quickly check that case and use the original method
-      // register to initialize the `jclass` for static methods. Otherwise, move the method
-      // to the `callee_save_temp` as we shall need it for the call.
-      main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
-      if (main_jni_conv->IsCurrentParamInRegister() &&
-          main_jni_conv->CurrentParamRegister().Equals(method_register) &&
-          is_static) {
-        // Keep the current `method_register`.
-      } else {
-        ManagedRegister new_method_reg = __ CoreRegisterWithSize(callee_save_temp, kRawPointerSize);
-        __ Move(new_method_reg, method_register, kRawPointerSize);
-        method_register = new_method_reg;
-      }
-    }
-
     // Iterate over arguments placing values from managed calling convention in
     // to the convention required for a native call (shuffling). For references
     // place an index/pointer to the reference after checking whether it is
@@ -433,36 +419,25 @@
     }
 
     // 8. For static method, create jclass argument as a pointer to the method's declaring class.
-    //    Make sure the method is in a register even for non-static methods.
-    DCHECK_EQ(ArtMethod::DeclaringClassOffset().SizeValue(), 0u);
-    FrameOffset method_offset =
-        FrameOffset(current_out_arg_size + mr_conv->MethodStackOffset().SizeValue());
     if (is_static) {
       main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
       main_jni_conv->Next();  // Skip JNIEnv*
-      // Load reference to the method's declaring class. For normal native, the method register
-      // has been clobbered by the above call, so we need to load the method from the stack.
-      if (method_register.IsNoRegister()) {
-        // Use the `callee_save_temp` if the parameter goes on the stack.
-        method_register = main_jni_conv->IsCurrentParamOnStack()
-            ? __ CoreRegisterWithSize(callee_save_temp, kRawPointerSize)
-            : main_jni_conv->CurrentParamRegister();
-        __ Load(method_register, method_offset, kRawPointerSize);
-      }
-      DCHECK(!method_register.IsNoRegister());
+      // Load reference to the method's declaring class. The method register has been
+      // clobbered by the above call, so we need to load the method from the stack.
+      FrameOffset method_offset =
+          FrameOffset(current_out_arg_size + mr_conv->MethodStackOffset().SizeValue());
+      DCHECK_EQ(ArtMethod::DeclaringClassOffset().SizeValue(), 0u);
       if (main_jni_conv->IsCurrentParamOnStack()) {
-        // Store the method argument.
         FrameOffset out_off = main_jni_conv->CurrentParamStackOffset();
-        __ Store(out_off, method_register, kRawPointerSize);
+        __ Copy(out_off, method_offset, static_cast<size_t>(kPointerSize));
+        // TODO(x86): Get hold of the register used to copy the method pointer,
+        // so that we can use it also for loading the method entrypoint below.
       } else {
         ManagedRegister out_reg = main_jni_conv->CurrentParamRegister();
-        __ Move(out_reg, method_register, kRawPointerSize);  // No-op if equal.
+        __ Load(out_reg, method_offset, static_cast<size_t>(kPointerSize));
+        // Reuse the register also for loading the method entrypoint below.
         method_register = out_reg;
       }
-    } else if (LIKELY(method_register.IsNoRegister())) {
-      // Load the method for non-static methods to `callee_save_temp` as we need it for the call.
-      method_register = __ CoreRegisterWithSize(callee_save_temp, kRawPointerSize);
-      __ Load(method_register, method_offset, kRawPointerSize);
     }
 
     // Set the iterator back to the incoming Method*.
@@ -471,10 +446,10 @@
     // 9. Create 1st argument, the JNI environment ptr.
     if (main_jni_conv->IsCurrentParamInRegister()) {
       ManagedRegister jni_env_arg = main_jni_conv->CurrentParamRegister();
-      __ Move(jni_env_arg, jni_env_reg, kRawPointerSize);
+      __ Move(jni_env_arg, jni_env_reg, static_cast<size_t>(kPointerSize));
     } else {
       FrameOffset jni_env_arg_offset = main_jni_conv->CurrentParamStackOffset();
-      __ Store(jni_env_arg_offset, jni_env_reg, kRawPointerSize);
+      __ Store(jni_env_arg_offset, jni_env_reg, static_cast<size_t>(kPointerSize));
     }
   }
 
@@ -488,11 +463,12 @@
       __ Call(main_jni_conv->HiddenArgumentRegister(), jni_entrypoint_offset);
     }
   } else {
-    DCHECK(method_register.IsRegister());
-    __ Call(method_register, jni_entrypoint_offset);
-    // We shall not need the method register anymore. And we may clobber it below
-    // if it's the `callee_save_temp`, so clear it here to make sure it's not used.
-    method_register = ManagedRegister::NoRegister();
+    if (method_register.IsRegister()) {
+      __ Call(method_register, jni_entrypoint_offset);
+    } else {
+      __ Call(FrameOffset(current_out_arg_size + mr_conv->MethodStackOffset().SizeValue()),
+              jni_entrypoint_offset);
+    }
   }
 
   // 11. Fix differences in result widths.
@@ -523,9 +499,8 @@
     __ Store(return_save_location,
              main_jni_conv->ReturnRegister(),
              main_jni_conv->SizeOfReturnValue());
-  } else if (UNLIKELY(is_fast_native || is_critical_native) &&
-             main_jni_conv->SizeOfReturnValue() != 0) {
-    // For @FastNative and @CriticalNative only,
+  } else if (UNLIKELY(is_critical_native) && main_jni_conv->SizeOfReturnValue() != 0) {
+    // For @CriticalNative only,
     // move the JNI return register into the managed return register (if they don't match).
     ManagedRegister jni_return_reg = main_jni_conv->ReturnRegister();
     ManagedRegister mr_return_reg = mr_conv->ReturnRegister();
@@ -534,7 +509,7 @@
     // If they differ, only then do we have to do anything about it.
     // Otherwise the return value is already in the right place when we return.
     if (!jni_return_reg.Equals(mr_return_reg)) {
-      CHECK(!is_critical_native || !main_jni_conv->UseTailCall());
+      CHECK(!main_jni_conv->UseTailCall());
       // This is typically only necessary on ARM32 due to native being softfloat
       // while managed is hardfloat.
       // -- For example VMOV {r0, r1} -> D0; VMOV r0 -> S0.
@@ -546,30 +521,10 @@
     }
   }
 
-  // 13. For @FastNative that returns a reference, do an early exception check so that the
-  //     `JniDecodeReferenceResult()` in the main path does not need to check for exceptions.
-  std::unique_ptr<JNIMacroLabel> exception_slow_path =
-      LIKELY(!is_critical_native) ? __ CreateLabel() : nullptr;
-  if (UNLIKELY(is_fast_native) && reference_return) {
-    __ ExceptionPoll(exception_slow_path.get());
-  }
-
-  // 14. For @FastNative that returns a reference, do an early suspend check so that we
-  //     do not need to encode the decoded reference in a stack map.
-  std::unique_ptr<JNIMacroLabel> suspend_check_slow_path =
-      UNLIKELY(is_fast_native) ? __ CreateLabel() : nullptr;
-  std::unique_ptr<JNIMacroLabel> suspend_check_resume =
-      UNLIKELY(is_fast_native) ? __ CreateLabel() : nullptr;
-  if (UNLIKELY(is_fast_native) && reference_return) {
-    __ SuspendCheck(suspend_check_slow_path.get());
-    __ Bind(suspend_check_resume.get());
-  }
-
   if (LIKELY(!is_critical_native)) {
     // Increase frame size for out args if needed by the end_jni_conv.
     const size_t end_out_arg_size = end_jni_conv->OutFrameSize();
     if (end_out_arg_size > current_out_arg_size) {
-      DCHECK(!is_fast_native);
       size_t out_arg_size_diff = end_out_arg_size - current_out_arg_size;
       current_out_arg_size = end_out_arg_size;
       __ IncreaseFrameSize(out_arg_size_diff);
@@ -578,94 +533,84 @@
     }
     end_jni_conv->ResetIterator(FrameOffset(end_out_arg_size));
 
-    // 15. Call JniMethodEnd for normal native.
-    //     For @FastNative with reference return, decode the `jobject`.
-    if (LIKELY(!is_fast_native) || reference_return) {
-      ThreadOffset<kPointerSize> jni_end = is_fast_native
-          ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniDecodeReferenceResult)
-          : GetJniEntrypointThreadOffset<kPointerSize>(JniEntrypoint::kEnd,
-                                                       reference_return,
-                                                       is_synchronized);
-      if (reference_return) {
-        // Pass result.
-        SetNativeParameter(jni_asm.get(), end_jni_conv.get(), end_jni_conv->ReturnRegister());
-        end_jni_conv->Next();
-      }
-      if (is_synchronized) {
-        // Pass object for unlocking.
-        if (is_static) {
-          // Load reference to the method's declaring class. The method register has been
-          // clobbered by the above call, so we need to load the method from the stack.
-          FrameOffset method_offset =
-              FrameOffset(current_out_arg_size + mr_conv->MethodStackOffset().SizeValue());
-          DCHECK_EQ(ArtMethod::DeclaringClassOffset().SizeValue(), 0u);
-          if (end_jni_conv->IsCurrentParamOnStack()) {
-            FrameOffset out_off = end_jni_conv->CurrentParamStackOffset();
-            __ Copy(out_off, method_offset, kRawPointerSize);
-          } else {
-            ManagedRegister out_reg = end_jni_conv->CurrentParamRegister();
-            __ Load(out_reg, method_offset, kRawPointerSize);
-          }
+    // 13. Call JniMethodEnd
+    ThreadOffset<kPointerSize> jni_end =
+        GetJniEntrypointThreadOffset<kPointerSize>(JniEntrypoint::kEnd,
+                                                   reference_return,
+                                                   is_synchronized,
+                                                   is_fast_native);
+    if (reference_return) {
+      // Pass result.
+      SetNativeParameter(jni_asm.get(), end_jni_conv.get(), end_jni_conv->ReturnRegister());
+      end_jni_conv->Next();
+    }
+    if (is_synchronized) {
+      // Pass object for unlocking.
+      if (is_static) {
+        // Load reference to the method's declaring class. The method register has been
+        // clobbered by the above call, so we need to load the method from the stack.
+        FrameOffset method_offset =
+            FrameOffset(current_out_arg_size + mr_conv->MethodStackOffset().SizeValue());
+        DCHECK_EQ(ArtMethod::DeclaringClassOffset().SizeValue(), 0u);
+        if (end_jni_conv->IsCurrentParamOnStack()) {
+          FrameOffset out_off = end_jni_conv->CurrentParamStackOffset();
+          __ Copy(out_off, method_offset, static_cast<size_t>(kPointerSize));
         } else {
-          mr_conv->ResetIterator(FrameOffset(current_frame_size));
-          FrameOffset this_offset = mr_conv->CurrentParamStackOffset();
-          if (end_jni_conv->IsCurrentParamOnStack()) {
-            FrameOffset out_off = end_jni_conv->CurrentParamStackOffset();
-            __ CreateJObject(out_off, this_offset, /*null_allowed=*/ false);
-          } else {
-            ManagedRegister out_reg = end_jni_conv->CurrentParamRegister();
-            __ CreateJObject(out_reg,
-                             this_offset,
-                             ManagedRegister::NoRegister(),
-                             /*null_allowed=*/ false);
-          }
+          ManagedRegister out_reg = end_jni_conv->CurrentParamRegister();
+          __ Load(out_reg, method_offset, static_cast<size_t>(kPointerSize));
         }
-        end_jni_conv->Next();
-      }
-      if (end_jni_conv->IsCurrentParamInRegister()) {
-        __ GetCurrentThread(end_jni_conv->CurrentParamRegister());
-        __ Call(end_jni_conv->CurrentParamRegister(), Offset(jni_end));
       } else {
-        __ GetCurrentThread(end_jni_conv->CurrentParamStackOffset());
-        __ CallFromThread(jni_end);
+        mr_conv->ResetIterator(FrameOffset(current_frame_size));
+        FrameOffset this_offset = mr_conv->CurrentParamStackOffset();
+        if (end_jni_conv->IsCurrentParamOnStack()) {
+          FrameOffset out_off = end_jni_conv->CurrentParamStackOffset();
+          __ CreateJObject(out_off, this_offset, /*null_allowed=*/ false);
+        } else {
+          ManagedRegister out_reg = end_jni_conv->CurrentParamRegister();
+          __ CreateJObject(out_reg,
+                           this_offset,
+                           ManagedRegister::NoRegister(),
+                           /*null_allowed=*/ false);
+        }
       }
+      end_jni_conv->Next();
+    }
+    if (end_jni_conv->IsCurrentParamInRegister()) {
+      __ GetCurrentThread(end_jni_conv->CurrentParamRegister());
+      __ Call(end_jni_conv->CurrentParamRegister(), Offset(jni_end));
+    } else {
+      __ GetCurrentThread(end_jni_conv->CurrentParamStackOffset());
+      __ CallFromThread(jni_end);
     }
 
-    // 16. Reload return value
+    // 14. Reload return value
     if (spill_return_value) {
       __ Load(mr_conv->ReturnRegister(), return_save_location, mr_conv->SizeOfReturnValue());
     }
   }  // if (!is_critical_native)
 
-  // 17. Pop local reference frame.
-  if (LIKELY(!is_critical_native)) {
+  // 15. Pop local reference frame.
+  if (!is_critical_native) {
     PopLocalReferenceFrame<kPointerSize>(
         jni_asm.get(), jni_env_reg, saved_cookie_reg, callee_save_temp);
   }
 
-  // 18. Move frame up now we're done with the out arg space.
+  // 16. Move frame up now we're done with the out arg space.
   //     @CriticalNative remove out args together with the frame in RemoveFrame().
   if (LIKELY(!is_critical_native)) {
     __ DecreaseFrameSize(current_out_arg_size);
     current_frame_size -= current_out_arg_size;
   }
 
-  // 19. Process pending exceptions from JNI call or monitor exit.
+  // 17. Process pending exceptions from JNI call or monitor exit.
   //     @CriticalNative methods do not need exception poll in the stub.
-  //     @FastNative methods with reference return emit the exception poll earlier.
-  if (LIKELY(!is_critical_native) && (LIKELY(!is_fast_native) || !reference_return)) {
+  std::unique_ptr<JNIMacroLabel> exception_slow_path =
+      LIKELY(!is_critical_native) ? __ CreateLabel() : nullptr;
+  if (LIKELY(!is_critical_native)) {
     __ ExceptionPoll(exception_slow_path.get());
   }
 
-  // 20. For @FastNative, we never transitioned out of runnable, so there is no transition back.
-  //     Perform a suspend check if there is a flag raised, unless we have done that above
-  //     for reference return.
-  if (UNLIKELY(is_fast_native) && !reference_return) {
-    __ SuspendCheck(suspend_check_slow_path.get());
-    __ Bind(suspend_check_resume.get());
-  }
-
-  // 21. Remove activation - need to restore callee save registers since the GC may have changed
+  // 18. Remove activation - need to restore callee save registers since the GC may have changed
   //     them.
   DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(current_frame_size));
   if (LIKELY(!is_critical_native) || !main_jni_conv->UseTailCall()) {
@@ -676,7 +621,7 @@
     DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(current_frame_size));
   }
 
-  // 22. Read barrier slow path for the declaring class in the method for a static call.
+  // 19. Read barrier slow path for the declaring class in the method for a static call.
   //     Skip this for @CriticalNative because we're not passing a `jclass` to the native method.
   if (kUseReadBarrier && is_static && !is_critical_native) {
     __ Bind(jclass_read_barrier_slow_path.get());
@@ -715,12 +660,12 @@
       __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset());
       __ CallFromThread(read_barrier);
     }
-    if (UNLIKELY(is_synchronized || is_fast_native)) {
+    if (is_synchronized) {
       // Reload the method pointer in the slow path because it is needed
-      // as an argument for the `JniMethodStartSynchronized`, or for @FastNative.
+      // as an argument for the `JniMethodStartSynchronized`.
       __ Load(method_register,
               FrameOffset(main_out_arg_size + mr_conv->MethodStackOffset().SizeValue()),
-              kRawPointerSize);
+              static_cast<size_t>(kPointerSize));
     }
 
     // Return to main path.
@@ -732,47 +677,17 @@
     }
   }
 
-  // 23. Emit suspend check slow path.
-  if (UNLIKELY(is_fast_native)) {
-    __ Bind(suspend_check_slow_path.get());
-    if (reference_return && main_out_arg_size != 0) {
-      jni_asm->cfi().AdjustCFAOffset(main_out_arg_size);
-      __ DecreaseFrameSize(main_out_arg_size);
-    }
-    __ CallFromThread(QUICK_ENTRYPOINT_OFFSET(kPointerSize, pTestSuspend));
-    if (reference_return) {
-      // Suspend check entry point overwrites top of managed stack and leaves it clobbered.
-      // We need to restore the top for subsequent runtime call to `JniDecodeReferenceResult()`.
-      __ StoreStackPointerToThread(Thread::TopOfManagedStackOffset<kPointerSize>());
-    }
-    if (reference_return && main_out_arg_size != 0) {
-      __ IncreaseFrameSize(main_out_arg_size);
-      jni_asm->cfi().AdjustCFAOffset(-main_out_arg_size);
-    }
-    __ Jump(suspend_check_resume.get());
-  }
-
-  // 24. Emit exception poll slow paths.
+  // 20. Emit exception poll slow paths.
   if (LIKELY(!is_critical_native)) {
     if (UNLIKELY(is_synchronized)) {
-      DCHECK(!is_fast_native);
       __ Bind(monitor_enter_exception_slow_path.get());
       if (main_out_arg_size != 0) {
         jni_asm->cfi().AdjustCFAOffset(main_out_arg_size);
         __ DecreaseFrameSize(main_out_arg_size);
       }
     }
-    __ Bind(exception_slow_path.get());
-    if (UNLIKELY(is_fast_native) && reference_return) {
-      // We performed the exception check early, so we need to adjust SP and pop IRT frame.
-      if (main_out_arg_size != 0) {
-        jni_asm->cfi().AdjustCFAOffset(main_out_arg_size);
-        __ DecreaseFrameSize(main_out_arg_size);
-      }
-      PopLocalReferenceFrame<kPointerSize>(
-          jni_asm.get(), jni_env_reg, saved_cookie_reg, callee_save_temp);
-    }
     DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(current_frame_size));
+    __ Bind(exception_slow_path.get());
     __ DeliverPendingException();
   }
 
@@ -796,9 +711,9 @@
                                     ManagedRegister jni_env_reg,
                                     ManagedRegister saved_cookie_reg,
                                     ManagedRegister temp_reg) {
-  const size_t kRawPointerSize = static_cast<size_t>(kPointerSize);
-  const MemberOffset jni_env_cookie_offset = JNIEnvExt::LocalRefCookieOffset(kRawPointerSize);
-  const MemberOffset jni_env_segment_state_offset = JNIEnvExt::SegmentStateOffset(kRawPointerSize);
+  const size_t pointer_size = static_cast<size_t>(kPointerSize);
+  const MemberOffset jni_env_cookie_offset = JNIEnvExt::LocalRefCookieOffset(pointer_size);
+  const MemberOffset jni_env_segment_state_offset = JNIEnvExt::SegmentStateOffset(pointer_size);
 
   // Load the old cookie that we shall need to restore.
   __ Load(saved_cookie_reg, jni_env_reg, jni_env_cookie_offset, kIRTCookieSize);
@@ -813,9 +728,9 @@
                                    ManagedRegister jni_env_reg,
                                    ManagedRegister saved_cookie_reg,
                                    ManagedRegister temp_reg) {
-  const size_t kRawPointerSize = static_cast<size_t>(kPointerSize);
-  const MemberOffset jni_env_cookie_offset = JNIEnvExt::LocalRefCookieOffset(kRawPointerSize);
-  const MemberOffset jni_env_segment_state_offset = JNIEnvExt::SegmentStateOffset(kRawPointerSize);
+  const size_t pointer_size = static_cast<size_t>(kPointerSize);
+  const MemberOffset jni_env_cookie_offset = JNIEnvExt::LocalRefCookieOffset(pointer_size);
+  const MemberOffset jni_env_segment_state_offset = JNIEnvExt::SegmentStateOffset(pointer_size);
 
   // Set the current segment state to the current cookie in JNI environment.
   __ Load(temp_reg, jni_env_reg, jni_env_cookie_offset, kIRTCookieSize);
diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc
index 9473202..e45a211 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.cc
+++ b/compiler/jni/quick/x86/calling_convention_x86.cc
@@ -177,12 +177,10 @@
 
 X86JniCallingConvention::X86JniCallingConvention(bool is_static,
                                                  bool is_synchronized,
-                                                 bool is_fast_native,
                                                  bool is_critical_native,
                                                  const char* shorty)
     : JniCallingConvention(is_static,
                            is_synchronized,
-                           is_fast_native,
                            is_critical_native,
                            shorty,
                            kX86PointerSize) {
diff --git a/compiler/jni/quick/x86/calling_convention_x86.h b/compiler/jni/quick/x86/calling_convention_x86.h
index 7b62161..d589dbd 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.h
+++ b/compiler/jni/quick/x86/calling_convention_x86.h
@@ -53,7 +53,6 @@
  public:
   X86JniCallingConvention(bool is_static,
                           bool is_synchronized,
-                          bool is_fast_native,
                           bool is_critical_native,
                           const char* shorty);
   ~X86JniCallingConvention() override {}
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
index ddf3d74..ed40c5f 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
@@ -165,12 +165,10 @@
 
 X86_64JniCallingConvention::X86_64JniCallingConvention(bool is_static,
                                                        bool is_synchronized,
-                                                       bool is_fast_native,
                                                        bool is_critical_native,
                                                        const char* shorty)
     : JniCallingConvention(is_static,
                            is_synchronized,
-                           is_fast_native,
                            is_critical_native,
                            shorty,
                            kX86_64PointerSize) {
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.h b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
index ee8603d..80453c3 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.h
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
@@ -48,7 +48,6 @@
  public:
   X86_64JniCallingConvention(bool is_static,
                              bool is_synchronized,
-                             bool is_fast_native,
                              bool is_critical_native,
                              const char* shorty);
   ~X86_64JniCallingConvention() override {}
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
index e81e378..c59262d 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
@@ -968,11 +968,8 @@
   // TODO: place reference map on call
 }
 
-void ArmVIXLJNIMacroAssembler::CallFromThread(ThreadOffset32 offset) {
-  // Call *(TR + offset)
-  asm_.LoadFromOffset(kLoadWord, lr, tr, offset.Int32Value());
-  ___ Blx(lr);
-  // TODO: place reference map on call
+void ArmVIXLJNIMacroAssembler::CallFromThread(ThreadOffset32 offset ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL);
 }
 
 void ArmVIXLJNIMacroAssembler::GetCurrentThread(ManagedRegister dest) {
@@ -985,19 +982,6 @@
   asm_.StoreToOffset(kStoreWord, tr, sp, dest_offset.Int32Value());
 }
 
-void ArmVIXLJNIMacroAssembler::SuspendCheck(JNIMacroLabel* label) {
-  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  vixl32::Register scratch = temps.Acquire();
-  asm_.LoadFromOffset(kLoadUnsignedHalfword,
-                      scratch,
-                      tr,
-                      Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value());
-
-  ___ Cmp(scratch, 0);
-  ___ BPreferNear(ne, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
-  // TODO: think about using CBNZ here.
-}
-
 void ArmVIXLJNIMacroAssembler::ExceptionPoll(JNIMacroLabel* label) {
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
   vixl32::Register scratch = temps.Acquire();
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
index 07ace97..89805ce 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
@@ -182,9 +182,6 @@
   void Call(FrameOffset base, Offset offset) override;
   void CallFromThread(ThreadOffset32 offset) override;
 
-  // Generate suspend check and branch to `label` if there is a pending suspend request.
-  void SuspendCheck(JNIMacroLabel* label) override;
-
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to the `label` if it is.
   void ExceptionPoll(JNIMacroLabel* label) override;
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
index f7144d0..bb16841 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -682,10 +682,8 @@
   ___ Blr(lr);
 }
 
-void Arm64JNIMacroAssembler::CallFromThread(ThreadOffset64 offset) {
-  // Call *(TR + offset)
-  ___ Ldr(lr, MEM_OP(reg_x(TR), offset.Int32Value()));
-  ___ Blr(lr);
+void Arm64JNIMacroAssembler::CallFromThread(ThreadOffset64 offset ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "Unimplemented Call() variant";
 }
 
 void Arm64JNIMacroAssembler::CreateJObject(ManagedRegister m_out_reg,
@@ -736,13 +734,6 @@
   ___ Str(scratch, MEM_OP(reg_x(SP), out_off.Int32Value()));
 }
 
-void Arm64JNIMacroAssembler::SuspendCheck(JNIMacroLabel* label) {
-  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  Register scratch = temps.AcquireW();
-  ___ Ldrh(scratch, MEM_OP(reg_x(TR), Thread::ThreadFlagsOffset<kArm64PointerSize>().Int32Value()));
-  ___ Cbnz(scratch, Arm64JNIMacroLabel::Cast(label)->AsArm64());
-}
-
 void Arm64JNIMacroAssembler::ExceptionPoll(JNIMacroLabel* label) {
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
   Register scratch = temps.AcquireX();
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.h b/compiler/utils/arm64/jni_macro_assembler_arm64.h
index 5d6a0e4..363bce9 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.h
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.h
@@ -166,9 +166,6 @@
   void Call(FrameOffset base, Offset offset) override;
   void CallFromThread(ThreadOffset64 offset) override;
 
-  // Generate suspend check and branch to `label` if there is a pending suspend request.
-  void SuspendCheck(JNIMacroLabel* label) override;
-
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to the `label` if it is.
   void ExceptionPoll(JNIMacroLabel* label) override;
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 79ab025..83b7eeb 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -99,7 +99,6 @@
 
   const bool is_static = true;
   const bool is_synchronized = false;
-  const bool is_fast_native = false;
   const bool is_critical_native = false;
   const char* shorty = "IIFII";
 
@@ -107,7 +106,6 @@
       JniCallingConvention::Create(&allocator,
                                    is_static,
                                    is_synchronized,
-                                   is_fast_native,
                                    is_critical_native,
                                    shorty,
                                    InstructionSet::kThumb2));
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index 9b5b6e2..fbbcbde 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -153,7 +153,7 @@
   "     21c: d9 f8 24 80   ldr.w r8, [r9, #36]\n"
   "     220: 70 47         bx lr\n"
   "     222: d9 f8 8c 00   ldr.w r0, [r9, #140]\n"
-  "     226: d9 f8 c8 e2   ldr.w lr, [r9, #712]\n"
+  "     226: d9 f8 d0 e2   ldr.w lr, [r9, #720]\n"
   "     22a: f0 47         blx lr\n"
 };
 
diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h
index 0ccf4cd..5da70c1 100644
--- a/compiler/utils/jni_macro_assembler.h
+++ b/compiler/utils/jni_macro_assembler.h
@@ -242,9 +242,6 @@
   virtual void Call(FrameOffset base, Offset offset) = 0;
   virtual void CallFromThread(ThreadOffset<kPointerSize> offset) = 0;
 
-  // Generate suspend check and branch to `label` if there is a pending suspend request.
-  virtual void SuspendCheck(JNIMacroLabel* label) = 0;
-
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to the `label` if it is.
   virtual void ExceptionPoll(JNIMacroLabel* label) = 0;
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc
index f805556..b08503e 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.cc
+++ b/compiler/utils/x86/jni_macro_assembler_x86.cc
@@ -563,11 +563,6 @@
   __ movl(Address(ESP, offset), scratch);
 }
 
-void X86JNIMacroAssembler::SuspendCheck(JNIMacroLabel* label) {
-  __ fs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>()), Immediate(0));
-  __ j(kNotEqual, X86JNIMacroLabel::Cast(label)->AsX86());
-}
-
 void X86JNIMacroAssembler::ExceptionPoll(JNIMacroLabel* label) {
   __ fs()->cmpl(Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>()), Immediate(0));
   __ j(kNotEqual, X86JNIMacroLabel::Cast(label)->AsX86());
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.h b/compiler/utils/x86/jni_macro_assembler_x86.h
index 486cd7e..1de4eb1 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.h
+++ b/compiler/utils/x86/jni_macro_assembler_x86.h
@@ -158,9 +158,6 @@
   void Call(FrameOffset base, Offset offset) override;
   void CallFromThread(ThreadOffset32 offset) override;
 
-  // Generate suspend check and branch to `label` if there is a pending suspend request.
-  void SuspendCheck(JNIMacroLabel* label) override;
-
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to the `label` if it is.
   void ExceptionPoll(JNIMacroLabel* label) override;
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
index fcc517e..b145e97 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
@@ -642,12 +642,6 @@
   __ movq(Address(CpuRegister(RSP), offset), scratch);
 }
 
-void X86_64JNIMacroAssembler::SuspendCheck(JNIMacroLabel* label) {
-  __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>(), true),
-                Immediate(0));
-  __ j(kNotEqual, X86_64JNIMacroLabel::Cast(label)->AsX86_64());
-}
-
 void X86_64JNIMacroAssembler::ExceptionPoll(JNIMacroLabel* label) {
   __ gs()->cmpl(Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>(), true),
                 Immediate(0));
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
index baebf48..0468901 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
@@ -178,9 +178,6 @@
   void Call(FrameOffset base, Offset offset) override;
   void CallFromThread(ThreadOffset64 offset) override;
 
-  // Generate suspend check and branch to `label` if there is a pending suspend request.
-  void SuspendCheck(JNIMacroLabel* label) override;
-
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to the `label` if it is.
   void ExceptionPoll(JNIMacroLabel* label) override;
diff --git a/dex2oat/linker/oat_writer_test.cc b/dex2oat/linker/oat_writer_test.cc
index 8663d8b..7bcff2b 100644
--- a/dex2oat/linker/oat_writer_test.cc
+++ b/dex2oat/linker/oat_writer_test.cc
@@ -505,7 +505,7 @@
   EXPECT_EQ(64U, sizeof(OatHeader));
   EXPECT_EQ(4U, sizeof(OatMethodOffsets));
   EXPECT_EQ(4U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(167 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
+  EXPECT_EQ(169 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
             sizeof(QuickEntryPoints));
 }
 
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
index 9b1bd26..f3fc97e 100644
--- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
@@ -74,13 +74,15 @@
 
   // JNI
   qpoints->pJniMethodStart = JniMethodStart;
+  qpoints->pJniMethodFastStart = JniMethodFastStart;
   qpoints->pJniMethodStartSynchronized = JniMethodStartSynchronized;
   qpoints->pJniMethodEnd = JniMethodEnd;
   qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronized;
   qpoints->pJniMethodEndWithReference = JniMethodEndWithReference;
+  qpoints->pJniMethodFastEndWithReference = JniMethodFastEndWithReference;
   qpoints->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized;
+  qpoints->pJniMethodFastEnd = JniMethodFastEnd;
   qpoints->pQuickGenericJniTrampoline = art_quick_generic_jni_trampoline;
-  qpoints->pJniDecodeReferenceResult = JniDecodeReferenceResult;
 
   // Locks
   if (UNLIKELY(VLOG_IS_ON(systrace_lock_logging))) {
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index 6076ec6..3f7c230 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -55,20 +55,23 @@
 // JNI entrypoints.
 // TODO: NO_THREAD_SAFETY_ANALYSIS due to different control paths depending on fast JNI.
 extern void JniMethodStart(Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
+extern void JniMethodFastStart(Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
 extern void JniMethodStartSynchronized(jobject to_lock, Thread* self)
     NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
 extern void JniMethodEnd(Thread* self)
     NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
+extern void JniMethodFastEnd(Thread* self)
+    NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
 extern void JniMethodEndSynchronized(jobject locked, Thread* self)
     NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
 extern mirror::Object* JniMethodEndWithReference(jobject result, Thread* self)
     NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
+extern mirror::Object* JniMethodFastEndWithReference(jobject result, Thread* self)
+    NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
 extern mirror::Object* JniMethodEndWithReferenceSynchronized(jobject result,
                                                              jobject locked,
                                                              Thread* self)
     NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
-extern mirror::Object* JniDecodeReferenceResult(jobject result, Thread* self)
-    NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
 
 // JNI entrypoints when monitoring entry/exit.
 extern void JniMonitoredMethodStart(Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index 842f1b6..5deb557 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -73,12 +73,14 @@
   V(AputObject, void, mirror::Array*, int32_t, mirror::Object*) \
 \
   V(JniMethodStart, void, Thread*) \
+  V(JniMethodFastStart, void, Thread*) \
   V(JniMethodStartSynchronized, void, jobject, Thread*) \
   V(JniMethodEnd, void, Thread*) \
+  V(JniMethodFastEnd, void, Thread*) \
   V(JniMethodEndSynchronized, void, jobject, Thread*) \
   V(JniMethodEndWithReference, mirror::Object*, jobject, Thread*) \
+  V(JniMethodFastEndWithReference, mirror::Object*, jobject, Thread*) \
   V(JniMethodEndWithReferenceSynchronized, mirror::Object*, jobject, jobject, Thread*) \
-  V(JniDecodeReferenceResult, mirror::Object*, jobject, Thread*) \
   V(QuickGenericJniTrampoline, void, ArtMethod*) \
 \
   V(LockObject, void, mirror::Object*) \
diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
index 995a1f1..d09e21d 100644
--- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
@@ -41,6 +41,8 @@
 static_assert(sizeof(IRTSegmentState) == sizeof(uint32_t), "IRTSegmentState size unexpected");
 static_assert(std::is_trivial<IRTSegmentState>::value, "IRTSegmentState not trivial");
 
+static inline void GoToRunnableFast(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_);
+
 extern void ReadBarrierJni(mirror::CompressedReference<mirror::Class>* declaring_class,
                            Thread* self ATTRIBUTE_UNUSED) {
   DCHECK(kUseReadBarrier);
@@ -57,6 +59,14 @@
   declaring_class->Assign(to_ref);
 }
 
+// Called on entry to fast JNI. Only checks, in debug builds, that the method is @FastNative.
+extern void JniMethodFastStart(Thread* self) {
+  if (kIsDebugBuild) {
+    ArtMethod* native_method = *self->GetManagedStack()->GetTopQuickFrame();
+    CHECK(native_method->IsFastNative()) << native_method->PrettyMethod();
+  }
+}
+
 // Called on entry to JNI, transition out of Runnable and release share of mutator_lock_.
 extern void JniMethodStart(Thread* self) {
   if (kIsDebugBuild) {
@@ -83,6 +93,23 @@
   self->TransitionFromSuspendedToRunnable();
 }
 
+ALWAYS_INLINE static inline void GoToRunnableFast(Thread* self) {
+  if (kIsDebugBuild) {
+    // Should only enter here if the method is @FastNative.
+    ArtMethod* native_method = *self->GetManagedStack()->GetTopQuickFrame();
+    CHECK(native_method->IsFastNative()) << native_method->PrettyMethod();
+  }
+
+  // When we are in @FastNative, we are already Runnable.
+  // Only do a suspend check on the way out of JNI.
+  if (UNLIKELY(self->TestAllFlags())) {
+    // In fast JNI mode we never transitioned out of runnable. Perform a suspend check if there
+    // is a flag raised.
+    DCHECK(Locks::mutator_lock_->IsSharedHeld(self));
+    self->CheckSuspend();
+  }
+}
+
 static void PopLocalReferences(uint32_t saved_local_ref_cookie, Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   JNIEnvExt* env = self->GetJniEnv();
@@ -123,26 +150,15 @@
   GoToRunnable(self);
 }
 
+extern void JniMethodFastEnd(Thread* self) {
+  GoToRunnableFast(self);
+}
+
 extern void JniMethodEndSynchronized(jobject locked, Thread* self) {
   GoToRunnable(self);
   UnlockJniSynchronizedMethod(locked, self);  // Must decode before pop.
 }
 
-extern mirror::Object* JniDecodeReferenceResult(jobject result, Thread* self)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  DCHECK(!self->IsExceptionPending());
-  ObjPtr<mirror::Object> o = self->DecodeJObject(result);
-  // Process result.
-  if (UNLIKELY(self->GetJniEnv()->IsCheckJniEnabled())) {
-    // CheckReferenceResult can resolve types.
-    StackHandleScope<1> hs(self);
-    HandleWrapperObjPtr<mirror::Object> h_obj(hs.NewHandleWrapper(&o));
-    CheckReferenceResult(h_obj, self);
-  }
-  VerifyObject(o);
-  return o.Ptr();
-}
-
 // Common result handling for EndWithReference.
 static mirror::Object* JniMethodEndWithReferenceHandleResult(jobject result, Thread* self)
     NO_THREAD_SAFETY_ANALYSIS {
@@ -162,6 +178,11 @@
   return o.Ptr();
 }
 
+extern mirror::Object* JniMethodFastEndWithReference(jobject result, Thread* self) {
+  GoToRunnableFast(self);
+  return JniMethodEndWithReferenceHandleResult(result, self);
+}
+
 extern mirror::Object* JniMethodEndWithReference(jobject result, Thread* self) {
   GoToRunnable(self);
   return JniMethodEndWithReferenceHandleResult(result, self);
@@ -192,14 +213,7 @@
     MONITOR_JNI(PaletteNotifyEndJniInvocation);
     GoToRunnable(self);
   } else if (fast_native) {
-    // When we are in @FastNative, we are already Runnable.
-    DCHECK(Locks::mutator_lock_->IsSharedHeld(self));
-    // Only do a suspend check on the way out of JNI just like compiled stubs.
-    if (UNLIKELY(self->TestAllFlags())) {
-      // In fast JNI mode we never transitioned out of runnable. Perform a suspend check if there
-      // is a flag raised.
-      self->CheckSuspend();
-    }
+    GoToRunnableFast(self);
   }
   // We need the mutator lock (i.e., calling GoToRunnable()) before accessing the shorty or the
   // locked object.
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 28025be..3279f7d 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -2116,25 +2116,25 @@
     }
   }
 
-  // Skip calling JniMethodStart for @CriticalNative and @FastNative.
-  if (LIKELY(normal_native)) {
+  // Skip calling JniMethodStart for @CriticalNative.
+  if (LIKELY(!critical_native)) {
     // Start JNI.
     if (called->IsSynchronized()) {
+      DCHECK(normal_native) << " @FastNative and synchronize is not supported";
       jobject lock = GetGenericJniSynchronizationObject(self, called);
       JniMethodStartSynchronized(lock, self);
       if (self->IsExceptionPending()) {
         return nullptr;  // Report error.
       }
     } else {
-      JniMethodStart(self);
+      if (fast_native) {
+        JniMethodFastStart(self);
+      } else {
+        DCHECK(normal_native);
+        JniMethodStart(self);
+      }
     }
-  } else {
-    DCHECK(!called->IsSynchronized())
-        << "@FastNative/@CriticalNative and synchronize is not supported";
-  }
 
-  // Skip pushing IRT frame for @CriticalNative.
-  if (LIKELY(!critical_native)) {
     // Push local reference frame.
     JNIEnvExt* env = self->GetJniEnv();
     DCHECK(env != nullptr);
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 0853cae..73f97bc 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -216,18 +216,21 @@
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pGetObjInstance, pGetObjStatic, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pGetObjStatic, pAputObject, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAputObject, pJniMethodStart, sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodStart, pJniMethodStartSynchronized,
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodStart, pJniMethodFastStart,
+                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodFastStart, pJniMethodStartSynchronized,
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodStartSynchronized, pJniMethodEnd,
                          sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEnd, pJniMethodEndSynchronized, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEnd, pJniMethodFastEnd, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodFastEnd, pJniMethodEndSynchronized, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndSynchronized, pJniMethodEndWithReference,
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndWithReference,
+                         pJniMethodFastEndWithReference, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodFastEndWithReference,
                          pJniMethodEndWithReferenceSynchronized, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndWithReferenceSynchronized,
-                         pJniDecodeReferenceResult, sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniDecodeReferenceResult,
                          pQuickGenericJniTrampoline, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pQuickGenericJniTrampoline, pLockObject, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pLockObject, pUnlockObject, sizeof(void*));
diff --git a/runtime/oat.h b/runtime/oat.h
index 37d738b..95eb0e1 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr std::array<uint8_t, 4> kOatMagic { { 'o', 'a', 't', '\n' } };
-  // Last oat version changed reason: JNI: Remove `JniMethodFast{Start,End}()`.
-  static constexpr std::array<uint8_t, 4> kOatVersion { { '2', '0', '4', '\0' } };
+  // Last oat version changed reason: Inline IRT frame push/pop into JNI stubs.
+  static constexpr std::array<uint8_t, 4> kOatVersion { { '2', '0', '3', '\0' } };
 
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
   static constexpr const char* kDebuggableKey = "debuggable";
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 9ba3efc..e5b19e5 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -3477,7 +3477,6 @@
   QUICK_ENTRY_POINT_INFO(pJniMethodEndSynchronized)
   QUICK_ENTRY_POINT_INFO(pJniMethodEndWithReference)
   QUICK_ENTRY_POINT_INFO(pJniMethodEndWithReferenceSynchronized)
-  QUICK_ENTRY_POINT_INFO(pJniDecodeReferenceResult)
   QUICK_ENTRY_POINT_INFO(pQuickGenericJniTrampoline)
   QUICK_ENTRY_POINT_INFO(pLockObject)
   QUICK_ENTRY_POINT_INFO(pUnlockObject)
@@ -3587,6 +3586,9 @@
   QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg29)
   QUICK_ENTRY_POINT_INFO(pReadBarrierSlow)
   QUICK_ENTRY_POINT_INFO(pReadBarrierForRootSlow)
+
+  QUICK_ENTRY_POINT_INFO(pJniMethodFastStart)
+  QUICK_ENTRY_POINT_INFO(pJniMethodFastEnd)
 #undef QUICK_ENTRY_POINT_INFO
 
   os << offset;