Clean up JNI entrypoint assembly.

Move JNI entrypoints to `jni_entrypoints_<arch>.S` and
shared helper macros to `asm_support_<arch>.S`. Introduce
some new macros to reduce code duplication. Fix x86-64
using ESP instead of RSP in the JNI lock slow path.
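
For reference, the consolidated LOCK_OBJECT_FAST_PATH / UNLOCK_OBJECT_FAST_PATH
macros all implement the same thin-lock fast path. A minimal C++ sketch of the
locking side follows; the constants, the bare atomic lock word and the helper
names are illustrative stand-ins for the real LOCK_WORD_* definitions, and the
memory barriers are simplified:

    // Sketch only: mirrors the assembly fast path, not the actual runtime code.
    #include <atomic>
    #include <cstdint>

    // Illustrative bit layout: owner thread id in bits 0-15, recursion count
    // in bits 16-27, GC/read-barrier state in bits 28-29, state in bits 30-31.
    constexpr uint32_t kGcStateMaskToggled = 0xcfffffffu;  // All non-GC-state bits.
    constexpr uint32_t kStateOrOwnerMask   = 0xc000ffffu;  // Lock state + owner bits.
    constexpr uint32_t kCountOne           = 0x00010000u;  // One recursion count.
    constexpr uint32_t kCountMask          = 0x0fff0000u;  // Recursion count bits.

    // Returns true if locked on the fast path; false means "take the slow path".
    bool TryLockFast(std::atomic<uint32_t>& lock_word, uint32_t thread_id) {
      while (true) {
        uint32_t lw = lock_word.load(std::memory_order_acquire);  // ldrex/ldaxr/movl
        uint32_t xored = lw ^ thread_id;                          // eor/xorl
        if ((lw & kGcStateMaskToggled) == 0) {
          // Unlocked: `xored` is the thread id plus preserved read barrier bits.
          if (lock_word.compare_exchange_weak(lw, xored)) {  // strex/stxr/cmpxchg
            return true;
          }
          continue;  // Store failed, retry.
        }
        if ((xored & kStateOrOwnerMask) != 0) return false;  // Inflated or other owner.
        uint32_t recursed = lw + kCountOne;                  // Bump the recursion count.
        if ((recursed & kCountMask) == 0) return false;      // Count overflow.
        if (lock_word.compare_exchange_weak(lw, recursed)) return true;
      }
    }

The unlock side mirrors this: store the XORed word (owner cleared, read barrier
bits preserved) when the object is simply locked by the current thread, store a
count decremented by LOCK_WORD_THIN_LOCK_COUNT_ONE when it is locked
recursively, and fall back to the slow path otherwise.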

Rename JNI lock/unlock and read barrier entrypoints to pull
the "jni" to the front and drop "quick" from their names.
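
Concretely, the renames are:

    art_quick_lock_object_jni    -> art_jni_lock_object
    art_quick_unlock_object_jni  -> art_jni_unlock_object
    art_read_barrier_jni         -> art_jni_read_barrier
    artReadBarrierJni            -> artJniReadBarrier
    artUnlockObjectFromJni       -> artJniUnlockObject
    pReadBarrierJni              -> pJniReadBarrier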

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Bug: 172332525
Change-Id: I20d059b07b308283db6c4e36a508480d91ad07fc
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 863f47b..40110d7 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -683,7 +683,7 @@
     //
     // For baker read barrier, do a fast check whether the class is already marked.
     //
-    // Call into the runtime's `art_read_barrier_jni` and have it fix up
+    // Call into the runtime's `art_jni_read_barrier` and have it fix up
     // the class address if it was moved.
     //
     // The entrypoint preserves the method register and argument registers.
@@ -702,7 +702,7 @@
     }
 
     ThreadOffset<kPointerSize> read_barrier = QUICK_ENTRYPOINT_OFFSET(kPointerSize,
-                                                                      pReadBarrierJni);
+                                                                      pJniReadBarrier);
     __ CallFromThread(read_barrier);
 
     // Return to main path.
diff --git a/runtime/arch/arm/asm_support_arm.S b/runtime/arch/arm/asm_support_arm.S
index ff95bdd..000a2d1 100644
--- a/runtime/arch/arm/asm_support_arm.S
+++ b/runtime/arch/arm/asm_support_arm.S
@@ -409,4 +409,97 @@
     .cfi_adjust_cfa_offset -28
 .endm
 
+// Locking is needed for both managed code and JNI stubs.
+.macro LOCK_OBJECT_FAST_PATH obj, tmp1, tmp2, tmp3, slow_lock, can_be_null
+    ldr    \tmp1, [rSELF, #THREAD_ID_OFFSET]
+    .if \can_be_null
+        cbz \obj, \slow_lock
+    .endif
+1:
+    ldrex  \tmp2, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    eor    \tmp3, \tmp2, \tmp1        @ Prepare the value to store if unlocked
+                                      @   (thread id, count of 0 and preserved read barrier bits),
+                                      @ or prepare to compare thread id for recursive lock check
+                                      @   (lock_word.ThreadId() ^ self->ThreadId()).
+    ands   ip, \tmp2, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ Test the non-gc bits.
+    bne    2f                         @ Check if unlocked.
+    @ unlocked case - store tmp3: original lock word plus thread id, preserved read barrier bits.
+    strex  \tmp2, \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    cbnz   \tmp2, 3f                   @ If store failed, retry.
+    dmb    ish                        @ Full (LoadLoad|LoadStore) memory barrier.
+    bx lr
+2:  @ tmp2: original lock word, tmp1: thread_id, tmp3: tmp2 ^ tmp1
+#if LOCK_WORD_THIN_LOCK_COUNT_SHIFT + LOCK_WORD_THIN_LOCK_COUNT_SIZE != LOCK_WORD_GC_STATE_SHIFT
+#error "Expecting thin lock count and gc state in consecutive bits."
+#endif
+                                      @ Check lock word state and thread id together.
+    bfc    \tmp3, \
+           #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, \
+           #(LOCK_WORD_THIN_LOCK_COUNT_SIZE + LOCK_WORD_GC_STATE_SIZE)
+    cbnz   \tmp3, \slow_lock          @ if either of the top two bits are set, or the lock word's
+                                      @ thread id did not match, go slow path.
+    add    \tmp3, \tmp2, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ Increment the recursive lock count.
+                                      @ Extract the new thin lock count for overflow check.
+    ubfx   \tmp2, \tmp3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #LOCK_WORD_THIN_LOCK_COUNT_SIZE
+    cbz    \tmp2, \slow_lock          @ Zero as the new count indicates overflow, go slow path.
+                                      @ strex necessary for read barrier bits.
+    strex  \tmp2, \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    cbnz   \tmp2, 3f                  @ If strex failed, retry.
+    bx lr
+3:
+    b      1b                         @ retry
+.endm
+
+// Unlocking is needed for both managed code and JNI stubs.
+.macro UNLOCK_OBJECT_FAST_PATH obj, tmp1, tmp2, tmp3, slow_unlock, can_be_null
+    ldr    \tmp1, [rSELF, #THREAD_ID_OFFSET]
+    .if \can_be_null
+        cbz    \obj, \slow_unlock
+    .endif
+1:
+#ifndef USE_READ_BARRIER
+    ldr    \tmp2, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+#else
+                                      @ Need to use atomic instructions for read barrier.
+    ldrex  \tmp2, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+#endif
+    eor    \tmp3, \tmp2, \tmp1        @ Prepare the value to store if simply locked
+                                      @   (mostly 0s, and preserved read barrier bits),
+                                      @ or prepare to compare thread id for recursive lock check
+                                      @   (lock_word.ThreadId() ^ self->ThreadId()).
+    ands   ip, \tmp3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ Test the non-gc bits.
+    bne    2f                         @ Locked recursively or by other thread?
+    @ Transition to unlocked.
+    dmb    ish                        @ Full (LoadStore|StoreStore) memory barrier.
+#ifndef USE_READ_BARRIER
+    str    \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+#else
+                                      @ strex necessary for read barrier bits
+    strex  \tmp2, \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    cbnz   \tmp2, 3f                  @ If the store failed, retry.
+#endif
+    bx     lr
+2:  @ tmp2: original lock word, tmp1: thread_id, tmp3: tmp2 ^ tmp1
+#if LOCK_WORD_THIN_LOCK_COUNT_SHIFT + LOCK_WORD_THIN_LOCK_COUNT_SIZE != LOCK_WORD_GC_STATE_SHIFT
+#error "Expecting thin lock count and gc state in consecutive bits."
+#endif
+                                      @ Check lock word state and thread id together,
+    bfc    \tmp3, \
+           #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, \
+           #(LOCK_WORD_THIN_LOCK_COUNT_SIZE + LOCK_WORD_GC_STATE_SIZE)
+    cbnz   \tmp3, \slow_unlock        @ if either of the top two bits are set, or the lock word's
+                                      @ thread id did not match, go slow path.
+    sub    \tmp3, \tmp2, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ Decrement recursive lock count.
+#ifndef USE_READ_BARRIER
+    str    \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+#else
+                                      @ strex necessary for read barrier bits.
+    strex  \tmp2, \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    cbnz   \tmp2, 3f                  @ If the store failed, retry.
+#endif
+    bx     lr
+3:
+    b      1b                         @ retry
+.endm
+
 #endif  // ART_RUNTIME_ARCH_ARM_ASM_SUPPORT_ARM_S_
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index 8f46151..5eaabba 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -173,7 +173,6 @@
   qpoints->pMemcpy = memcpy;
 
   // Read barrier.
-  qpoints->pReadBarrierJni = art_read_barrier_jni;
   UpdateReadBarrierEntrypoints(qpoints, /*is_active=*/ false);
   qpoints->pReadBarrierMarkReg12 = nullptr;  // Cannot use register 12 (IP) to pass arguments.
   qpoints->pReadBarrierMarkReg13 = nullptr;  // Cannot use register 13 (SP) to pass arguments.
diff --git a/runtime/arch/arm/jni_entrypoints_arm.S b/runtime/arch/arm/jni_entrypoints_arm.S
index c0a6288..26c1d31 100644
--- a/runtime/arch/arm/jni_entrypoints_arm.S
+++ b/runtime/arch/arm/jni_entrypoints_arm.S
@@ -16,6 +16,30 @@
 
 #include "asm_support_arm.S"
 
+#define MANAGED_ARGS_R4_LR_SAVE_SIZE (/*s0-s15*/ 16 * 4 + /*r0-r3*/ 4 * 4 + /*r4*/ 4 + /*lr*/ 4)
+
+// Note: R4 is saved for stack alignment.
+.macro SAVE_MANAGED_ARGS_R4_LR_INCREASE_FRAME
+    // Save GPR args r0-r3 and return address. Also save r4 for stack alignment.
+    push   {r0-r4, lr}
+    .cfi_adjust_cfa_offset 24
+    .cfi_rel_offset lr, 20
+    // Save FPR args.
+    vpush  {s0-s15}
+    .cfi_adjust_cfa_offset 64
+.endm
+
+.macro RESTORE_MANAGED_ARGS_R4_AND_RETURN restore_cfa
+    // Restore FPR args.
+    vpop   {s0-s15}
+    .cfi_adjust_cfa_offset -64
+    // Restore GPR args and r4 and return.
+    pop    {r0-r4, pc}
+    .if \restore_cfa
+        .cfi_adjust_cfa_offset 64
+    .endif
+.endm
+
     /*
      * Jni dlsym lookup stub.
      */
@@ -250,25 +274,80 @@
      * Read barrier for the method's declaring class needed by JNI stub for static methods.
      * (We're using a pointer to the declaring class in `ArtMethod` as `jclass`.)
      */
-    .extern artReadBarrierJni
-ENTRY art_read_barrier_jni
+    .extern artJniReadBarrier
+ENTRY art_jni_read_barrier
     // Note: Managed callee-save registers have been saved by the JNI stub.
-    // Save return address, managed GPR args and the method.
-    push  {r0-r3, lr}
-    .cfi_adjust_cfa_offset 20
-    .cfi_rel_offset lr, 16
-    // Increase frame: padding.
-    INCREASE_FRAME 12
-    // Save FPR args.
-    vpush {s0-s15}
-    .cfi_adjust_cfa_offset 64
+    // Save managed args, r4 (for stack alignment) and LR.
+    SAVE_MANAGED_ARGS_R4_LR_INCREASE_FRAME
     // The method argument is already in r0.
-    bl    artReadBarrierJni  // (ArtMethod*)
-    // Restore FPR args.
-    vpop  {s0-s15}
-    .cfi_adjust_cfa_offset -64
-    // Remove padding.
-    DECREASE_FRAME 12
-    // Restore the method and managed args and return.
-    pop   {r0-r3, pc}
-END art_read_barrier_jni
+    bl    artJniReadBarrier  // (ArtMethod*)
+    // Restore args and return.
+    RESTORE_MANAGED_ARGS_R4_AND_RETURN /*restore_cfa*/ 0
+END art_jni_read_barrier
+
+    /*
+     * Entry from JNI stub that tries to lock the object in a fast path and
+     * calls `artLockObjectFromCode()` (the same as for managed code) for the
+     * difficult cases, may block for GC.
+     * Custom calling convention:
+     *     r4 holds the non-null object to lock.
+     *     Callee-save registers have been saved and can be used as temporaries.
+     *     All argument registers need to be preserved.
+     */
+ENTRY art_jni_lock_object
+    LOCK_OBJECT_FAST_PATH r4, r5, r6, r7, .Llock_object_jni_slow, /*can_be_null*/ 0
+
+.Llock_object_jni_slow:
+    // Save managed args, r4 (for stack alignment) and LR.
+    SAVE_MANAGED_ARGS_R4_LR_INCREASE_FRAME
+    // Call `artLockObjectFromCode()`
+    mov    r0, r4                       @ Pass the object to lock.
+    mov    r1, rSELF                    @ Pass Thread::Current().
+    bl     artLockObjectFromCode        @ (Object* obj, Thread*)
+    // Check result.
+    cbnz   r0, 1f
+    // Restore args and r4 and return.
+    RESTORE_MANAGED_ARGS_R4_AND_RETURN /*restore_cfa*/ 1
+1:
+    // All args are irrelevant when throwing an exception and R4 is preserved
+    // by the `artLockObjectFromCode()` call. Load LR and drop saved args and R4.
+    ldr    lr, [sp, #(MANAGED_ARGS_R4_LR_SAVE_SIZE - 4)]
+    .cfi_restore lr
+    DECREASE_FRAME MANAGED_ARGS_R4_LR_SAVE_SIZE
+    // Make a tail call to `artDeliverPendingExceptionFromCode()`.
+    // Rely on the JNI transition frame constructed in the JNI stub.
+    mov    r0, rSELF                           @ Pass Thread::Current().
+    b      artDeliverPendingExceptionFromCode  @ (Thread*)
+END art_jni_lock_object
+
+    /*
+     * Entry from JNI stub that tries to unlock the object in a fast path and calls
+     * `artJniUnlockObject()` for the difficult cases. Note that failure to unlock
+     * is fatal, so we do not need to check for exceptions in the slow path.
+     * Custom calling convention:
+     *     r4 holds the non-null object to unlock.
+     *     Callee-save registers have been saved and can be used as temporaries.
+     *     Return registers r0-r1 and s0-s1 need to be preserved.
+     */
+    .extern artJniUnlockObject
+ENTRY art_jni_unlock_object
+    UNLOCK_OBJECT_FAST_PATH r4, r5, r6, r7, .Lunlock_object_jni_slow, /*can_be_null*/ 0
+
+ .Lunlock_object_jni_slow:
+    // Save GPR return registers and return address. Also save r4 for stack alignment.
+    push   {r0-r1, r4, lr}
+    .cfi_adjust_cfa_offset 16
+    .cfi_rel_offset lr, 12
+    // Save FPR return registers.
+    vpush  {s0-s1}
+    .cfi_adjust_cfa_offset 8
+    // Call `artJniUnlockObject()`.
+    mov    r0, r4                       @ Pass the object to unlock.
+    mov    r1, rSELF                    @ Pass Thread::Current().
+    bl     artJniUnlockObject           @ (Object* obj, Thread*)
+    // Restore FPR return registers.
+    vpop   {s0-s1}
+    .cfi_adjust_cfa_offset -8
+    // Restore GPR return registers and r4 and return.
+    pop    {r0-r1, r4, pc}
+END art_jni_unlock_object
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index ca63914..bc6902d 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -493,46 +493,6 @@
      */
 TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
 
-.macro LOCK_OBJECT_FAST_PATH obj, tmp1, tmp2, tmp3, slow_lock, can_be_null
-    ldr    \tmp1, [rSELF, #THREAD_ID_OFFSET]
-    .if \can_be_null
-        cbz \obj, \slow_lock
-    .endif
-1:
-    ldrex  \tmp2, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    eor    \tmp3, \tmp2, \tmp1        @ Prepare the value to store if unlocked
-                                      @   (thread id, count of 0 and preserved read barrier bits),
-                                      @ or prepare to compare thread id for recursive lock check
-                                      @   (lock_word.ThreadId() ^ self->ThreadId()).
-    ands   ip, \tmp2, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ Test the non-gc bits.
-    bne    2f                         @ Check if unlocked.
-    @ unlocked case - store tmp3: original lock word plus thread id, preserved read barrier bits.
-    strex  \tmp2, \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    cbnz   \tmp2, 3f                   @ If store failed, retry.
-    dmb    ish                        @ Full (LoadLoad|LoadStore) memory barrier.
-    bx lr
-2:  @ tmp2: original lock word, tmp1: thread_id, tmp3: tmp2 ^ tmp1
-#if LOCK_WORD_THIN_LOCK_COUNT_SHIFT + LOCK_WORD_THIN_LOCK_COUNT_SIZE != LOCK_WORD_GC_STATE_SHIFT
-#error "Expecting thin lock count and gc state in consecutive bits."
-#endif
-                                      @ Check lock word state and thread id together.
-    bfc    \tmp3, \
-           #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, \
-           #(LOCK_WORD_THIN_LOCK_COUNT_SIZE + LOCK_WORD_GC_STATE_SIZE)
-    cbnz   \tmp3, \slow_lock          @ if either of the top two bits are set, or the lock word's
-                                      @ thread id did not match, go slow path.
-    add    \tmp3, \tmp2, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ Increment the recursive lock count.
-                                      @ Extract the new thin lock count for overflow check.
-    ubfx   \tmp2, \tmp3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #LOCK_WORD_THIN_LOCK_COUNT_SIZE
-    cbz    \tmp2, \slow_lock          @ Zero as the new count indicates overflow, go slow path.
-                                      @ strex necessary for read barrier bits.
-    strex  \tmp2, \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    cbnz   \tmp2, 3f                  @ If strex failed, retry.
-    bx lr
-3:
-    b      1b                         @ retry
-.endm
-
     /*
      * Entry from managed code that tries to lock the object in a fast path and
      * calls `artLockObjectFromCode()` for the difficult cases, may block for GC.
@@ -562,57 +522,6 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_lock_object_no_inline
 
-.macro UNLOCK_OBJECT_FAST_PATH obj, tmp1, tmp2, tmp3, slow_unlock, can_be_null
-    ldr    \tmp1, [rSELF, #THREAD_ID_OFFSET]
-    .if \can_be_null
-        cbz    \obj, \slow_unlock
-    .endif
-1:
-#ifndef USE_READ_BARRIER
-    ldr    \tmp2, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
-#else
-                                      @ Need to use atomic instructions for read barrier.
-    ldrex  \tmp2, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
-#endif
-    eor    \tmp3, \tmp2, \tmp1        @ Prepare the value to store if simply locked
-                                      @   (mostly 0s, and preserved read barrier bits),
-                                      @ or prepare to compare thread id for recursive lock check
-                                      @   (lock_word.ThreadId() ^ self->ThreadId()).
-    ands   ip, \tmp3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ Test the non-gc bits.
-    bne    2f                         @ Locked recursively or by other thread?
-    @ Transition to unlocked.
-    dmb    ish                        @ Full (LoadStore|StoreStore) memory barrier.
-#ifndef USE_READ_BARRIER
-    str    \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
-#else
-                                      @ strex necessary for read barrier bits
-    strex  \tmp2, \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    cbnz   \tmp2, 3f                  @ If the store failed, retry.
-#endif
-    bx     lr
-2:  @ tmp2: original lock word, tmp1: thread_id, tmp3: tmp2 ^ tmp1
-#if LOCK_WORD_THIN_LOCK_COUNT_SHIFT + LOCK_WORD_THIN_LOCK_COUNT_SIZE != LOCK_WORD_GC_STATE_SHIFT
-#error "Expecting thin lock count and gc state in consecutive bits."
-#endif
-                                      @ Check lock word state and thread id together,
-    bfc    \tmp3, \
-           #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, \
-           #(LOCK_WORD_THIN_LOCK_COUNT_SIZE + LOCK_WORD_GC_STATE_SIZE)
-    cbnz   \tmp3, \slow_unlock        @ if either of the top two bits are set, or the lock word's
-                                      @ thread id did not match, go slow path.
-    sub    \tmp3, \tmp2, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ Decrement recursive lock count.
-#ifndef USE_READ_BARRIER
-    str    \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
-#else
-                                      @ strex necessary for read barrier bits.
-    strex  \tmp2, \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    cbnz   \tmp2, 3f                  @ If the store failed, retry.
-#endif
-    bx     lr
-3:
-    b      1b                         @ retry
-.endm
-
     /*
      * Entry from managed code that tries to unlock the object in a fast path and calls
      * `artUnlockObjectFromCode()` for the difficult cases and delivers exception on failure.
@@ -645,80 +554,6 @@
 END art_quick_unlock_object_no_inline
 
     /*
-     * Entry from JNI stub that tries to lock the object in a fast path and
-     * calls `artLockObjectFromCode()` (the same as for managed code) for the
-     * difficult cases, may block for GC.
-     * Custom calling convention:
-     *     r4 holds the non-null object to lock.
-     *     Callee-save registers have been saved and can be used as temporaries.
-     *     All argument registers need to be preserved.
-     */
-ENTRY art_quick_lock_object_jni
-    LOCK_OBJECT_FAST_PATH r4, r5, r6, r7, .Llock_object_jni_slow, /*can_be_null*/ 0
-
-.Llock_object_jni_slow:
-    // Save GPR args r0-r3 and return address. Also save r4 for stack alignment.
-    push   {r0-r4, lr}
-    .cfi_adjust_cfa_offset 24
-    .cfi_rel_offset lr, 20
-    // Save FPR args.
-    vpush  {s0-s15}
-    .cfi_adjust_cfa_offset 64
-    // Call `artLockObjectFromCode()`
-    mov    r0, r4                       @ Pass the object to lock.
-    mov    r1, rSELF                    @ Pass Thread::Current().
-    bl     artLockObjectFromCode        @ (Object* obj, Thread*)
-    // Restore FPR args.
-    vpop   {s0-s15}
-    .cfi_adjust_cfa_offset -64
-    // Check result.
-    cbnz   r0, 1f
-    // Restore GPR args and r4 and return.
-    pop    {r0-r4, pc}
-1:
-    // GPR args are irrelevant when throwing an exception but pop them anyway with the LR we need.
-    pop    {r0-r4, lr}
-    .cfi_adjust_cfa_offset -24
-    .cfi_restore lr
-    // Make a tail call to `artDeliverPendingExceptionFromCode()`.
-    // Rely on the JNI transition frame constructed in the JNI stub.
-    mov    r0, rSELF                           @ Pass Thread::Current().
-    b      artDeliverPendingExceptionFromCode  @ (Thread*)
-END art_quick_lock_object_jni
-
-    /*
-     * Entry from JNI stub that tries to unlock the object in a fast path and calls
-     * `artUnlockObjectFromJni()` for the difficult cases. Note that failure to unlock
-     * is fatal, so we do not need to check for exceptions in the slow path.
-     * Custom calling convention:
-     *     r4 holds the non-null object to unlock.
-     *     Callee-save registers have been saved and can be used as temporaries.
-     *     Return registers r0-r1 and s0-s1 need to be preserved.
-     */
-    .extern artLockObjectFromJni
-ENTRY art_quick_unlock_object_jni
-    UNLOCK_OBJECT_FAST_PATH r4, r5, r6, r7, .Lunlock_object_jni_slow, /*can_be_null*/ 0
-
- .Lunlock_object_jni_slow:
-    // Save GPR return registers and return address. Also save r4 for stack alignment.
-    push   {r0-r1, r4, lr}
-    .cfi_adjust_cfa_offset 16
-    .cfi_rel_offset lr, 12
-    // Save FPR return registers.
-    vpush  {s0-s1}
-    .cfi_adjust_cfa_offset 8
-    // Call `artUnlockObjectFromJni()`
-    mov    r0, r4                       @ Pass the object to unlock.
-    mov    r1, rSELF                    @ Pass Thread::Current().
-    bl     artUnlockObjectFromJni       @ (Object* obj, Thread*)
-    // Restore FPR return registers.
-    vpop   {s0-s1}
-    .cfi_adjust_cfa_offset -8
-    // Restore GPR return registers and r4 and return.
-    pop    {r0-r1, r4, pc}
-END art_quick_unlock_object_jni
-
-    /*
      * Entry from managed code that calls artInstanceOfFromCode and on failure calls
      * artThrowClassCastExceptionForObject.
      */
diff --git a/runtime/arch/arm64/asm_support_arm64.S b/runtime/arch/arm64/asm_support_arm64.S
index e02d7f2..bad3397 100644
--- a/runtime/arch/arm64/asm_support_arm64.S
+++ b/runtime/arch/arm64/asm_support_arm64.S
@@ -372,4 +372,80 @@
     RETURN_OR_DELIVER_PENDING_EXCEPTION_REG xIP0
 .endm
 
+// Locking is needed for both managed code and JNI stubs.
+.macro LOCK_OBJECT_FAST_PATH obj, slow_lock, can_be_null
+    // Use scratch registers x8-x11 as temporaries.
+    ldr    w9, [xSELF, #THREAD_ID_OFFSET]
+    .if \can_be_null
+        cbz    \obj, \slow_lock
+    .endif
+                                      // Exclusive load/store has no immediate anymore.
+    add    x8, \obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET
+1:
+    ldaxr  w10, [x8]                  // Acquire needed only in most common case.
+    eor    w11, w10, w9               // Prepare the value to store if unlocked
+                                      //   (thread id, count of 0 and preserved read barrier bits),
+                                      // or prepare to compare thread id for recursive lock check
+                                      //   (lock_word.ThreadId() ^ self->ThreadId()).
+    tst    w10, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // Test the non-gc bits.
+    b.ne   2f                         // Check if unlocked.
+    // Unlocked case - store w11: original lock word plus thread id, preserved read barrier bits.
+    stxr   w10, w11, [x8]
+    cbnz   w10, 1b                    // If the store failed, retry.
+    ret
+2:  // w10: original lock word, w9: thread id, w11: w10 ^ w9
+                                      // Check lock word state and thread id together,
+    tst    w11, #(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED)
+    b.ne   \slow_lock
+    add    w11, w10, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // Increment the recursive lock count.
+    tst    w11, #LOCK_WORD_THIN_LOCK_COUNT_MASK_SHIFTED  // Test the new thin lock count.
+    b.eq   \slow_lock                 // Zero as the new count indicates overflow, go slow path.
+    stxr   w10, w11, [x8]
+    cbnz   w10, 1b                    // If the store failed, retry.
+    ret
+.endm
+
+// Unlocking is needed for both managed code and JNI stubs.
+.macro UNLOCK_OBJECT_FAST_PATH obj, slow_unlock, can_be_null
+    // Use scratch registers x8-x11 as temporaries.
+    ldr    w9, [xSELF, #THREAD_ID_OFFSET]
+    .if \can_be_null
+        cbz    \obj, \slow_unlock
+    .endif
+                                      // Exclusive load/store has no immediate anymore.
+    add    x8, \obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET
+1:
+#ifndef USE_READ_BARRIER
+    ldr    w10, [x8]
+#else
+    ldxr   w10, [x8]                  // Need to use atomic instructions for read barrier.
+#endif
+    eor    w11, w10, w9               // Prepare the value to store if simply locked
+                                      //   (mostly 0s, and preserved read barrier bits),
+                                      // or prepare to compare thread id for recursive lock check
+                                      //   (lock_word.ThreadId() ^ self->ThreadId()).
+    tst    w11, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // Test the non-gc bits.
+    b.ne   2f                         // Locked recursively or by other thread?
+    // Transition to unlocked.
+#ifndef USE_READ_BARRIER
+    stlr   w11, [x8]
+#else
+    stlxr  w10, w11, [x8]             // Need to use atomic instructions for read barrier.
+    cbnz   w10, 1b                    // If the store failed, retry.
+#endif
+    ret
+2:
+                                      // Check lock word state and thread id together.
+    tst    w11, #(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED)
+    b.ne   \slow_unlock
+    sub    w11, w10, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // decrement count
+#ifndef USE_READ_BARRIER
+    str    w11, [x8]
+#else
+    stxr   w10, w11, [x8]             // Need to use atomic instructions for read barrier.
+    cbnz   w10, 1b                    // If the store failed, retry.
+#endif
+    ret
+.endm
+
 #endif  // ART_RUNTIME_ARCH_ARM64_ASM_SUPPORT_ARM64_S_
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 986445c..5a2284e 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -187,7 +187,6 @@
   qpoints->pMemcpy = memcpy;
 
   // Read barrier.
-  qpoints->pReadBarrierJni = art_read_barrier_jni;
   qpoints->pReadBarrierMarkReg16 = nullptr;  // IP0 is used as a temp by the asm stub.
   UpdateReadBarrierEntrypoints(qpoints, /*is_active=*/ false);
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
diff --git a/runtime/arch/arm64/jni_entrypoints_arm64.S b/runtime/arch/arm64/jni_entrypoints_arm64.S
index 7546f06..701ce2e 100644
--- a/runtime/arch/arm64/jni_entrypoints_arm64.S
+++ b/runtime/arch/arm64/jni_entrypoints_arm64.S
@@ -16,35 +16,45 @@
 
 #include "asm_support_arm64.S"
 
+#define ALL_ARGS_SIZE (/*x0-x7*/ 8 * 8 + /*d0-d7*/ 8 * 8)
+
+.macro SAVE_ALL_ARGS_INCREASE_FRAME extra_space
+    // Save register args x0-x7, d0-d7 and return address.
+    stp    x0, x1, [sp, #-(ALL_ARGS_SIZE + \extra_space)]!
+    .cfi_adjust_cfa_offset (ALL_ARGS_SIZE + \extra_space)
+    stp    x2, x3, [sp, #16]
+    stp    x4, x5, [sp, #32]
+    stp    x6, x7, [sp, #48]
+    stp    d0, d1, [sp, #64]
+    stp    d2, d3, [sp, #80]
+    stp    d4, d5, [sp, #96]
+    stp    d6, d7, [sp, #112]
+.endm
+
+.macro RESTORE_ALL_ARGS_DECREASE_FRAME extra_space
+    ldp    x2, x3, [sp, #16]
+    ldp    x4, x5, [sp, #32]
+    ldp    x6, x7, [sp, #48]
+    ldp    d0, d1, [sp, #64]
+    ldp    d2, d3, [sp, #80]
+    ldp    d4, d5, [sp, #96]
+    ldp    d6, d7, [sp, #112]
+    ldp    x0, x1, [sp], #(ALL_ARGS_SIZE + \extra_space)
+    .cfi_adjust_cfa_offset -(ALL_ARGS_SIZE + \extra_space)
+.endm
+
     /*
      * Jni dlsym lookup stub.
      */
     .extern artFindNativeMethod
     .extern artFindNativeMethodRunnable
-
 ENTRY art_jni_dlsym_lookup_stub
     // spill regs.
-    stp   x29, x30, [sp, #-16]!
-    .cfi_adjust_cfa_offset 16
-    .cfi_rel_offset x29, 0
-    .cfi_rel_offset x30, 8
-    mov   x29, sp
-    stp   d6, d7,   [sp, #-16]!
-    .cfi_adjust_cfa_offset 16
-    stp   d4, d5,   [sp, #-16]!
-    .cfi_adjust_cfa_offset 16
-    stp   d2, d3,   [sp, #-16]!
-    .cfi_adjust_cfa_offset 16
-    stp   d0, d1,   [sp, #-16]!
-    .cfi_adjust_cfa_offset 16
-    stp   x6, x7,   [sp, #-16]!
-    .cfi_adjust_cfa_offset 16
-    stp   x4, x5,   [sp, #-16]!
-    .cfi_adjust_cfa_offset 16
-    stp   x2, x3,   [sp, #-16]!
-    .cfi_adjust_cfa_offset 16
-    stp   x0, x1,   [sp, #-16]!
-    .cfi_adjust_cfa_offset 16
+    SAVE_ALL_ARGS_INCREASE_FRAME 2 * 8
+    stp   x29, x30, [sp, ALL_ARGS_SIZE]
+    .cfi_rel_offset x29, ALL_ARGS_SIZE
+    .cfi_rel_offset x30, ALL_ARGS_SIZE + 8
+    add   x29, sp, ALL_ARGS_SIZE
 
     mov x0, xSELF   // pass Thread::Current()
     // Call artFindNativeMethod() for normal native and artFindNativeMethodRunnable()
@@ -64,26 +74,10 @@
     mov   x17, x0    // store result in scratch reg.
 
     // load spill regs.
-    ldp   x0, x1,   [sp], #16
-    .cfi_adjust_cfa_offset -16
-    ldp   x2, x3,   [sp], #16
-    .cfi_adjust_cfa_offset -16
-    ldp   x4, x5,   [sp], #16
-    .cfi_adjust_cfa_offset -16
-    ldp   x6, x7,   [sp], #16
-    .cfi_adjust_cfa_offset -16
-    ldp   d0, d1,   [sp], #16
-    .cfi_adjust_cfa_offset -16
-    ldp   d2, d3,   [sp], #16
-    .cfi_adjust_cfa_offset -16
-    ldp   d4, d5,   [sp], #16
-    .cfi_adjust_cfa_offset -16
-    ldp   d6, d7,   [sp], #16
-    .cfi_adjust_cfa_offset -16
-    ldp   x29, x30, [sp], #16
-    .cfi_adjust_cfa_offset -16
+    ldp   x29, x30, [sp, #ALL_ARGS_SIZE]
     .cfi_restore x29
     .cfi_restore x30
+    RESTORE_ALL_ARGS_DECREASE_FRAME 2 * 8
 
     cbz   x17, 1f   // is method code null ?
     br    x17       // if non-null, tail call to method's code.
@@ -101,17 +95,9 @@
     tbnz  x15, #0, art_jni_dlsym_lookup_stub
 
     // Save args, the hidden arg and caller PC. No CFI needed for args and the hidden arg.
-    stp   x0, x1, [sp, #-(8 * 8 + 8 * 8 + 2 * 8)]!
-    .cfi_adjust_cfa_offset (8 * 8 + 8 * 8 + 2 * 8)
-    stp   x2, x3, [sp, #16]
-    stp   x4, x5, [sp, #32]
-    stp   x6, x7, [sp, #48]
-    stp   d0, d1, [sp, #64]
-    stp   d2, d3, [sp, #80]
-    stp   d4, d5, [sp, #96]
-    stp   d6, d7, [sp, #112]
-    stp   x15, lr, [sp, #128]
-    .cfi_rel_offset lr, 136
+    SAVE_ALL_ARGS_INCREASE_FRAME 2 * 8
+    stp   x15, lr, [sp, #ALL_ARGS_SIZE]
+    .cfi_rel_offset lr, ALL_ARGS_SIZE + 8
 
     // Call artCriticalNativeFrameSize(method, caller_pc)
     mov   x0, x15  // x0 := method (from hidden arg)
@@ -122,17 +108,9 @@
     mov   x14, x0
 
     // Restore args, the hidden arg and caller PC.
-    ldp   x2, x3, [sp, #16]
-    ldp   x4, x5, [sp, #32]
-    ldp   x6, x7, [sp, #48]
-    ldp   d0, d1, [sp, #64]
-    ldp   d2, d3, [sp, #80]
-    ldp   d4, d5, [sp, #96]
-    ldp   d6, d7, [sp, #112]
     ldp   x15, lr, [sp, #128]
     .cfi_restore lr
-    ldp   x0, x1, [sp], #(8 * 8 + 8 * 8 + 2 * 8)
-    .cfi_adjust_cfa_offset -(8 * 8 + 8 * 8 + 2 * 8)
+    RESTORE_ALL_ARGS_DECREASE_FRAME 2 * 8
 
     // Reserve space for a SaveRefsAndArgs managed frame, either for the actual runtime
     // method or for a GenericJNI frame which is similar but has a native method and a tag.
@@ -337,39 +315,89 @@
      * Read barrier for the method's declaring class needed by JNI stub for static methods.
      * (We're using a pointer to the declaring class in `ArtMethod` as `jclass`.)
      */
-    .extern artReadBarrierJni
-ENTRY art_read_barrier_jni
+    .extern artJniReadBarrier
+ENTRY art_jni_read_barrier
     // Note: Managed callee-save registers have been saved by the JNI stub.
-    // Save the method and prepare space for other managed args and return address.
-    str   x0, [sp, #-144]!
-    .cfi_adjust_cfa_offset 144
-    // Save FPR args.
-    stp   d0, d1, [sp, #16]
-    stp   d2, d3, [sp, #32]
-    stp   d4, d5, [sp, #48]
-    stp   d6, d7, [sp, #64]
-    // Save GPR args and return address.
-    stp   x1, x2, [sp, #80]
-    stp   x3, x4, [sp, #96]
-    stp   x5, x6, [sp, #112]
-    stp   x7, lr, [sp, #128]
-    .cfi_rel_offset lr, 136
+    // Save args and LR.
+    SAVE_ALL_ARGS_INCREASE_FRAME /*padding*/ 8 + /*LR*/ 8
+    str   lr, [sp, #(ALL_ARGS_SIZE + /*padding*/ 8)]
+    .cfi_rel_offset lr, ALL_ARGS_SIZE + /*padding*/ 8
     // The method argument is already in x0.
-    bl artReadBarrierJni  // (ArtMethod*)
-    // Restore FPR args.
-    ldp   d0, d1, [sp, #16]
-    ldp   d2, d3, [sp, #32]
-    ldp   d4, d5, [sp, #48]
-    ldp   d6, d7, [sp, #64]
-    // Restore GPR args and return address.
-    ldp   x1, x2, [sp, #80]
-    ldp   x3, x4, [sp, #96]
-    ldp   x5, x6, [sp, #112]
-    ldp   x7, lr, [sp, #128]
+    bl artJniReadBarrier  // (ArtMethod*)
+    // Restore LR and args.
+    ldr   lr, [sp, #(ALL_ARGS_SIZE + /*padding*/ 8)]
     .cfi_restore lr
-    // Restore method and remove spill area.
-    ldr   x0, [sp], #144
-    .cfi_adjust_cfa_offset -144
+    RESTORE_ALL_ARGS_DECREASE_FRAME /*padding*/ 8 + /*LR*/ 8
     // Return.
     ret
-END art_read_barrier_jni
+END art_jni_read_barrier
+
+    /*
+     * Entry from JNI stub that tries to lock the object in a fast path and
+     * calls `artLockObjectFromCode()` (the same as for managed code) for the
+     * difficult cases, may block for GC.
+     * Custom calling convention:
+     *     x15 holds the non-null object to lock.
+     *     Callee-save registers have been saved and can be used as temporaries.
+     *     All argument registers need to be preserved.
+     */
+    .extern artLockObjectFromCode
+ENTRY art_jni_lock_object
+    LOCK_OBJECT_FAST_PATH x15, .Llock_object_jni_slow, /*can_be_null*/ 0
+
+.Llock_object_jni_slow:
+    SAVE_ALL_ARGS_INCREASE_FRAME /*padding*/ 8 + /*LR*/ 8
+    str    lr, [sp, #(ALL_ARGS_SIZE + /*padding*/ 8)]
+    .cfi_rel_offset lr, ALL_ARGS_SIZE + /*padding*/ 8
+    // Call `artLockObjectFromCode()`.
+    mov    x0, x15                    // Pass the object to lock.
+    mov    x1, xSELF                  // Pass Thread::Current().
+    bl     artLockObjectFromCode      // (Object* obj, Thread*)
+    // Restore return address.
+    ldr    lr, [sp, #(ALL_ARGS_SIZE + /*padding*/ 8)]
+    .cfi_restore lr
+    // Check result.
+    cbnz   x0, 1f
+    // Restore register args x0-x7, d0-d7 and return.
+    RESTORE_ALL_ARGS_DECREASE_FRAME /*padding*/ 8 + /*LR*/ 8
+    ret
+    .cfi_adjust_cfa_offset (ALL_ARGS_SIZE + /*padding*/ 8 + /*LR*/ 8)
+1:
+    // All args are irrelevant when throwing an exception. Remove the spill area.
+    DECREASE_FRAME (ALL_ARGS_SIZE + /*padding*/ 8 + /*LR*/ 8)
+    // Make a tail call to `artDeliverPendingExceptionFromCode()`.
+    // Rely on the JNI transition frame constructed in the JNI stub.
+    mov    x0, xSELF                           // Pass Thread::Current().
+    b      artDeliverPendingExceptionFromCode  // (Thread*)
+END art_jni_lock_object
+
+    /*
+     * Entry from JNI stub that tries to unlock the object in a fast path and calls
+     * `artJniUnlockObject()` for the difficult cases. Note that failure to unlock
+     * is fatal, so we do not need to check for exceptions in the slow path.
+     * Custom calling convention:
+     *     x15 holds the non-null object to unlock.
+     *     Callee-save registers have been saved and can be used as temporaries.
+     *     Return registers x0 and d0 need to be preserved.
+     */
+    .extern artJniUnlockObject
+ENTRY art_jni_unlock_object
+    UNLOCK_OBJECT_FAST_PATH x15, .Lunlock_object_jni_slow, /*can_be_null*/ 0
+
+ .Lunlock_object_jni_slow:
+    // Save return registers and return address.
+    stp    x0, lr, [sp, #-32]!
+    .cfi_adjust_cfa_offset 32
+    .cfi_rel_offset lr, 8
+    str    d0, [sp, #16]
+    // Call `artJniUnlockObject()`.
+    mov    x0, x15                    // Pass the object to unlock.
+    mov    x1, xSELF                  // Pass Thread::Current().
+    bl     artJniUnlockObject         // (Object* obj, Thread*)
+    // Restore return registers and return.
+    ldr    d0, [sp, #16]
+    ldp    x0, lr, [sp], #32
+    .cfi_adjust_cfa_offset -32
+    .cfi_restore lr
+    ret
+END art_jni_unlock_object
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 75567dd..a4ac689 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -881,38 +881,6 @@
     br  xIP1
 END art_quick_do_long_jump
 
-.macro LOCK_OBJECT_FAST_PATH obj, slow_lock, can_be_null
-    // Use scratch registers x8-x11 as temporaries.
-    ldr    w9, [xSELF, #THREAD_ID_OFFSET]
-    .if \can_be_null
-        cbz    \obj, \slow_lock
-    .endif
-                                      // Exclusive load/store has no immediate anymore.
-    add    x8, \obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET
-1:
-    ldaxr  w10, [x8]                  // Acquire needed only in most common case.
-    eor    w11, w10, w9               // Prepare the value to store if unlocked
-                                      //   (thread id, count of 0 and preserved read barrier bits),
-                                      // or prepare to compare thread id for recursive lock check
-                                      //   (lock_word.ThreadId() ^ self->ThreadId()).
-    tst    w10, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // Test the non-gc bits.
-    b.ne   2f                         // Check if unlocked.
-    // Unlocked case - store w11: original lock word plus thread id, preserved read barrier bits.
-    stxr   w10, w11, [x8]
-    cbnz   w10, 1b                    // If the store failed, retry.
-    ret
-2:  // w10: original lock word, w9: thread id, w11: w10 ^ w11
-                                      // Check lock word state and thread id together,
-    tst    w11, #(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED)
-    b.ne   \slow_lock
-    add    w11, w10, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // Increment the recursive lock count.
-    tst    w11, #LOCK_WORD_THIN_LOCK_COUNT_MASK_SHIFTED  // Test the new thin lock count.
-    b.eq   \slow_lock                 // Zero as the new count indicates overflow, go slow path.
-    stxr   w10, w11, [x8]
-    cbnz   w10, 1b                    // If the store failed, retry.
-    ret
-.endm
-
     /*
      * Entry from managed code that tries to lock the object in a fast path and
      * calls `artLockObjectFromCode()` for the difficult cases, may block for GC.
@@ -937,48 +905,6 @@
     RETURN_IF_W0_IS_ZERO_OR_DELIVER
 END art_quick_lock_object_no_inline
 
-.macro UNLOCK_OBJECT_FAST_PATH obj, slow_unlock, can_be_null
-    // Use scratch registers x8-x11 as temporaries.
-    ldr    w9, [xSELF, #THREAD_ID_OFFSET]
-    .if \can_be_null
-        cbz    \obj, \slow_unlock
-    .endif
-                                      // Exclusive load/store has no immediate anymore.
-    add    x8, \obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET
-1:
-#ifndef USE_READ_BARRIER
-    ldr    w10, [x8]
-#else
-    ldxr   w10, [x8]                  // Need to use atomic instructions for read barrier.
-#endif
-    eor    w11, w10, w9               // Prepare the value to store if simply locked
-                                      //   (mostly 0s, and preserved read barrier bits),
-                                      // or prepare to compare thread id for recursive lock check
-                                      //   (lock_word.ThreadId() ^ self->ThreadId()).
-    tst    w11, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // Test the non-gc bits.
-    b.ne   2f                         // Locked recursively or by other thread?
-    // Transition to unlocked.
-#ifndef USE_READ_BARRIER
-    stlr   w11, [x8]
-#else
-    stlxr  w10, w11, [x8]             // Need to use atomic instructions for read barrier.
-    cbnz   w10, 1b                    // If the store failed, retry.
-#endif
-    ret
-2:
-                                      // Check lock word state and thread id together.
-    tst    w11, #(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED)
-    b.ne   \slow_unlock
-    sub    w11, w10, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // decrement count
-#ifndef USE_READ_BARRIER
-    str    w11, [x8]
-#else
-    stxr   w10, w11, [x8]             // Need to use atomic instructions for read barrier.
-    cbnz   w10, 1b                    // If the store failed, retry.
-#endif
-    ret
-.endm
-
     /*
      * Entry from managed code that tries to unlock the object in a fast path and calls
      * `artUnlockObjectFromCode()` for the difficult cases and delivers exception on failure.
@@ -1005,91 +931,6 @@
 END art_quick_unlock_object_no_inline
 
     /*
-     * Entry from JNI stub that tries to lock the object in a fast path and
-     * calls `artLockObjectFromCode()` (the same as for managed code) for the
-     * difficult cases, may block for GC.
-     * Custom calling convention:
-     *     x15 holds the non-null object to lock.
-     *     Callee-save registers have been saved and can be used as temporaries.
-     *     All argument registers need to be preserved.
-     */
-ENTRY art_quick_lock_object_jni
-    LOCK_OBJECT_FAST_PATH x15, .Llock_object_jni_slow, /*can_be_null*/ 0
-
-.Llock_object_jni_slow:
-    // Save register args x0-x7, d0-d7 and return address.
-    stp    x0, x1, [sp, #-(8 * 8 + 8 * 8 + /*padding*/ 8 + 8)]!
-    .cfi_adjust_cfa_offset (8 * 8 + 8 * 8 + /*padding*/ 8 + 8)
-    stp    x2, x3, [sp, #16]
-    stp    x4, x5, [sp, #32]
-    stp    x6, x7, [sp, #48]
-    stp    d0, d1, [sp, #64]
-    stp    d2, d3, [sp, #80]
-    stp    d4, d5, [sp, #96]
-    stp    d6, d7, [sp, #112]
-    str    lr, [sp, #136]
-    .cfi_rel_offset lr, 136
-    // Call `artLockObjectFromCode()`
-    mov    x0, x15                    // Pass the object to lock.
-    mov    x1, xSELF                  // Pass Thread::Current().
-    bl     artLockObjectFromCode      // (Object* obj, Thread*)
-    // Restore return address.
-    ldr    lr, [sp, #136]
-    .cfi_restore lr
-    // Check result.
-    cbnz   x0, 1f
-    // Restore register args x0-x7, d0-d7 and return.
-    ldp    x2, x3, [sp, #16]
-    ldp    x4, x5, [sp, #32]
-    ldp    x6, x7, [sp, #48]
-    ldp    d0, d1, [sp, #64]
-    ldp    d2, d3, [sp, #80]
-    ldp    d4, d5, [sp, #96]
-    ldp    d6, d7, [sp, #112]
-    ldp    x0, x1, [sp], #(8 * 8 + 8 * 8 + /*padding*/ 8 + 8)
-    .cfi_adjust_cfa_offset -(8 * 8 + 8 * 8 + /*padding*/ 8 + 8)
-    ret
-    .cfi_adjust_cfa_offset (8 * 8 + 8 * 8 + /*padding*/ 8 + 8)
-1:
-    // All args are irrelevant when throwing an exception. Remove the spill area.
-    DECREASE_FRAME (8 * 8 + 8 * 8 + /*padding*/ 8 + 8)
-    // Make a tail call to `artDeliverPendingExceptionFromCode()`.
-    // Rely on the JNI transition frame constructed in the JNI stub.
-    mov    x0, xSELF                           // Pass Thread::Current().
-    b      artDeliverPendingExceptionFromCode  // (Thread*)
-END art_quick_lock_object_jni
-
-    /*
-     * Entry from JNI stub that tries to unlock the object in a fast path and calls
-     * `artUnlockObjectFromJni()` for the difficult cases. Note that failure to unlock
-     * is fatal, so we do not need to check for exceptions in the slow path.
-     * Custom calling convention:
-     *     x15 holds the non-null object to unlock.
-     *     Callee-save registers have been saved and can be used as temporaries.
-     *     Return registers r0 and d0 need to be preserved.
-     */
-ENTRY art_quick_unlock_object_jni
-    UNLOCK_OBJECT_FAST_PATH x15, .Lunlock_object_jni_slow, /*can_be_null*/ 0
-
- .Lunlock_object_jni_slow:
-    // Save return registers and return address.
-    stp    x0, lr, [sp, #-32]!
-    .cfi_adjust_cfa_offset 32
-    .cfi_rel_offset lr, 8
-    str    d0, [sp, #16]
-    // Call `artUnlockObjectFromJni()`
-    mov    x0, x15                    // Pass the object to unlock.
-    mov    x1, xSELF                  // Pass Thread::Current().
-    bl     artUnlockObjectFromJni     // (Object* obj, Thread*)
-    // Restore return registers and return.
-    ldr    d0, [sp, #16]
-    ldp    x0, lr, [sp], #32
-    .cfi_adjust_cfa_offset -32
-    .cfi_restore lr
-    ret
-END art_quick_unlock_object_jni
-
-    /*
      * Entry from managed code that calls artInstanceOfFromCode and on failure calls
      * artThrowClassCastExceptionForObject.
      */
diff --git a/runtime/arch/x86/asm_support_x86.S b/runtime/arch/x86/asm_support_x86.S
index bd8fe24..c42aa67 100644
--- a/runtime/arch/x86/asm_support_x86.S
+++ b/runtime/arch/x86/asm_support_x86.S
@@ -409,4 +409,78 @@
     DELIVER_PENDING_EXCEPTION
 END_MACRO
 
+// Locking is needed for both managed code and JNI stubs.
+MACRO4(LOCK_OBJECT_FAST_PATH, obj, tmp, saved_eax, slow_lock)
+1:
+    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)), %eax  // EAX := lock word
+    movl %fs:THREAD_ID_OFFSET, REG_VAR(tmp)  // tmp: thread id.
+    xorl %eax, REG_VAR(tmp)               // tmp: thread id with count 0 + read barrier bits.
+    testl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %eax  // Test the non-gc bits.
+    jnz  2f                               // Check if unlocked.
+    // Unlocked case - store tmp: original lock word plus thread id, preserved read barrier bits.
+                                          // EAX: old val, tmp: new val.
+    lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
+    jnz  1b                               // cmpxchg failed retry
+    .ifnc \saved_eax, none
+        movl REG_VAR(saved_eax), %eax     // Restore EAX.
+    .endif
+    ret
+2:  // EAX: original lock word, tmp: thread id ^ EAX
+                                          // Check lock word state and thread id together,
+    testl LITERAL(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED), \
+          REG_VAR(tmp)
+    jne  \slow_lock                       // Slow path if either of the two high bits are set.
+                                          // Increment the recursive lock count.
+    leal LOCK_WORD_THIN_LOCK_COUNT_ONE(%eax), REG_VAR(tmp)
+    testl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_MASK_SHIFTED), REG_VAR(tmp)
+    jz   \slow_lock                       // If count overflowed, go to slow lock.
+    // Update lockword for recursive lock, cmpxchg necessary for read barrier bits.
+                                          // EAX: old val, tmp: new val.
+    lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
+    jnz  1b                               // cmpxchg failed retry
+    .ifnc \saved_eax, none
+        movl REG_VAR(saved_eax), %eax     // Restore EAX.
+    .endif
+    ret
+END_MACRO
+
+// Unlocking is needed for both managed code and JNI stubs.
+MACRO4(UNLOCK_OBJECT_FAST_PATH, obj, tmp, saved_eax, slow_unlock)
+1:
+    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)), %eax  // EAX := lock word
+    movl %fs:THREAD_ID_OFFSET, REG_VAR(tmp)  // tmp := thread id
+    xorl %eax, REG_VAR(tmp)               // tmp := thread id ^ lock word
+    test LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), REG_VAR(tmp)
+    jnz  2f                               // Check if simply locked.
+    // Transition to unlocked.
+#ifndef USE_READ_BARRIER
+    movl REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
+#else
+    lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
+    jnz  1b                               // cmpxchg failed retry
+#endif
+    .ifnc \saved_eax, none
+        movl REG_VAR(saved_eax), %eax     // Restore EAX.
+    .endif
+    ret
+2:  // EAX: original lock word, tmp: lock_word ^ thread id
+                                          // Check lock word state and thread id together.
+    testl LITERAL(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED), \
+          REG_VAR(tmp)
+    jnz  \slow_unlock
+    // Update lockword for recursive unlock, cmpxchg necessary for read barrier bits.
+                                          // tmp: new lock word with decremented count.
+    leal -LOCK_WORD_THIN_LOCK_COUNT_ONE(%eax), REG_VAR(tmp)
+#ifndef USE_READ_BARRIER
+    movl REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
+#else
+    lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
+    jnz  1b                               // cmpxchg failed retry
+#endif
+    .ifnc \saved_eax, none
+        movl REG_VAR(saved_eax), %eax     // Restore EAX.
+    .endif
+    ret
+END_MACRO
+
 #endif  // ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_S_
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index 1a56e43..66d6f39 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -97,7 +97,6 @@
   qpoints->pMemcpy = art_quick_memcpy;
 
   // Read barrier.
-  qpoints->pReadBarrierJni = art_read_barrier_jni;
   UpdateReadBarrierEntrypoints(qpoints, /*is_active=*/ false);
   qpoints->pReadBarrierMarkReg04 = nullptr;  // Cannot use register 4 (ESP) to pass arguments.
   // x86 has only 8 core registers.
diff --git a/runtime/arch/x86/jni_entrypoints_x86.S b/runtime/arch/x86/jni_entrypoints_x86.S
index 78c71ba..049a0bd 100644
--- a/runtime/arch/x86/jni_entrypoints_x86.S
+++ b/runtime/arch/x86/jni_entrypoints_x86.S
@@ -16,6 +16,38 @@
 
 #include "asm_support_x86.S"
 
+#define MANAGED_ARGS_SAVE_SIZE (/*xmm0-xmm3*/ 4 * 8 + /*padding*/ 4 + /*GPR args*/ 4 * 4)
+
+// Save register args and add space for outgoing arguments.
+// With `call_args_space = 0`, the ESP shall be 8-byte aligned but not 16-byte aligned,
+// so either the `call_args_space` should be 8 (or 24, 40, ...) or the user of the macro
+// needs to adjust the ESP explicitly afterwards.
+MACRO2(SAVE_MANAGED_ARGS_INCREASE_FRAME, eax_value, call_args_space)
+    // Return address is on the stack.
+    PUSH_ARG ebx
+    PUSH_ARG edx
+    PUSH_ARG ecx
+    PUSH_ARG \eax_value
+    // Make xmm<n> spill slots 8-byte aligned.
+    INCREASE_FRAME (\call_args_space + /*FPRs*/ 4 * 8 + /*padding*/ 4)
+    movsd %xmm0, \call_args_space + 0(%esp)
+    movsd %xmm1, \call_args_space + 8(%esp)
+    movsd %xmm2, \call_args_space + 16(%esp)
+    movsd %xmm3, \call_args_space + 24(%esp)
+END_MACRO
+
+MACRO1(RESTORE_MANAGED_ARGS_DECREASE_FRAME, call_args_space)
+    movsd \call_args_space + 0(%esp), %xmm0
+    movsd \call_args_space + 8(%esp), %xmm1
+    movsd \call_args_space + 16(%esp), %xmm2
+    movsd \call_args_space + 24(%esp), %xmm3
+    DECREASE_FRAME \call_args_space + /*FPR args*/ 4 * 8 + /*padding*/ 4
+    POP_ARG eax
+    POP_ARG ecx
+    POP_ARG edx
+    POP_ARG ebx
+END_MACRO
+
     /*
      * Jni dlsym lookup stub.
      */
@@ -207,37 +239,87 @@
      * Read barrier for the method's declaring class needed by JNI stub for static methods.
      * (We're using a pointer to the declaring class in `ArtMethod` as `jclass`.)
      */
-DEFINE_FUNCTION art_read_barrier_jni
+DEFINE_FUNCTION art_jni_read_barrier
     // Note: Managed callee-save registers have been saved by the JNI stub.
-    // Save managed GPR args.
-    PUSH_ARG ebx
-    PUSH_ARG edx
-    PUSH_ARG ecx
-    // Save the method.
-    PUSH_ARG eax
-    // Increase frame: argument (4), padding (4), aligned FPR args save area (4 * 8), padding (4).
-    INCREASE_FRAME 4 + 4 + 4 * 8 + 4
-    // Save FPR args.
-    movsd %xmm0, 8(%esp)
-    movsd %xmm1, 16(%esp)
-    movsd %xmm2, 24(%esp)
-    movsd %xmm3, 32(%esp)
+    // Save register args EAX, ECX, EDX, EBX, xmm0-xmm3, add argument space and padding.
+    SAVE_MANAGED_ARGS_INCREASE_FRAME eax, /*argument*/ 4 + /*padding*/ 4
     // Pass the method argument.
     movl %eax, (%esp);
-    call SYMBOL(artReadBarrierJni)  // (ArtMethod*)
-    // Restore FPR args.
-    movsd 8(%esp), %xmm0
-    movsd 16(%esp), %xmm1
-    movsd 24(%esp), %xmm2
-    movsd 32(%esp), %xmm3
-    // Remove arg space, FPR args save area and padding.
-    DECREASE_FRAME 4 + 4 + 4 * 8 + 4
-    // Restore the method.
-    POP_ARG eax
-    // Restore managed args.
-    POP_ARG ecx
-    POP_ARG edx
-    POP_ARG ebx
-    // Return.
+    call SYMBOL(artJniReadBarrier)  // (ArtMethod*)
+    // Restore register args EAX, ECX, EDX, EBX, xmm0-xmm3 and return.
+    RESTORE_MANAGED_ARGS_DECREASE_FRAME /*argument*/ 4 + /*padding*/ 4
     ret
-END_FUNCTION art_read_barrier_jni
+END_FUNCTION art_jni_read_barrier
+
+    /*
+     * Entry from JNI stub that tries to lock the object in a fast path and
+     * calls `artLockObjectFromCode()` (the same as for managed code) for the
+     * difficult cases, may block for GC.
+     * Custom calling convention:
+     *     EBP holds the non-null object to lock.
+     *     Callee-save registers have been saved and can be used as temporaries (except EBP).
+     *     All argument registers need to be preserved.
+     */
+DEFINE_FUNCTION art_jni_lock_object
+    movl %eax, %edi                       // Preserve EAX in a callee-save register.
+    LOCK_OBJECT_FAST_PATH ebp, esi, /*saved_eax*/ edi, .Llock_object_jni_slow
+
+.Llock_object_jni_slow:
+    // Save register args EAX, ECX, EDX, EBX, xmm0-xmm3; original value of EAX is in EDI.
+    SAVE_MANAGED_ARGS_INCREASE_FRAME edi, /*call_args_space*/ 0
+    // Note: The stack is not 16-byte aligned here but it shall be after pushing args for the call.
+    // Call `artLockObjectFromCode()`
+    pushl %fs:THREAD_SELF_OFFSET          // Pass Thread::Current().
+    CFI_ADJUST_CFA_OFFSET(4)
+    PUSH_ARG ebp                          // Pass the object to lock.
+    call SYMBOL(artLockObjectFromCode)    // (object, Thread*)
+    // Check result.
+    testl %eax, %eax
+    jnz   1f
+    // Restore register args EAX, ECX, EDX, EBX, xmm0-xmm3 and return.
+    RESTORE_MANAGED_ARGS_DECREASE_FRAME /*call_args_space*/ 8
+    ret
+    .cfi_adjust_cfa_offset (/*call args*/ 8 + MANAGED_ARGS_SAVE_SIZE)
+1:
+    // All args are irrelevant when throwing an exception.
+    // Remove the spill area except for new padding to align stack.
+    DECREASE_FRAME (/*call args*/ 8 + MANAGED_ARGS_SAVE_SIZE - /*new padding*/ 8)
+    // Rely on the JNI transition frame constructed in the JNI stub.
+    pushl %fs:THREAD_SELF_OFFSET          // pass Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    call SYMBOL(artDeliverPendingExceptionFromCode)  // (Thread*)
+    UNREACHABLE
+END_FUNCTION art_jni_lock_object
+
+    /*
+     * Entry from JNI stub that tries to unlock the object in a fast path and calls
+     * `artJniUnlockObject()` for the difficult cases. Note that failure to unlock
+     * is fatal, so we do not need to check for exceptions in the slow path.
+     * Custom calling convention:
+     *     EBP holds the non-null object to unlock.
+     *     Callee-save registers have been saved and can be used as temporaries (except EBP).
+     *     Return registers EAX, EDX and xmm0 need to be preserved.
+     */
+DEFINE_FUNCTION art_jni_unlock_object
+    movl %eax, %edi                       // Preserve EAX in a different register.
+    UNLOCK_OBJECT_FAST_PATH ebp, esi, /*saved_eax*/ edi, .Lunlock_object_jni_slow
+
+ .Lunlock_object_jni_slow:
+    // Save return registers.
+    PUSH_ARG edx
+    PUSH_ARG edi  // Original contents of EAX.
+    INCREASE_FRAME /*xmm0*/ 8 + /*padding*/ 4
+    movsd %xmm0, 0(%esp)
+    // Note: The stack is not 16-byte aligned here but it shall be after pushing args for the call.
+    // Call `artJniUnlockObject()`.
+    pushl %fs:THREAD_SELF_OFFSET          // Pass Thread::Current().
+    CFI_ADJUST_CFA_OFFSET(4)
+    PUSH_ARG ebp                          // Pass the object to unlock.
+    call SYMBOL(artJniUnlockObject)       // (object, Thread*)
+    // Restore return registers and return.
+    movsd 8(%esp), %xmm0
+    DECREASE_FRAME /*call args*/ 8 + /*xmm0*/ 8 + /*padding*/ 4
+    POP_ARG eax
+    POP_ARG edx
+    ret
+END_FUNCTION art_jni_unlock_object
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index d16f15c..7f1311c 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1133,40 +1133,6 @@
 
 TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO
 
-MACRO4(LOCK_OBJECT_FAST_PATH, obj, tmp, saved_eax, slow_lock)
-1:
-    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)), %eax  // EAX := lock word
-    movl %fs:THREAD_ID_OFFSET, REG_VAR(tmp)  // tmp: thread id.
-    xorl %eax, REG_VAR(tmp)               // tmp: thread id with count 0 + read barrier bits.
-    testl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %eax  // Test the non-gc bits.
-    jnz  2f                               // Check if unlocked.
-    // Unlocked case - store tmp: original lock word plus thread id, preserved read barrier bits.
-                                          // EAX: old val, tmp: new val.
-    lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
-    jnz  1b                               // cmpxchg failed retry
-    .ifnc \saved_eax, none
-        movl REG_VAR(saved_eax), %eax     // Restore EAX.
-    .endif
-    ret
-2:  // EAX: original lock word, tmp: thread id ^ EAX
-                                          // Check lock word state and thread id together,
-    testl LITERAL(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED), \
-          REG_VAR(tmp)
-    jne  \slow_lock                       // Slow path if either of the two high bits are set.
-                                          // Increment the recursive lock count.
-    leal LOCK_WORD_THIN_LOCK_COUNT_ONE(%eax), REG_VAR(tmp)
-    testl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_MASK_SHIFTED), REG_VAR(tmp)
-    jz   \slow_lock                       // If count overflowed, go to slow lock.
-    // Update lockword for recursive lock, cmpxchg necessary for read barrier bits.
-                                          // EAX: old val, tmp: new val.
-    lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
-    jnz  1b                               // cmpxchg failed retry
-    .ifnc \saved_eax, none
-        movl REG_VAR(saved_eax), %eax     // Restore EAX.
-    .endif
-    ret
-END_MACRO
-
     /*
      * Entry from managed code that tries to lock the object in a fast path and
      * calls `artLockObjectFromCode()` for the difficult cases, may block for GC.
@@ -1200,44 +1166,6 @@
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_lock_object_no_inline
 
-MACRO4(UNLOCK_OBJECT_FAST_PATH, obj, tmp, saved_eax, slow_unlock)
-1:
-    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)), %eax  // EAX := lock word
-    movl %fs:THREAD_ID_OFFSET, REG_VAR(tmp)  // tmp := thread id
-    xorl %eax, REG_VAR(tmp)               // tmp := thread id ^ lock word
-    test LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), REG_VAR(tmp)
-    jnz  2f                               // Check if simply locked.
-    // Transition to unlocked.
-#ifndef USE_READ_BARRIER
-    movl REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
-#else
-    lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
-    jnz  1b                               // cmpxchg failed retry
-#endif
-    .ifnc \saved_eax, none
-        movl REG_VAR(saved_eax), %eax     // Restore EAX.
-    .endif
-    ret
-2:  // EAX: original lock word, tmp: lock_word ^ thread id
-                                          // Check lock word state and thread id together.
-    testl LITERAL(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED), \
-          REG_VAR(tmp)
-    jnz  \slow_unlock
-    // Update lockword for recursive unlock, cmpxchg necessary for read barrier bits.
-                                          // tmp: new lock word with decremented count.
-    leal -LOCK_WORD_THIN_LOCK_COUNT_ONE(%eax), REG_VAR(tmp)
-#ifndef USE_READ_BARRIER
-    movl REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
-#else
-    lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
-    jnz  1b                               // cmpxchg failed retry
-#endif
-    .ifnc \saved_eax, none
-        movl REG_VAR(saved_eax), %eax     // Restore EAX.
-    .endif
-    ret
-END_MACRO
-
     /*
      * Entry from managed code that tries to unlock the object in a fast path and calls
      * `artUnlockObjectFromCode()` for the difficult cases and delivers exception on failure.
@@ -1272,97 +1200,6 @@
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_unlock_object_no_inline
 
-    /*
-     * Entry from JNI stub that tries to lock the object in a fast path and
-     * calls `artLockObjectFromCode()` (the same as for managed code) for the
-     * difficult cases, may block for GC.
-     * Custom calling convention:
-     *     EBP holds the non-null object to lock.
-     *     Callee-save registers have been saved and can be used as temporaries (except EBP).
-     *     All argument registers need to be preserved.
-     */
-DEFINE_FUNCTION art_quick_lock_object_jni
-    movl %eax, %edi                       // Preserve EAX in a callee-save register.
-    LOCK_OBJECT_FAST_PATH ebp, esi, /*saved_eax*/ edi .Llock_object_jni_slow
-
-.Llock_object_jni_slow:
-    // Save register args EAX, ECX, EDX, EBX, mmx0-mmx3 and align stack.
-    PUSH_ARG ebx
-    PUSH_ARG edx
-    PUSH_ARG ecx
-    PUSH_ARG edi  // Original contents of EAX.
-    INCREASE_FRAME (/*FPRs*/ 4 * 8 + /*padding*/ 4)  // Make xmm<n> spill slots 8-byte aligned.
-    movsd %xmm0, 0(%esp)
-    movsd %xmm1, 8(%esp)
-    movsd %xmm2, 16(%esp)
-    movsd %xmm3, 24(%esp)
-    // Note: The stack is not 16-byte aligned here but it shall be after pushing args for the call.
-    // Call `artLockObjectFromCode()`
-    pushl %fs:THREAD_SELF_OFFSET          // Pass Thread::Current().
-    CFI_ADJUST_CFA_OFFSET(4)
-    PUSH_ARG ebp                          // Pass the object to lock.
-    call SYMBOL(artLockObjectFromCode)    // (object, Thread*)
-    // Check result.
-    testl %eax, %eax
-    jnz   1f
-    // Restore register args EAX, ECX, EDX, EBX, mmx0-mmx3 and return.
-    movsd 8(%esp), %xmm0
-    movsd 16(%esp), %xmm1
-    movsd 24(%esp), %xmm2
-    movsd 32(%esp), %xmm3
-    DECREASE_FRAME /*call args*/ 8 + /*FPR args*/ 4 * 8 + /*padding*/ 4
-    POP_ARG eax
-    POP_ARG ecx
-    POP_ARG edx
-    POP_ARG ebx
-    ret
-    .cfi_adjust_cfa_offset (/*call args*/ 8 + /*FPRs*/ 4 * 8 + /*padding*/ 4 + /*GPRs*/ 4 * 4)
-1:
-    // All args are irrelevant when throwing an exception.
-    // Remove the spill area except for new padding to align stack.
-    DECREASE_FRAME \
-        (/*call args*/ 8 + /*FPRs*/ 4 * 8 + /*padding*/ 4 + /*GPRs*/ 4 * 4 - /*new padding*/ 8)
-    // Rely on the JNI transition frame constructed in the JNI stub.
-    pushl %fs:THREAD_SELF_OFFSET          // pass Thread::Current()
-    CFI_ADJUST_CFA_OFFSET(4)
-    call SYMBOL(artDeliverPendingExceptionFromCode)  // (Thread*)
-    UNREACHABLE
-END_FUNCTION art_quick_lock_object_jni
-
-    /*
-     * Entry from JNI stub that tries to unlock the object in a fast path and calls
-     * `artUnlockObjectFromJni()` for the difficult cases. Note that failure to unlock
-     * is fatal, so we do not need to check for exceptions in the slow path.
-     * Custom calling convention:
-     *     EBP holds the non-null object to unlock.
-     *     Callee-save registers have been saved and can be used as temporaries (except EBP).
-     *     Return registers EAX, EDX and mmx0 need to be preserved.
-     */
-    .extern artLockObjectFromJni
-DEFINE_FUNCTION art_quick_unlock_object_jni
-    movl %eax, %edi                       // Preserve EAX in a different register.
-    UNLOCK_OBJECT_FAST_PATH ebp, esi, /*saved_eax*/ edi, .Lunlock_object_jni_slow
-
- .Lunlock_object_jni_slow:
-    // Save return registers.
-    PUSH_ARG edx
-    PUSH_ARG edi  // Original contents of EAX.
-    INCREASE_FRAME /*mmx0*/ 8 + /*padding*/ 4
-    movsd %xmm0, 0(%esp)
-    // Note: The stack is not 16-byte aligned here but it shall be after pushing args for the call.
-    // Call `artUnlockObjectFromJni()`
-    pushl %fs:THREAD_SELF_OFFSET          // Pass Thread::Current().
-    CFI_ADJUST_CFA_OFFSET(4)
-    PUSH_ARG ebp                          // Pass the object to unlock.
-    call SYMBOL(artUnlockObjectFromJni)   // (object, Thread*)
-    // Restore return registers and return.
-    movsd 8(%esp), %xmm0
-    DECREASE_FRAME /*call args*/ 8 + /*xmm0*/ 8 + /*padding*/ 4
-    POP_ARG eax
-    POP_ARG edx
-    ret
-END_FUNCTION art_quick_unlock_object_jni
-
 DEFINE_FUNCTION art_quick_instance_of
     PUSH eax                              // alignment padding
     PUSH ecx                              // pass arg2 - obj->klass
diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S
index 60380a8..bfec8c0 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.S
+++ b/runtime/arch/x86_64/asm_support_x86_64.S
@@ -493,4 +493,72 @@
     DELIVER_PENDING_EXCEPTION
 END_MACRO
 
+// Locking is needed for both managed code and JNI stubs.
+MACRO3(LOCK_OBJECT_FAST_PATH, obj, tmp, slow_lock)
+1:
+    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)), %eax  // EAX := lock word
+    movl %gs:THREAD_ID_OFFSET, REG_VAR(tmp)  // tmp: thread id.
+    xorl %eax, REG_VAR(tmp)               // tmp: thread id with count 0 + read barrier bits.
+    testl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %eax  // Test the non-gc bits.
+    jnz  2f                               // Check if unlocked.
+    // Unlocked case - store tmp: original lock word plus thread id, preserved read barrier bits.
+    lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
+    jnz  1b                               // cmpxchg failed retry
+    ret
+2:  // EAX: original lock word, tmp: thread id ^ EAX
+                                          // Check lock word state and thread id together.
+    testl LITERAL(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED), \
+          REG_VAR(tmp)
+    jne  \slow_lock                       // Slow path if not thin-locked (state bits set) or
+                                          // the lock word's thread id does not match.
+                                          // Increment the recursive lock count.
+    leal LOCK_WORD_THIN_LOCK_COUNT_ONE(%eax), REG_VAR(tmp)
+    testl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_MASK_SHIFTED), REG_VAR(tmp)
+    je   \slow_lock                       // If count overflowed, go to slow lock.
+    // Update lockword for recursive lock, cmpxchg necessary for read barrier bits.
+                                          // EAX: old val, tmp: new val.
+    lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
+    jnz  1b                               // cmpxchg failed retry
+    ret
+END_MACRO
+
+// Unlocking is needed for both managed code and JNI stubs.
+MACRO4(UNLOCK_OBJECT_FAST_PATH, obj, tmp, saved_rax, slow_unlock)
+1:
+    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)), %eax  // EAX := lock word
+    movl %gs:THREAD_ID_OFFSET, REG_VAR(tmp)  // tmp := thread id
+    xorl %eax, REG_VAR(tmp)               // tmp := thread id ^ lock word
+    test LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), REG_VAR(tmp)
+    jnz  2f                               // Check if simply locked.
+    // Transition to unlocked.
+#ifndef USE_READ_BARRIER
+    movl REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
+#else
+    lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
+    jnz  1b                               // cmpxchg failed retry
+#endif
+    .ifnc \saved_rax, none
+        movq REG_VAR(saved_rax), %rax     // Restore RAX.
+    .endif
+    ret
+2:  // EAX: original lock word, tmp: lock_word ^ thread id
+                                          // Check lock word state and thread id together.
+    testl LITERAL(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED), \
+          REG_VAR(tmp)
+    jnz  \slow_unlock
+    // Update lockword for recursive unlock, cmpxchg necessary for read barrier bits.
+                                          // tmp: new lock word with decremented count.
+    leal -LOCK_WORD_THIN_LOCK_COUNT_ONE(%eax), REG_VAR(tmp)
+#ifndef USE_READ_BARRIER
+                                          // Plain store of tmp; no read barrier bits to preserve.
+    movl REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
+#else
+    lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
+    jnz  1b                               // cmpxchg failed retry
+#endif
+    .ifnc \saved_rax, none
+        movq REG_VAR(saved_rax), %rax     // Restore RAX.
+    .endif
+    ret
+END_MACRO
+
 #endif  // ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_S_
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index 5e69edf..bf6add4 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -119,7 +119,6 @@
   qpoints->pMemcpy = art_quick_memcpy;
 
   // Read barrier.
-  qpoints->pReadBarrierJni = art_read_barrier_jni;
   UpdateReadBarrierEntrypoints(qpoints, /*is_active=*/ false);
   qpoints->pReadBarrierMarkReg04 = nullptr;  // Cannot use register 4 (RSP) to pass arguments.
   // x86-64 has only 16 core registers.
diff --git a/runtime/arch/x86_64/jni_entrypoints_x86_64.S b/runtime/arch/x86_64/jni_entrypoints_x86_64.S
index e5329a6..dca4128 100644
--- a/runtime/arch/x86_64/jni_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/jni_entrypoints_x86_64.S
@@ -16,6 +16,45 @@
 
 #include "asm_support_x86_64.S"
 
+#define MANAGED_ARGS_SAVE_SIZE /*xmm0-xmm7*/ 8 * 8 + /*padding*/ 8 + /* GPR args */ 6 * 8
+
+MACRO0(SAVE_MANAGED_ARGS_INCREASE_FRAME)
+    // Return address is on the stack.
+    PUSH_ARG r9
+    PUSH_ARG r8
+    PUSH_ARG rcx
+    PUSH_ARG rdx
+    PUSH_ARG rsi
+    PUSH_ARG rdi
+    INCREASE_FRAME (/*FPRs*/ 8 * 8 + /*padding*/ 8)
+    movsd %xmm0, 0(%rsp)
+    movsd %xmm1, 8(%rsp)
+    movsd %xmm2, 16(%rsp)
+    movsd %xmm3, 24(%rsp)
+    movsd %xmm4, 32(%rsp)
+    movsd %xmm5, 40(%rsp)
+    movsd %xmm6, 48(%rsp)
+    movsd %xmm7, 56(%rsp)
+END_MACRO
+
+MACRO0(RESTORE_MANAGED_ARGS_DECREASE_FRAME)
+    movsd 0(%rsp), %xmm0
+    movsd 8(%rsp), %xmm1
+    movsd 16(%rsp), %xmm2
+    movsd 24(%rsp), %xmm3
+    movsd 32(%rsp), %xmm4
+    movsd 40(%rsp), %xmm5
+    movsd 48(%rsp), %xmm6
+    movsd 56(%rsp), %xmm7
+    DECREASE_FRAME (/*FPRs*/ 8 * 8 + /*padding*/ 8)
+    POP_ARG rdi
+    POP_ARG rsi
+    POP_ARG rdx
+    POP_ARG rcx
+    POP_ARG r8
+    POP_ARG r9
+END_MACRO
+
     /*
      * Jni dlsym lookup stub.
      */
@@ -317,48 +356,76 @@
      * Read barrier for the method's declaring class needed by JNI stub for static methods.
      * (We're using a pointer to the declaring class in `ArtMethod` as `jclass`.)
      */
-DEFINE_FUNCTION art_read_barrier_jni
+DEFINE_FUNCTION art_jni_read_barrier
     // Note: Managed callee-save registers have been saved by the JNI stub.
-    // Save managed GPR args.
-    PUSH_ARG r9
-    PUSH_ARG r8
-    PUSH_ARG rsi
-    PUSH_ARG rdx
-    PUSH_ARG rcx
-    // Save the method.
-    PUSH_ARG rdi
-    // Increase frame: FPR args save area (8 * 8), padding.
-    INCREASE_FRAME 8 * 8 + 8
-    // Save FPR args.
-    movq %xmm0, 0(%rsp)
-    movq %xmm1, 8(%rsp)
-    movq %xmm2, 16(%rsp)
-    movq %xmm3, 24(%rsp)
-    movq %xmm4, 32(%rsp)
-    movq %xmm5, 40(%rsp)
-    movq %xmm6, 48(%rsp)
-    movq %xmm7, 56(%rsp)
+    // Save register args RDI, RSI, RDX, RCX, R8, R9, mmx0-mmx7 and align stack.
+    SAVE_MANAGED_ARGS_INCREASE_FRAME
     // The method argument is already in RDI.
-    call SYMBOL(artReadBarrierJni)  // (ArtMethod*)
-    // Restore FPR args.
-    movq 0(%rsp), %xmm0
-    movq 8(%rsp), %xmm1
-    movq 16(%rsp), %xmm2
-    movq 24(%rsp), %xmm3
-    movq 32(%rsp), %xmm4
-    movq 40(%rsp), %xmm5
-    movq 48(%rsp), %xmm6
-    movq 56(%rsp), %xmm7
-    // Remove FPR args save area, padding.
-    DECREASE_FRAME 8 * 8 + 8
-    // Restore the method.
-    POP_ARG rdi
-    // Restore managed args.
-    POP_ARG rcx
-    POP_ARG rdx
-    POP_ARG rsi
-    POP_ARG r8
-    POP_ARG r9
-    // Return.
+    call SYMBOL(artJniReadBarrier)  // (ArtMethod*)
+    // Restore register args RDI, RSI, RDX, RCX, R8, R9, mmx0-mmx7 and return.
+    RESTORE_MANAGED_ARGS_DECREASE_FRAME
     ret
-END_FUNCTION art_read_barrier_jni
+END_FUNCTION art_jni_read_barrier
+
+    /*
+     * Entry from JNI stub that tries to lock the object in a fast path and
+     * calls `artLockObjectFromCode()` (the same as for managed code) for the
+     * difficult cases, may block for GC.
+     * Custom calling convention:
+     *     RBX holds the non-null object to lock.
+     *     Callee-save registers have been saved and can be used as temporaries (except RBX).
+     *     All argument registers need to be preserved.
+     */
+DEFINE_FUNCTION art_jni_lock_object
+    LOCK_OBJECT_FAST_PATH rbx, ebp, .Llock_object_jni_slow
+
+.Llock_object_jni_slow:
+    // Save register args RDI, RSI, RDX, RCX, R8, R9, mmx0-mmx7 and align stack.
+    SAVE_MANAGED_ARGS_INCREASE_FRAME
+    // Call `artLockObjectFromCode()`
+    movq %rbx, %rdi                       // Pass the object to lock.
+    movq %gs:THREAD_SELF_OFFSET, %rsi     // Pass Thread::Current().
+    call SYMBOL(artLockObjectFromCode)    // (object, Thread*)
+    // Check result.
+    testl %eax, %eax
+    jnz   1f
+    // Restore register args RDI, RSI, RDX, RCX, R8, R9, mmx0-mmx7 and return.
+    RESTORE_MANAGED_ARGS_DECREASE_FRAME
+    ret
+    .cfi_adjust_cfa_offset MANAGED_ARGS_SAVE_SIZE
+1:
+    // All args are irrelevant when throwing an exception. Remove the spill area.
+    DECREASE_FRAME MANAGED_ARGS_SAVE_SIZE
+    // Rely on the JNI transition frame constructed in the JNI stub.
+    movq %gs:THREAD_SELF_OFFSET, %rdi     // Pass Thread::Current().
+    jmp  SYMBOL(artDeliverPendingExceptionFromCode)  // (Thread*); tail call.
+END_FUNCTION art_jni_lock_object
+
+    /*
+     * Entry from JNI stub that tries to unlock the object in a fast path and calls
+     * `artJniUnlockObject()` for the difficult cases. Note that failure to unlock
+     * is fatal, so we do not need to check for exceptions in the slow path.
+     * Custom calling convention:
+     *     RBX holds the non-null object to unlock.
+     *     Callee-save registers have been saved and can be used as temporaries (except RBX).
+     *     Return registers RAX and mmx0 need to be preserved.
+     */
+DEFINE_FUNCTION art_jni_unlock_object
+    movq %rax, %r12                       // Preserve RAX in a different register.
+    UNLOCK_OBJECT_FAST_PATH rbx, ebp, /*saved_rax*/ r12, .Lunlock_object_jni_slow
+
+.Lunlock_object_jni_slow:
+    // Save return registers.
+    PUSH_ARG r12  // Original contents of RAX.
+    INCREASE_FRAME /*mmx0*/ 8 + /*padding*/ 8
+    movsd %xmm0, 0(%rsp)
+    // Call `artJniUnlockObject()`.
+    movq %rbx, %rdi                       // Pass the object to unlock.
+    movq %gs:THREAD_SELF_OFFSET, %rsi     // Pass Thread::Current().
+    call SYMBOL(artJniUnlockObject)       // (object, Thread*)
+    // Restore return registers and return.
+    movsd 0(%rsp), %xmm0
+    DECREASE_FRAME /*mmx0*/ 8 + /*padding*/ 8
+    POP_ARG rax
+    ret
+END_FUNCTION art_jni_unlock_object
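
The same alignment check for x86-64 is even simpler, because the slow-path calls take their arguments in registers: the only stack contributions at the call are the caller's return address and the `MANAGED_ARGS_SAVE_SIZE` spill area defined at the top of this file. A minimal sketch (identifier names are mine; the expression copies the #define above):

    // Drop these lines into any C++ translation unit to have the compiler verify the sums.
    constexpr int kManagedArgsSaveSize =
        /*xmm0-xmm7*/ 8 * 8 + /*padding*/ 8 + /*GPR args*/ 6 * 8;
    constexpr int kReturnAddress = 8;  // Pushed by the `call` into the JNI entrypoint.
    static_assert(kManagedArgsSaveSize == 120,
                  "64 bytes of XMM spills + 8 bytes of padding + 48 bytes of GPR args");
    static_assert((kReturnAddress + kManagedArgsSaveSize) % 16 == 0,
                  "RSP stays 16-byte aligned at the slow-path calls, as the psABI requires");

This is also where the old code went wrong: the removed `art_quick_lock_object_jni` below restored the XMM argument registers through `%esp`-relative loads instead of `%rsp`, which the shared SAVE/RESTORE macros above now avoid.
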
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 0671585..673696c 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1068,33 +1068,6 @@
 
 TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO
 
-MACRO3(LOCK_OBJECT_FAST_PATH, obj, tmp, slow_lock)
-1:
-    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)), %eax  // EAX := lock word
-    movl %gs:THREAD_ID_OFFSET, REG_VAR(tmp)  // tmp: thread id.
-    xorl %eax, REG_VAR(tmp)               // tmp: thread id with count 0 + read barrier bits.
-    testl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %eax  // Test the non-gc bits.
-    jnz  2f                               // Check if unlocked.
-    // Unlocked case - store tmp: original lock word plus thread id, preserved read barrier bits.
-    lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
-    jnz  1b                               // cmpxchg failed retry
-    ret
-2:  // EAX: original lock word, tmp: thread id ^ EAX
-                                          // Check lock word state and thread id together,
-    testl LITERAL(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED), \
-          REG_VAR(tmp)
-    jne  \slow_lock                       // Slow path if either of the two high bits are set.
-                                          // Increment the recursive lock count.
-    leal LOCK_WORD_THIN_LOCK_COUNT_ONE(%eax), REG_VAR(tmp)
-    testl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_MASK_SHIFTED), REG_VAR(tmp)
-    je   \slow_lock                       // If count overflowed, go to slow lock.
-    // Update lockword for recursive lock, cmpxchg necessary for read barrier bits.
-                                          // EAX: old val, tmp: new val.
-    lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
-    jnz  1b                               // cmpxchg failed retry
-    ret
-END_MACRO
-
     /*
      * Entry from managed code that tries to lock the object in a fast path and
      * calls `artLockObjectFromCode()` for the difficult cases, may block for GC.
@@ -1119,45 +1092,6 @@
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_lock_object_no_inline
 
-MACRO4(UNLOCK_OBJECT_FAST_PATH, obj, tmp, saved_rax, slow_unlock)
-1:
-    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)), %eax  // EAX := lock word
-    movl %gs:THREAD_ID_OFFSET, REG_VAR(tmp)  // tmp := thread id
-    xorl %eax, REG_VAR(tmp)               // tmp := thread id ^ lock word
-    test LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), REG_VAR(tmp)
-    jnz  2f                               // Check if simply locked.
-    // Transition to unlocked.
-#ifndef USE_READ_BARRIER
-    movl REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
-#else
-    lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
-    jnz  1b                               // cmpxchg failed retry
-#endif
-    .ifnc \saved_rax, none
-        movq REG_VAR(saved_rax), %rax     // Restore RAX.
-    .endif
-    ret
-2:  // EAX: original lock word, tmp: lock_word ^ thread id
-                                          // Check lock word state and thread id together.
-    testl LITERAL(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED), \
-          REG_VAR(tmp)
-    jnz  \slow_unlock
-    // Update lockword for recursive unlock, cmpxchg necessary for read barrier bits.
-                                          // tmp: new lock word with decremented count.
-    leal -LOCK_WORD_THIN_LOCK_COUNT_ONE(%eax), REG_VAR(tmp)
-#ifndef USE_READ_BARRIER
-                                          // EAX: new lock word with decremented count.
-    movl REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
-#else
-    lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
-    jnz  1b                               // cmpxchg failed retry
-#endif
-    .ifnc \saved_rax, none
-        movq REG_VAR(saved_rax), %rax     // Restore RAX.
-    .endif
-    ret
-END_MACRO
-
     /*
      * Entry from managed code that tries to unlock the object in a fast path and calls
      * `artUnlockObjectFromCode()` for the difficult cases and delivers exception on failure.
@@ -1183,97 +1117,6 @@
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_unlock_object_no_inline
 
-    /*
-     * Entry from JNI stub that tries to lock the object in a fast path and
-     * calls `artLockObjectFromCode()` (the same as for managed code) for the
-     * difficult cases, may block for GC.
-     * Custom calling convention:
-     *     RBX holds the non-null object to lock.
-     *     Callee-save registers have been saved and can be used as temporaries (except RBX).
-     *     All argument registers need to be preserved.
-     */
-DEFINE_FUNCTION art_quick_lock_object_jni
-    LOCK_OBJECT_FAST_PATH rbx, ebp, .Llock_object_jni_slow
-
-.Llock_object_jni_slow:
-    // Save register args RDI, RSI, RDX, RCX, R8, R9, mmx0-mmx7 and align stack.
-    PUSH_ARG r9
-    PUSH_ARG r8
-    PUSH_ARG rcx
-    PUSH_ARG rdx
-    PUSH_ARG rsi
-    PUSH_ARG rdi
-    INCREASE_FRAME (/*FPRs*/ 8 * 8 + /*padding*/ 8)
-    movsd %xmm0, 0(%rsp)
-    movsd %xmm1, 8(%rsp)
-    movsd %xmm2, 16(%rsp)
-    movsd %xmm3, 24(%rsp)
-    movsd %xmm4, 32(%rsp)
-    movsd %xmm5, 40(%rsp)
-    movsd %xmm6, 48(%rsp)
-    movsd %xmm7, 56(%rsp)
-    // Call `artLockObjectFromCode()`
-    movq %rbx, %rdi                       // Pass the object to lock.
-    movq %gs:THREAD_SELF_OFFSET, %rsi     // Pass Thread::Current().
-    call SYMBOL(artLockObjectFromCode)    // (object, Thread*)
-    // Check result.
-    testl %eax, %eax
-    jnz   1f
-    // Restore register args RDI, RSI, RDX, RCX, R8, R9, mmx0-mmx7 and return.
-    movsd 0(%esp), %xmm0
-    movsd 8(%esp), %xmm1
-    movsd 16(%esp), %xmm2
-    movsd 24(%esp), %xmm3
-    movsd 32(%esp), %xmm4
-    movsd 40(%esp), %xmm5
-    movsd 48(%esp), %xmm6
-    movsd 56(%esp), %xmm7
-    DECREASE_FRAME /*FPR args*/ 8 * 8 + /*padding*/ 8
-    POP_ARG rdi
-    POP_ARG rsi
-    POP_ARG rdx
-    POP_ARG rcx
-    POP_ARG r8
-    POP_ARG r9
-    ret
-    .cfi_adjust_cfa_offset (/*FPRs*/ 8 * 8 + /*padding*/ 8 + /*GPRs*/ 6 * 8)
-1:
-    // All args are irrelevant when throwing an exception. Remove the spill area.
-    DECREASE_FRAME (/*FPRs*/ 8 * 8 + /*padding*/ 8 + /*GPRs*/ 6 * 8)
-    // Rely on the JNI transition frame constructed in the JNI stub.
-    movq %gs:THREAD_SELF_OFFSET, %rdi     // Pass Thread::Current().
-    jmp  SYMBOL(artDeliverPendingExceptionFromCode)  // (Thread*); tail call.
-END_FUNCTION art_quick_lock_object_jni
-
-    /*
-     * Entry from JNI stub that tries to unlock the object in a fast path and calls
-     * `artUnlockObjectFromJni()` for the difficult cases. Note that failure to unlock
-     * is fatal, so we do not need to check for exceptions in the slow path.
-     * Custom calling convention:
-     *     RBX holds the non-null object to unlock.
-     *     Callee-save registers have been saved and can be used as temporaries (except RBX).
-     *     Return registers RAX and mmx0 need to be preserved.
-     */
-DEFINE_FUNCTION art_quick_unlock_object_jni
-    movq %rax, %r12                       // Preserve RAX in a different register.
-    UNLOCK_OBJECT_FAST_PATH rbx, ebp, /*saved_rax*/ r12, .Lunlock_object_jni_slow
-
- .Lunlock_object_jni_slow:
-    // Save return registers and return address.
-    PUSH_ARG r12  // Original contents of RAX.
-    INCREASE_FRAME /*mmx0*/ 8 + /*padding*/ 8
-    movsd %xmm0, 0(%rsp)
-    // Call `artUnlockObjectFromJni()`
-    movq %rbx, %rdi                       // Pass the object to unlock.
-    movq %gs:THREAD_SELF_OFFSET, %rsi     // Pass Thread::Current().
-    call SYMBOL(artUnlockObjectFromJni)   // (object, Thread*)
-    // Restore return registers and return.
-    movsd 0(%rsp), %xmm0
-    DECREASE_FRAME /*mmx0*/ 8 + /*padding*/ 8
-    POP_ARG rax
-    ret
-END_FUNCTION art_quick_unlock_object_jni
-
 DEFINE_FUNCTION art_quick_check_instance_of
     // Type check using the bit string passes null as the target class. In that case just throw.
     testl %esi, %esi
diff --git a/runtime/entrypoints/quick/quick_default_externs.h b/runtime/entrypoints/quick/quick_default_externs.h
index f43e25f..44127e6 100644
--- a/runtime/entrypoints/quick/quick_default_externs.h
+++ b/runtime/entrypoints/quick/quick_default_externs.h
@@ -115,11 +115,11 @@
 extern "C" void art_quick_invoke_virtual_trampoline_with_access_check(uint32_t, void*);
 
 // JNI read barrier entrypoint. Note: Preserves all registers.
-extern "C" void art_read_barrier_jni(art::ArtMethod* method);
+extern "C" void art_jni_read_barrier(art::ArtMethod* method);
 
 // JNI lock/unlock entrypoints. Note: Custom calling convention.
-extern "C" void art_quick_lock_object_jni(art::mirror::Object*);
-extern "C" void art_quick_unlock_object_jni(art::mirror::Object*);
+extern "C" void art_jni_lock_object(art::mirror::Object*);
+extern "C" void art_jni_unlock_object(art::mirror::Object*);
 
 // Polymorphic invoke entrypoints.
 extern "C" void art_quick_invoke_polymorphic(uint32_t, void*);
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
index df52e23..1348241 100644
--- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
@@ -78,8 +78,9 @@
   qpoints->pJniMethodEndWithReference = JniMethodEndWithReference;
   qpoints->pQuickGenericJniTrampoline = art_quick_generic_jni_trampoline;
   qpoints->pJniDecodeReferenceResult = JniDecodeReferenceResult;
-  qpoints->pJniLockObject = art_quick_lock_object_jni;
-  qpoints->pJniUnlockObject = art_quick_unlock_object_jni;
+  qpoints->pJniLockObject = art_jni_lock_object;
+  qpoints->pJniUnlockObject = art_jni_unlock_object;
+  qpoints->pJniReadBarrier = art_jni_read_barrier;
 
   // Locks
   if (UNLIKELY(VLOG_IS_ON(systrace_lock_logging))) {
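
For readers who have not seen how these `qpoints` assignments are consumed: the entrypoints sit in a per-thread table of function pointers, and generated JNI stubs call through that table at a fixed offset from the thread register. The sketch below is a deliberately simplified mock of that pattern; the struct, helper and call sites are invented for illustration, and only the field names (`pJniLockObject`, `pJniUnlockObject`, `pJniReadBarrier`) and the symbols they are wired to come from the change above.

    #include <cstdio>

    namespace art {
    class ArtMethod;
    namespace mirror { class Object; }
    }  // namespace art

    // Illustrative stand-ins for the assembly entrypoints named in this change.
    extern "C" void art_jni_lock_object(art::mirror::Object*) { std::puts("lock"); }
    extern "C" void art_jni_unlock_object(art::mirror::Object*) { std::puts("unlock"); }
    extern "C" void art_jni_read_barrier(art::ArtMethod*) { std::puts("read barrier"); }

    // Simplified mock of a per-thread entrypoint table (not ART's real QuickEntryPoints).
    struct MockQuickEntryPoints {
      void (*pJniLockObject)(art::mirror::Object*);
      void (*pJniUnlockObject)(art::mirror::Object*);
      void (*pJniReadBarrier)(art::ArtMethod*);
    };

    static void InitMockEntryPoints(MockQuickEntryPoints* qpoints) {
      // Mirrors the wiring done in quick_default_init_entrypoints.h above.
      qpoints->pJniLockObject = art_jni_lock_object;
      qpoints->pJniUnlockObject = art_jni_unlock_object;
      qpoints->pJniReadBarrier = art_jni_read_barrier;
    }

    int main() {
      MockQuickEntryPoints qpoints;
      InitMockEntryPoints(&qpoints);
      // A real JNI stub reaches these through the thread register; here we simply
      // call through the pointers directly.
      qpoints.pJniLockObject(nullptr);
      qpoints.pJniUnlockObject(nullptr);
      qpoints.pJniReadBarrier(nullptr);
      return 0;
    }
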
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index cf5c697..a69cc56 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -75,9 +75,9 @@
                                                   Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) HOT_ATTR;
 
-extern "C" void artReadBarrierJni(ArtMethod* method)
+extern "C" void artJniReadBarrier(ArtMethod* method)
     REQUIRES_SHARED(Locks::mutator_lock_) HOT_ATTR;
-extern "C" void artUnlockObjectFromJni(mirror::Object* locked, Thread* self)
+extern "C" void artJniUnlockObject(mirror::Object* locked, Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) HOT_ATTR;
 
 // Read barrier entrypoints.
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index 09ce943..aef7568 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -171,7 +171,7 @@
   V(UpdateInlineCache, void, void) \
   V(CompileOptimized, void, ArtMethod*, Thread*) \
 \
-  V(ReadBarrierJni, void, ArtMethod*) \
+  V(JniReadBarrier, void, ArtMethod*) \
   V(ReadBarrierMarkReg00, mirror::Object*, mirror::Object*) \
   V(ReadBarrierMarkReg01, mirror::Object*, mirror::Object*) \
   V(ReadBarrierMarkReg02, mirror::Object*, mirror::Object*) \
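
The `V(...)` entries in this list are an X-macro: each line records an entrypoint's name and signature, and other files re-expand the same list with their own definition of `V` to generate the `pJniReadBarrier`-style struct fields, the offset checks in entrypoints_order_test.cc, and the name dump done with QUICK_ENTRY_POINT_INFO in thread.cc further down. The snippet below illustrates the technique with a made-up two-entry list; ART's actual macro names and parameters are not reproduced here.

    #include <cstdio>

    class ArtMethod;
    namespace mirror { class Object; }

    // Tiny stand-in for quick_entrypoints_list.h: name, return type, argument types.
    #define MY_ENTRYPOINT_LIST(V)             \
      V(JniReadBarrier, void, ArtMethod*)     \
      V(JniLockObject, void, mirror::Object*)

    // One expansion: a struct of function pointers (pJniReadBarrier, pJniLockObject).
    struct EntryPoints {
    #define DECLARE_FIELD(name, rettype, ...) rettype (*p##name)(__VA_ARGS__);
      MY_ENTRYPOINT_LIST(DECLARE_FIELD)
    #undef DECLARE_FIELD
    };

    // Another expansion: print each entrypoint's name, in the spirit of QUICK_ENTRY_POINT_INFO.
    void DumpNames() {
    #define PRINT_NAME(name, rettype, ...) std::printf("p%s\n", #name);
      MY_ENTRYPOINT_LIST(PRINT_NAME)
    #undef PRINT_NAME
    }

    int main() {
      DumpNames();  // Prints "pJniReadBarrier" and "pJniLockObject".
      static_assert(sizeof(EntryPoints) == 2 * sizeof(void (*)(ArtMethod*)),
                    "one pointer per list entry");
      return 0;
    }
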
diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
index 4fa37e5..b3d7f38 100644
--- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
@@ -41,7 +41,7 @@
 static_assert(sizeof(IRTSegmentState) == sizeof(uint32_t), "IRTSegmentState size unexpected");
 static_assert(std::is_trivial<IRTSegmentState>::value, "IRTSegmentState not trivial");
 
-extern "C" void artReadBarrierJni(ArtMethod* method) {
+extern "C" void artJniReadBarrier(ArtMethod* method) {
   DCHECK(kUseReadBarrier);
   mirror::CompressedReference<mirror::Object>* declaring_class =
       method->GetDeclaringClassAddressWithoutBarrier();
@@ -90,7 +90,7 @@
 }
 
 // TODO: annotalysis disabled as monitor semantics are maintained in Java code.
-extern "C" void artUnlockObjectFromJni(mirror::Object* locked, Thread* self)
+extern "C" void artJniUnlockObject(mirror::Object* locked, Thread* self)
     NO_THREAD_SAFETY_ANALYSIS REQUIRES(!Roles::uninterruptible_) {
   // Note: No thread suspension is allowed for successful unlocking, otherwise plain
   // `mirror::Object*` return value saved by the assembly stub would need to be updated.
@@ -198,7 +198,7 @@
     DCHECK(normal_native) << "@FastNative/@CriticalNative and synchronize is not supported";
     ObjPtr<mirror::Object> lock = GetGenericJniSynchronizationObject(self, called);
     DCHECK(lock != nullptr);
-    artUnlockObjectFromJni(lock.Ptr(), self);
+    artJniUnlockObject(lock.Ptr(), self);
   }
   char return_shorty_char = called->GetShorty()[0];
   if (return_shorty_char == 'L') {
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index e214577..5f687ce 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -1948,7 +1948,7 @@
         auto* declaring_class = reinterpret_cast<mirror::CompressedReference<mirror::Class>*>(
             method->GetDeclaringClassAddressWithoutBarrier());
         if (kUseReadBarrier) {
-          artReadBarrierJni(method);
+          artJniReadBarrier(method);
         }
         sm_.AdvancePointer(declaring_class);
       }  // else "this" reference is already handled by QuickArgumentVisitor.
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index c3f1dba..00d5523 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -336,9 +336,9 @@
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pUpdateInlineCache, pCompileOptimized,
                          sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCompileOptimized, pReadBarrierJni,
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCompileOptimized, pJniReadBarrier,
                          sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierJni, pReadBarrierMarkReg00,
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniReadBarrier, pReadBarrierMarkReg00,
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg00, pReadBarrierMarkReg01,
                          sizeof(void*));
diff --git a/runtime/thread.cc b/runtime/thread.cc
index c7ce0de..be32d92 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -3561,7 +3561,7 @@
   QUICK_ENTRY_POINT_INFO(pNewStringFromString)
   QUICK_ENTRY_POINT_INFO(pNewStringFromStringBuffer)
   QUICK_ENTRY_POINT_INFO(pNewStringFromStringBuilder)
-  QUICK_ENTRY_POINT_INFO(pReadBarrierJni)
+  QUICK_ENTRY_POINT_INFO(pJniReadBarrier)
   QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg00)
   QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg01)
   QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg02)