Clean up JNI entrypoint assembly.
Move JNI entrypoints to `jni_entrypoints_<arch>.S` and
shared helper macros to `asm_support_<arch>.S`. Introduce
some new macros to reduce code duplication. Fix the x86-64
JNI lock slow path, which was using ESP instead of RSP.
Rename JNI lock/unlock and read barrier entrypoints to pull
the "jni" to the front and drop "quick" from their names.
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Bug: 172332525
Change-Id: I20d059b07b308283db6c4e36a508480d91ad07fc
diff --git a/runtime/arch/arm/asm_support_arm.S b/runtime/arch/arm/asm_support_arm.S
index ff95bdd..000a2d1 100644
--- a/runtime/arch/arm/asm_support_arm.S
+++ b/runtime/arch/arm/asm_support_arm.S
@@ -409,4 +409,97 @@
.cfi_adjust_cfa_offset -28
.endm
+// Locking is needed for both managed code and JNI stubs.
+.macro LOCK_OBJECT_FAST_PATH obj, tmp1, tmp2, tmp3, slow_lock, can_be_null
+ ldr \tmp1, [rSELF, #THREAD_ID_OFFSET]
+ .if \can_be_null
+ cbz \obj, \slow_lock
+ .endif
+1:
+ ldrex \tmp2, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+ eor \tmp3, \tmp2, \tmp1 @ Prepare the value to store if unlocked
+ @ (thread id, count of 0 and preserved read barrier bits),
+ @ or prepare to compare thread id for recursive lock check
+ @ (lock_word.ThreadId() ^ self->ThreadId()).
+ ands ip, \tmp2, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ Test the non-gc bits.
+ bne 2f @ Check if unlocked.
+ @ Unlocked case - store tmp3: original lock word plus thread id, preserved read barrier bits.
+ strex \tmp2, \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+ cbnz \tmp2, 3f @ If store failed, retry.
+ dmb ish @ Full (LoadLoad|LoadStore) memory barrier.
+ bx lr
+2: @ tmp2: original lock word, tmp1: thread_id, tmp3: tmp2 ^ tmp1
+#if LOCK_WORD_THIN_LOCK_COUNT_SHIFT + LOCK_WORD_THIN_LOCK_COUNT_SIZE != LOCK_WORD_GC_STATE_SHIFT
+#error "Expecting thin lock count and gc state in consecutive bits."
+#endif
+ @ Check lock word state and thread id together.
+ bfc \tmp3, \
+ #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, \
+ #(LOCK_WORD_THIN_LOCK_COUNT_SIZE + LOCK_WORD_GC_STATE_SIZE)
+ cbnz \tmp3, \slow_lock @ If either of the top two bits is set, or the lock word's
+ @ thread id does not match, go to the slow path.
+ add \tmp3, \tmp2, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ Increment the recursive lock count.
+ @ Extract the new thin lock count for overflow check.
+ ubfx \tmp2, \tmp3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #LOCK_WORD_THIN_LOCK_COUNT_SIZE
+ cbz \tmp2, \slow_lock @ Zero as the new count indicates overflow; go to the slow path.
+ @ strex necessary for read barrier bits.
+ strex \tmp2, \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+ cbnz \tmp2, 3f @ If strex failed, retry.
+ bx lr
+3:
+ b 1b @ retry
+.endm
+
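+// Usage sketch (illustrative only; the actual callers are the managed-code lock
+// entrypoint in quick_entrypoints_arm.S and the synchronized JNI stubs): a caller
+// is expected to expand the macro above roughly as
+//     LOCK_OBJECT_FAST_PATH r0, r1, r2, r3, .Llock_object_slow, /*can_be_null*/ 1
+// with the object reference in r0, r1-r3 as scratch registers, and the slow-path
+// label branching to code that calls artLockObjectFromCode. The register choice
+// and label name here are examples, not the exact caller code.
+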
+// Unlocking is needed for both managed code and JNI stubs.
+.macro UNLOCK_OBJECT_FAST_PATH obj, tmp1, tmp2, tmp3, slow_unlock, can_be_null
+ ldr \tmp1, [rSELF, #THREAD_ID_OFFSET]
+ .if \can_be_null
+ cbz \obj, \slow_unlock
+ .endif
+1:
+#ifndef USE_READ_BARRIER
+ ldr \tmp2, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+#else
+ @ Need to use atomic instructions for read barrier.
+ ldrex \tmp2, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+#endif
+ eor \tmp3, \tmp2, \tmp1 @ Prepare the value to store if simply locked
+ @ (mostly 0s, and preserved read barrier bits),
+ @ or prepare to compare thread id for recursive lock check
+ @ (lock_word.ThreadId() ^ self->ThreadId()).
+ ands ip, \tmp3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ Test the non-gc bits.
+ bne 2f @ Locked recursively or by other thread?
+ @ Transition to unlocked.
+ dmb ish @ Full (LoadStore|StoreStore) memory barrier.
+#ifndef USE_READ_BARRIER
+ str \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+#else
+ @ strex necessary for read barrier bits
+ strex \tmp2, \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+ cbnz \tmp2, 3f @ If the store failed, retry.
+#endif
+ bx lr
+2: @ tmp2: original lock word, tmp1: thread_id, tmp3: tmp2 ^ tmp1
+#if LOCK_WORD_THIN_LOCK_COUNT_SHIFT + LOCK_WORD_THIN_LOCK_COUNT_SIZE != LOCK_WORD_GC_STATE_SHIFT
+#error "Expecting thin lock count and gc state in consecutive bits."
+#endif
+ @ Check lock word state and thread id together.
+ bfc \tmp3, \
+ #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, \
+ #(LOCK_WORD_THIN_LOCK_COUNT_SIZE + LOCK_WORD_GC_STATE_SIZE)
+ cbnz \tmp3, \slow_unlock @ If either of the top two bits is set, or the lock word's
+ @ thread id does not match, go to the slow path.
+ sub \tmp3, \tmp2, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ Decrement recursive lock count.
+#ifndef USE_READ_BARRIER
+ str \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+#else
+ @ strex necessary for read barrier bits.
+ strex \tmp2, \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+ cbnz \tmp2, 3f @ If the store failed, retry.
+#endif
+ bx lr
+3:
+ b 1b @ retry
+.endm
+
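+// Usage sketch (illustrative only): an unlock entrypoint or a synchronized JNI
+// stub is expected to expand the macro above roughly as
+//     UNLOCK_OBJECT_FAST_PATH r0, r1, r2, r3, .Lunlock_object_slow, /*can_be_null*/ 1
+// with the slow-path label branching to code that calls artUnlockObjectFromCode;
+// a JNI stub unlocking a lock object it knows to be non-null would pass
+// /*can_be_null*/ 0. The register choice and label name here are examples, not
+// the exact caller code.
+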
#endif // ART_RUNTIME_ARCH_ARM_ASM_SUPPORT_ARM_S_