MIPS: Improve object lock/unlock.
Test: booted MIPS64 (with 2nd arch MIPS32R6) in QEMU
Test: test-art-target-gtest
Test: testrunner.py --target --optimizing
Test: same tests as above on CI20
Test: repeat all of the above in the configuration
ART_USE_READ_BARRIER=false
Test: booted MIPS32R2 in QEMU
Change-Id: I30fa277e1a687cb0595204e8d9053d7a4873d3df
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 7bbcbf0..a7722ef 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -1252,7 +1252,39 @@
.extern artLockObjectFromCode
ENTRY art_quick_lock_object
beqz $a0, art_quick_throw_null_pointer_exception
+ li $t8, LOCK_WORD_THIN_LOCK_COUNT_ONE
+ li $t3, LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED
+.Lretry_lock:
+ lw $t0, THREAD_ID_OFFSET(rSELF) # TODO: Can the thread ID really change during the loop?
+ ll $t1, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
+ and $t2, $t1, $t3 # zero the gc bits
+ bnez $t2, .Lnot_unlocked # already thin locked
+ # Unlocked case - $t1: original lock word that's zero except for the read barrier bits.
+ or $t2, $t1, $t0 # $t2 holds thread id with count of 0 with preserved read barrier bits
+ sc $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
+ beqz $t2, .Lretry_lock # store failed, retry
nop
+ jalr $zero, $ra
+ sync # full (LoadLoad|LoadStore) memory barrier
+.Lnot_unlocked:
+ # $t1: original lock word, $t0: thread_id with count of 0 and zero read barrier bits
+ srl $t2, $t1, LOCK_WORD_STATE_SHIFT
+ bnez $t2, .Lslow_lock # if either of the top two bits are set, go slow path
+ xor $t2, $t1, $t0 # lock_word.ThreadId() ^ self->ThreadId()
+ andi $t2, $t2, 0xFFFF # zero top 16 bits
+ bnez $t2, .Lslow_lock # thread ids differ -> contention, go to slow path
+ # thread ids match -> fall through to recursive lock
+ and $t2, $t1, $t3 # zero the gc bits
+ addu $t2, $t2, $t8 # increment count in lock word
+ srl $t2, $t2, LOCK_WORD_GC_STATE_SHIFT # if the first gc state bit is set, we overflowed.
+ bnez $t2, .Lslow_lock # if we overflow the count go slow path
+ addu $t2, $t1, $t8 # increment count for real
+ sc $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
+ beqz $t2, .Lretry_lock # store failed, retry
+ nop
+ jalr $zero, $ra
+ nop
+.Lslow_lock:
SETUP_SAVE_REFS_ONLY_FRAME # save callee saves in case we block
la $t9, artLockObjectFromCode
jalr $t9 # (Object* obj, Thread*)
@@ -1276,11 +1308,55 @@
.extern artUnlockObjectFromCode
ENTRY art_quick_unlock_object
beqz $a0, art_quick_throw_null_pointer_exception
+ li $t8, LOCK_WORD_THIN_LOCK_COUNT_ONE
+ li $t3, LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED
+.Lretry_unlock:
+#ifndef USE_READ_BARRIER
+ lw $t1, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
+#else
+ ll $t1, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0) # Need to use atomic read-modify-write for read barrier
+#endif
+ srl $t2, $t1, LOCK_WORD_STATE_SHIFT
+ bnez $t2, .Lslow_unlock # if either of the top two bits are set, go slow path
+ lw $t0, THREAD_ID_OFFSET(rSELF)
+ and $t2, $t1, $t3 # zero the gc bits
+ xor $t2, $t2, $t0 # lock_word.ThreadId() ^ self->ThreadId()
+ andi $t2, $t2, 0xFFFF # zero top 16 bits
+ bnez $t2, .Lslow_unlock # thread ids differ -> lock held by another thread, go slow path
+ and $t2, $t1, $t3 # zero the gc bits
+ bgeu $t2, $t8, .Lrecursive_thin_unlock
+ # transition to unlocked
+ nor $t2, $zero, $t3 # $t2 = LOCK_WORD_GC_STATE_MASK_SHIFTED
+ and $t2, $t1, $t2 # $t2: zero except for the preserved gc bits
+ sync # full (LoadStore|StoreStore) memory barrier
+#ifndef USE_READ_BARRIER
+ jalr $zero, $ra
+ sw $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
+#else
+ sc $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
+ beqz $t2, .Lretry_unlock # store failed, retry
nop
- SETUP_SAVE_REFS_ONLY_FRAME # save callee saves in case exception allocation triggers GC
+ jalr $zero, $ra
+ nop
+#endif
+.Lrecursive_thin_unlock:
+ # t1: original lock word
+ subu $t2, $t1, $t8 # decrement count
+#ifndef USE_READ_BARRIER
+ jalr $zero, $ra
+ sw $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
+#else
+ sc $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
+ beqz $t2, .Lretry_unlock # store failed, retry
+ nop
+ jalr $zero, $ra
+ nop
+#endif
+.Lslow_unlock:
+ SETUP_SAVE_REFS_ONLY_FRAME # save callee saves in case exception allocation triggers GC
la $t9, artUnlockObjectFromCode
- jalr $t9 # (Object* obj, Thread*)
- move $a1, rSELF # pass Thread::Current
+ jalr $t9 # (Object* obj, Thread*)
+ move $a1, rSELF # pass Thread::Current
RETURN_IF_ZERO
END art_quick_unlock_object