arm: Rewrite `art_quick_aput_obj`.
Test: run-gtests.sh
Test: testrunner.py --target --32 --optimizing
Test: testrunner.py --target --32 --optimizing --gcstress
Bug: 160737021
Change-Id: I32a51cee80dd33564481b9916967d6692c156d2e
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 5a97572..8612300 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -637,13 +637,33 @@
.cfi_rel_offset \rReg, \offset
.endm
- /*
- * Macro to insert read barrier, only used in art_quick_aput_obj.
- * rObj and rDest are registers, offset is a defined literal such as MIRROR_OBJECT_CLASS_OFFSET.
- * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
- */
-.macro READ_BARRIER rDest, rObj, offset
+ // Helper macros for `art_quick_aput_obj`.
#ifdef USE_READ_BARRIER
+#ifdef USE_BAKER_READ_BARRIER
+.macro BAKER_RB_CHECK_GRAY_BIT_AND_LOAD rDest, rObj, offset, gray_slow_path_label
+ ldr ip, [\rObj, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ IP = lock word of the holder object (clobbers IP).
+ tst ip, #LOCK_WORD_READ_BARRIER_STATE_MASK_SHIFTED @ Check the read barrier state bits.
+ bne \gray_slow_path_label @ State bits set: leave the fast path before loading the field.
+ // False dependency to avoid needing load/load fence: on 32-bit ARM `lsr #32` yields zero, so the address is unchanged.
+ add \rObj, \rObj, ip, lsr #32
+ ldr \rDest, [\rObj, #\offset] @ Load the reference field; the address depends on the lock word load.
+ UNPOISON_HEAP_REF \rDest
+.endm
+
+.macro BAKER_RB_LOAD_AND_MARK rDest, rObj, offset, mark_function
+ ldr \rDest, [\rObj, #\offset] @ Load the reference field that needs marking.
+ UNPOISON_HEAP_REF \rDest
+ str lr, [sp, #-8]! @ Save LR with correct stack alignment.
+ .cfi_adjust_cfa_offset 8 @ Account for the 8-byte push first, as at the other push sites here.
+ .cfi_rel_offset lr, 0 @ LR is saved at the new SP + 0; must follow the CFA adjustment.
+ bl \mark_function
+ ldr lr, [sp], #8 @ Restore LR.
+ .cfi_restore lr
+ .cfi_adjust_cfa_offset -8
+.endm
+#else // USE_BAKER_READ_BARRIER
+ .extern artReadBarrierSlow
+.macro READ_BARRIER_SLOW rDest, rObj, offset
push {r0-r3, ip, lr} @ 6 words for saved registers (used in art_quick_aput_obj)
.cfi_adjust_cfa_offset 24
.cfi_rel_offset r0, 0
@@ -676,30 +696,26 @@
pop {lr} @ restore lr
.cfi_adjust_cfa_offset -4
.cfi_restore lr
-#else
- ldr \rDest, [\rObj, #\offset]
- UNPOISON_HEAP_REF \rDest
+#endif // USE_BAKER_READ_BARRIER
#endif // USE_READ_BARRIER
-.endm
-#ifdef USE_READ_BARRIER
- .extern artReadBarrierSlow
-#endif
.hidden art_quick_aput_obj
ENTRY art_quick_aput_obj
+ cbz r2, .Laput_obj_null
#ifdef USE_READ_BARRIER
- @ The offset to .Ldo_aput_null is too large to use cbz due to expansion from READ_BARRIER macro.
- tst r2, r2
- beq .Ldo_aput_null
-#else
- cbz r2, .Ldo_aput_null
+ cmp rMR, #0
+ bne .Laput_obj_gc_marking
#endif // USE_READ_BARRIER
- READ_BARRIER r3, r0, MIRROR_OBJECT_CLASS_OFFSET
- READ_BARRIER ip, r2, MIRROR_OBJECT_CLASS_OFFSET
- READ_BARRIER r3, r3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET
- cmp r3, ip @ value's type == array's component type - trivial assignability
- bne .Lcheck_assignability
-.Ldo_aput:
+ ldr r3, [r0, #MIRROR_OBJECT_CLASS_OFFSET]
+ UNPOISON_HEAP_REF r3
+ // R4 is a scratch register in managed ARM ABI.
+ ldr r4, [r2, #MIRROR_OBJECT_CLASS_OFFSET]
+ UNPOISON_HEAP_REF r4
+ ldr r3, [r3, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]
+ UNPOISON_HEAP_REF r3
+ cmp r3, r4 @ value's type == array's component type - trivial assignability
+ bne .Laput_obj_check_assignability
+.Laput_obj_store:
add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
POISON_HEAP_REF r2
str r2, [r3, r1, lsl #2]
@@ -707,26 +723,22 @@
lsr r0, r0, #CARD_TABLE_CARD_SHIFT
strb r3, [r3, r0]
blx lr
-.Ldo_aput_null:
+
+.Laput_obj_null:
add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
str r2, [r3, r1, lsl #2]
blx lr
-.Lcheck_assignability:
+
+.Laput_obj_check_assignability:
push {r0-r2, lr} @ save arguments
.cfi_adjust_cfa_offset 16
- .cfi_rel_offset r0, 0
- .cfi_rel_offset r1, 4
- .cfi_rel_offset r2, 8
.cfi_rel_offset lr, 12
- mov r1, ip
+ mov r1, r4
mov r0, r3
bl artIsAssignableFromCode
cbz r0, .Lthrow_array_store_exception
.cfi_remember_state
pop {r0-r2, lr}
- .cfi_restore r0
- .cfi_restore r1
- .cfi_restore r2
.cfi_restore lr
.cfi_adjust_cfa_offset -16
add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
@@ -736,19 +748,60 @@
lsr r0, r0, #CARD_TABLE_CARD_SHIFT
strb r3, [r3, r0]
blx lr
+
.Lthrow_array_store_exception:
CFI_RESTORE_STATE_AND_DEF_CFA sp, 16
pop {r0-r2, lr}
- .cfi_restore r0
- .cfi_restore r1
- .cfi_restore r2
.cfi_restore lr
.cfi_adjust_cfa_offset -16
+#ifdef USE_READ_BARRIER
+ .cfi_remember_state
+#endif // USE_READ_BARRIER
SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r3
mov r1, r2
- mov r2, rSELF @ pass Thread::Current
+ mov r2, rSELF @ Pass Thread::Current.
bl artThrowArrayStoreException @ (Class*, Class*, Thread*)
- bkpt @ unreached
+ bkpt @ Unreachable.
+
+#ifdef USE_READ_BARRIER
+ CFI_RESTORE_STATE_AND_DEF_CFA sp, 0
+.Laput_obj_gc_marking:
+#ifdef USE_BAKER_READ_BARRIER
+ BAKER_RB_CHECK_GRAY_BIT_AND_LOAD \
+ r3, r0, MIRROR_OBJECT_CLASS_OFFSET, .Laput_obj_mark_array_class
+.Laput_obj_mark_array_class_continue:
+ BAKER_RB_CHECK_GRAY_BIT_AND_LOAD \
+ r3, r3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, .Laput_obj_mark_array_element
+.Laput_obj_mark_array_element_continue:
+ BAKER_RB_CHECK_GRAY_BIT_AND_LOAD \
+ r4, r2, MIRROR_OBJECT_CLASS_OFFSET, .Laput_obj_mark_object_class
+.Laput_obj_mark_object_class_continue:
+#else // USE_BAKER_READ_BARRIER
+ READ_BARRIER_SLOW r3, r0, MIRROR_OBJECT_CLASS_OFFSET
+ READ_BARRIER_SLOW r3, r3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET
+ READ_BARRIER_SLOW r4, r2, MIRROR_OBJECT_CLASS_OFFSET
+#endif // USE_BAKER_READ_BARRIER
+
+ cmp r3, r4 @ value's type == array's component type - trivial assignability
+ // All registers are correctly set up for `.Laput_obj_check_assignability`.
+ bne .Laput_obj_check_assignability
+ b .Laput_obj_store
+
+#ifdef USE_BAKER_READ_BARRIER
+.Laput_obj_mark_array_class:
+ BAKER_RB_LOAD_AND_MARK r3, r0, MIRROR_OBJECT_CLASS_OFFSET, art_quick_read_barrier_mark_reg03
+ b .Laput_obj_mark_array_class_continue
+
+.Laput_obj_mark_array_element:
+ BAKER_RB_LOAD_AND_MARK \
+ r3, r3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, art_quick_read_barrier_mark_reg03
+ b .Laput_obj_mark_array_element_continue
+
+.Laput_obj_mark_object_class:
+ BAKER_RB_LOAD_AND_MARK r4, r2, MIRROR_OBJECT_CLASS_OFFSET, art_quick_read_barrier_mark_reg04
+ b .Laput_obj_mark_object_class_continue
+#endif // USE_BAKER_READ_BARRIER
+#endif // USE_READ_BARRIER
END art_quick_aput_obj
// Macro to facilitate adding new allocation entrypoints.
@@ -1926,13 +1979,9 @@
*
* If `reg` is different from `r0`, the generated function follows a
* non-standard runtime calling convention:
- * - register `reg` is used to pass the (sole) argument of this
- * function (instead of R0);
- * - register `reg` is used to return the result of this function
- * (instead of R0);
- * - R0 is treated like a normal (non-argument) caller-save register;
- * - everything else is the same as in the standard runtime calling
- * convention (e.g. standard callee-save registers are preserved).
+ * - register `reg` (which may be different from R0) is used to pass the (sole) argument,
+ * - register `reg` (which may be different from R0) is used to return the result,
+ * - all other registers are callee-save (the values they hold are preserved).
*/
.macro READ_BARRIER_MARK_REG name, reg
ENTRY \name
diff --git a/tools/cpp-define-generator/lockword.def b/tools/cpp-define-generator/lockword.def
index a170c15..5494d59 100644
--- a/tools/cpp-define-generator/lockword.def
+++ b/tools/cpp-define-generator/lockword.def
@@ -30,10 +30,8 @@
art::LockWord::kMarkBitStateMaskShifted)
ASM_DEFINE(LOCK_WORD_MARK_BIT_SHIFT,
art::LockWord::kMarkBitStateShift)
-ASM_DEFINE(LOCK_WORD_READ_BARRIER_STATE_MASK,
+ASM_DEFINE(LOCK_WORD_READ_BARRIER_STATE_MASK_SHIFTED,
art::LockWord::kReadBarrierStateMaskShifted)
-ASM_DEFINE(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED,
- art::LockWord::kReadBarrierStateMaskShiftedToggled)
ASM_DEFINE(LOCK_WORD_READ_BARRIER_STATE_SHIFT,
art::LockWord::kReadBarrierStateShift)
ASM_DEFINE(LOCK_WORD_STATE_FORWARDING_ADDRESS,