x86/x86-64: Rewrite `art_quick_aput_obj`.
Check for the GC marking just once and use a dedicated path
for GC marking on. Use `art_quick_read_barrier_mark_regNN`
for reference marking and remove the obsolete and slow
`READ_BARRIER` macros.
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: testrunner.py --host --optimizing --gcstress
Bug: 160737021
Change-Id: I250e1bbeb5d93bc14055fc17d4eb0c6167c49e82
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 7f1311c..67ca2bb 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1254,126 +1254,102 @@
.endif
END_MACRO
- /*
- * Macro to insert read barrier, only used in art_quick_aput_obj.
- * obj_reg and dest_reg are registers, offset is a defined literal such as
- * MIRROR_OBJECT_CLASS_OFFSET.
- * pop_eax is a boolean flag, indicating if eax is popped after the call.
- * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
- */
-MACRO4(READ_BARRIER, obj_reg, offset, dest_reg, pop_eax)
-#ifdef USE_READ_BARRIER
- PUSH eax // save registers used in art_quick_aput_obj
- PUSH ebx
- PUSH edx
- PUSH ecx
- // Outgoing argument set up
- pushl MACRO_LITERAL((RAW_VAR(offset))) // pass offset, double parentheses are necessary
- CFI_ADJUST_CFA_OFFSET(4)
- PUSH RAW_VAR(obj_reg) // pass obj_reg
- PUSH eax // pass ref, just pass eax for now since parameter ref is unused
- call SYMBOL(artReadBarrierSlow) // artReadBarrierSlow(ref, obj_reg, offset)
- // No need to unpoison return value in eax, artReadBarrierSlow() would do the unpoisoning.
- .ifnc RAW_VAR(dest_reg), eax
- movl %eax, REG_VAR(dest_reg) // save loaded ref in dest_reg
- .endif
- addl MACRO_LITERAL(12), %esp // pop arguments
- CFI_ADJUST_CFA_OFFSET(-12)
- POP_REG_NE ecx, RAW_VAR(dest_reg) // Restore args except dest_reg
- POP_REG_NE edx, RAW_VAR(dest_reg)
- POP_REG_NE ebx, RAW_VAR(dest_reg)
- .ifc RAW_VAR(pop_eax), true
- POP_REG_NE eax, RAW_VAR(dest_reg)
- .endif
-#else
- movl RAW_VAR(offset)(REG_VAR(obj_reg)), REG_VAR(dest_reg)
- UNPOISON_HEAP_REF RAW_VAR(dest_reg)
-#endif // USE_READ_BARRIER
-END_MACRO
-
DEFINE_FUNCTION art_quick_aput_obj
test %edx, %edx // store of null
- jz .Ldo_aput_null
- READ_BARRIER eax, MIRROR_OBJECT_CLASS_OFFSET, ebx, true
- READ_BARRIER ebx, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, ebx, true
- // value's type == array's component type - trivial assignability
-#if defined(USE_READ_BARRIER)
- READ_BARRIER edx, MIRROR_OBJECT_CLASS_OFFSET, eax, false
- cmpl %eax, %ebx
- POP eax // restore eax from the push in the beginning of READ_BARRIER macro
- // This asymmetric push/pop saves a push of eax and maintains stack alignment.
-#elif defined(USE_HEAP_POISONING)
- PUSH eax // save eax
- movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %eax
- UNPOISON_HEAP_REF eax
- cmpl %eax, %ebx
- POP eax // restore eax
-#else
- cmpl MIRROR_OBJECT_CLASS_OFFSET(%edx), %ebx
-#endif
- jne .Lcheck_assignability
-.Ldo_aput:
+ jz .Laput_obj_null
+ movl MIRROR_OBJECT_CLASS_OFFSET(%eax), %ebx
+ UNPOISON_HEAP_REF ebx
+#ifdef USE_READ_BARRIER
+ cmpl LITERAL(0), %fs:THREAD_IS_GC_MARKING_OFFSET
+ jnz .Laput_obj_gc_marking
+#endif // USE_READ_BARRIER
+ movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%ebx), %ebx
+ cmpl MIRROR_OBJECT_CLASS_OFFSET(%edx), %ebx // Both poisoned if heap poisoning is enabled.
+ jne .Laput_obj_check_assignability
+.Laput_obj_store:
POISON_HEAP_REF edx
movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4)
movl %fs:THREAD_CARD_TABLE_OFFSET, %edx
shrl LITERAL(CARD_TABLE_CARD_SHIFT), %eax
movb %dl, (%edx, %eax)
ret
-.Ldo_aput_null:
+
+.Laput_obj_null:
movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4)
ret
-.Lcheck_assignability:
- PUSH eax // save arguments
- PUSH ecx
- PUSH edx
-#if defined(USE_READ_BARRIER)
- subl LITERAL(4), %esp // alignment padding
- CFI_ADJUST_CFA_OFFSET(4)
- READ_BARRIER edx, MIRROR_OBJECT_CLASS_OFFSET, eax, true
- subl LITERAL(4), %esp // alignment padding
- CFI_ADJUST_CFA_OFFSET(4)
- PUSH eax // pass arg2 - type of the value to be stored
-#elif defined(USE_HEAP_POISONING)
- subl LITERAL(8), %esp // alignment padding
- CFI_ADJUST_CFA_OFFSET(8)
+
+.Laput_obj_check_assignability:
+ UNPOISON_HEAP_REF ebx // Unpoison array component type if poisoning is enabled.
+ PUSH_ARG eax // Save `art_quick_aput_obj()` arguments.
+ PUSH_ARG ecx
+ PUSH_ARG edx
+ INCREASE_FRAME 8 // Alignment padding.
+ // Pass arg2 - type of the value to be stored.
+#if defined(USE_HEAP_POISONING)
movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %eax
UNPOISON_HEAP_REF eax
- PUSH eax // pass arg2 - type of the value to be stored
+ PUSH_ARG eax
#else
- subl LITERAL(8), %esp // alignment padding
- CFI_ADJUST_CFA_OFFSET(8)
- pushl MIRROR_OBJECT_CLASS_OFFSET(%edx) // pass arg2 - type of the value to be stored
+ pushl MIRROR_OBJECT_CLASS_OFFSET(%edx)
CFI_ADJUST_CFA_OFFSET(4)
#endif
- PUSH ebx // pass arg1 - component type of the array
+.Laput_obj_check_assignability_call:
+ PUSH_ARG ebx // Pass arg1 - component type of the array.
call SYMBOL(artIsAssignableFromCode) // (Class* a, Class* b)
- addl LITERAL(16), %esp // pop arguments
- CFI_ADJUST_CFA_OFFSET(-16)
+ DECREASE_FRAME 16 // Pop `artIsAssignableFromCode()` arguments
testl %eax, %eax
+ POP_ARG edx // Pop `art_quick_aput_obj()` arguments; flags unaffected.
+ POP_ARG ecx
+ POP_ARG eax
jz .Lthrow_array_store_exception
- POP edx
- POP ecx
- POP eax
POISON_HEAP_REF edx
- movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4) // do the aput
+ movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4) // Do the aput.
movl %fs:THREAD_CARD_TABLE_OFFSET, %edx
shrl LITERAL(CARD_TABLE_CARD_SHIFT), %eax
movb %dl, (%edx, %eax)
ret
- CFI_ADJUST_CFA_OFFSET(12) // 3 POP after the jz for unwinding.
+
.Lthrow_array_store_exception:
- POP edx
- POP ecx
- POP eax
- SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx // save all registers as basis for long jump context
- // Outgoing argument set up
- PUSH eax // alignment padding
- pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current()
- CFI_ADJUST_CFA_OFFSET(4)
- PUSH edx // pass arg2 - value
- PUSH eax // pass arg1 - array
+#ifdef USE_READ_BARRIER
+ CFI_REMEMBER_STATE
+#endif // USE_READ_BARRIER
+ SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx // Save all registers as basis for long jump context.
+ // Outgoing argument set up.
+ PUSH_ARG eax // Alignment padding.
+ PUSH_ARG fs:THREAD_SELF_OFFSET // Pass Thread::Current()
+ PUSH_ARG edx // Pass arg2 - value.
+ PUSH_ARG eax // Pass arg1 - array.
call SYMBOL(artThrowArrayStoreException) // (array, value, Thread*)
UNREACHABLE
+
+#ifdef USE_READ_BARRIER
+ CFI_RESTORE_STATE_AND_DEF_CFA esp, 4
+.Laput_obj_gc_marking:
+ PUSH_ARG eax // Save `art_quick_aput_obj()` arguments.
+ PUSH_ARG ecx // We need to align stack for `art_quick_read_barrier_mark_regNN`
+ PUSH_ARG edx // and use a register (EAX) as a temporary for the object class.
+ call SYMBOL(art_quick_read_barrier_mark_reg03) // Mark EBX.
+ movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%ebx), %ebx
+ UNPOISON_HEAP_REF ebx
+ call SYMBOL(art_quick_read_barrier_mark_reg03) // Mark EBX.
+ movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %eax
+ UNPOISON_HEAP_REF eax
+ call SYMBOL(art_quick_read_barrier_mark_reg00) // Mark EAX.
+ cmpl %eax, %ebx
+ jne .Laput_obj_check_assignability_gc_marking
+ POP_ARG edx // Restore `art_quick_aput_obj()` arguments.
+ POP_ARG ecx
+ POP_ARG eax
+ jmp .Laput_obj_store
+
+.Laput_obj_check_assignability_gc_marking:
+ // Prepare arguments in line with `.Laput_obj_check_assignability_call` and jump there.
+ // (EAX, ECX and EDX were already saved in the right stack slots.)
+ INCREASE_FRAME 8 // Alignment padding.
+ PUSH_ARG eax // Pass arg2 - type of the value to be stored.
+ // The arg1 shall be pushed at `.Laput_obj_check_assignability_call`.
+ jmp .Laput_obj_check_assignability_call
+#endif // USE_READ_BARRIER
END_FUNCTION art_quick_aput_obj
DEFINE_FUNCTION art_quick_memcpy
@@ -1993,15 +1969,10 @@
// getting its argument and returning its result through register
// `reg`, saving and restoring all caller-save registers.
//
-// If `reg` is different from `eax`, the generated function follows a
-// non-standard runtime calling convention:
-// - register `reg` is used to pass the (sole) argument of this function
-// (instead of EAX);
-// - register `reg` is used to return the result of this function
-// (instead of EAX);
-// - EAX is treated like a normal (non-argument) caller-save register;
-// - everything else is the same as in the standard runtime calling
-// convention (e.g. standard callee-save registers are preserved).
+// The generated function follows a non-standard runtime calling convention:
+// - register `reg` (which may differ from EAX) is used to pass the (sole) argument,
+// - register `reg` (which may differ from EAX) is used to return the result,
+// - all other registers are callee-save (the values they hold are preserved).
MACRO2(READ_BARRIER_MARK_REG, name, reg)
DEFINE_FUNCTION VAR(name)
// Null check so that we can load the lock word.
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 673696c..18207ae 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1163,134 +1163,89 @@
.endif
END_MACRO
- /*
- * Macro to insert read barrier, used in art_quick_aput_obj.
- * obj_reg and dest_reg{32|64} are registers, offset is a defined literal such as
- * MIRROR_OBJECT_CLASS_OFFSET. dest_reg needs two versions to handle the mismatch between
- * 64b PUSH/POP and 32b argument.
- * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
- *
- * As with art_quick_aput_obj function, the 64b versions are in comments.
- */
-MACRO4(READ_BARRIER, obj_reg, offset, dest_reg32, dest_reg64)
-#ifdef USE_READ_BARRIER
- PUSH rax // save registers that might be used
- PUSH rdi
- PUSH rsi
- PUSH rdx
- PUSH rcx
- SETUP_FP_CALLEE_SAVE_FRAME
- // Outgoing argument set up
- // movl REG_VAR(ref_reg32), %edi // pass ref, no-op for now since parameter ref is unused
- // // movq REG_VAR(ref_reg64), %rdi
- movl REG_VAR(obj_reg), %esi // pass obj_reg
- // movq REG_VAR(obj_reg), %rsi
- movl MACRO_LITERAL((RAW_VAR(offset))), %edx // pass offset, double parentheses are necessary
- // movq MACRO_LITERAL((RAW_VAR(offset))), %rdx
- call SYMBOL(artReadBarrierSlow) // artReadBarrierSlow(ref, obj_reg, offset)
- // No need to unpoison return value in rax, artReadBarrierSlow() would do the unpoisoning.
- .ifnc RAW_VAR(dest_reg32), eax
- // .ifnc RAW_VAR(dest_reg64), rax
- movl %eax, REG_VAR(dest_reg32) // save loaded ref in dest_reg
- // movq %rax, REG_VAR(dest_reg64)
- .endif
- RESTORE_FP_CALLEE_SAVE_FRAME
- POP_REG_NE rcx, RAW_VAR(dest_reg64) // Restore registers except dest_reg
- POP_REG_NE rdx, RAW_VAR(dest_reg64)
- POP_REG_NE rsi, RAW_VAR(dest_reg64)
- POP_REG_NE rdi, RAW_VAR(dest_reg64)
- POP_REG_NE rax, RAW_VAR(dest_reg64)
-#else
- movl RAW_VAR(offset)(REG_VAR(obj_reg)), REG_VAR(dest_reg32)
- // movq RAW_VAR(offset)(REG_VAR(obj_reg)), REG_VAR(dest_reg64)
- UNPOISON_HEAP_REF RAW_VAR(dest_reg32) // UNPOISON_HEAP_REF only takes a 32b register
-#endif // USE_READ_BARRIER
-END_MACRO
-
DEFINE_FUNCTION art_quick_aput_obj
- testl %edx, %edx // store of null
-// test %rdx, %rdx
- jz .Ldo_aput_null
- READ_BARRIER edi, MIRROR_OBJECT_CLASS_OFFSET, ecx, rcx
- // READ_BARRIER rdi, MIRROR_OBJECT_CLASS_OFFSET, ecx, rcx
- READ_BARRIER ecx, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, ecx, rcx
- // READ_BARRIER rcx, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, ecx, rcx
-#if defined(USE_HEAP_POISONING) || defined(USE_READ_BARRIER)
- READ_BARRIER edx, MIRROR_OBJECT_CLASS_OFFSET, eax, rax // rax is free.
- // READ_BARRIER rdx, MIRROR_OBJECT_CLASS_OFFSET, eax, rax
- cmpl %eax, %ecx // value's type == array's component type - trivial assignability
-#else
- cmpl MIRROR_OBJECT_CLASS_OFFSET(%edx), %ecx // value's type == array's component type - trivial assignability
-// cmpq MIRROR_CLASS_OFFSET(%rdx), %rcx
-#endif
- jne .Lcheck_assignability
-.Ldo_aput:
+ test %edx, %edx // store of null
+ jz .Laput_obj_null
+ movl MIRROR_OBJECT_CLASS_OFFSET(%rdi), %ecx
+ UNPOISON_HEAP_REF ecx
+#ifdef USE_READ_BARRIER
+ cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
+ jnz .Laput_obj_gc_marking
+#endif // USE_READ_BARRIER
+ movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rcx), %ecx
+ cmpl MIRROR_OBJECT_CLASS_OFFSET(%rdx), %ecx // Both poisoned if heap poisoning is enabled.
+ jne .Laput_obj_check_assignability
+.Laput_obj_store:
POISON_HEAP_REF edx
- movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
-// movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
+ movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
shrl LITERAL(CARD_TABLE_CARD_SHIFT), %edi
-// shrl LITERAL(CARD_TABLE_CARD_SHIFT), %rdi
- movb %dl, (%rdx, %rdi) // Note: this assumes that top 32b of %rdi are zero
+ movb %dl, (%rdx, %rdi)
ret
-.Ldo_aput_null:
- movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
-// movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
+
+.Laput_obj_null:
+ movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
ret
-.Lcheck_assignability:
- // Save arguments.
- PUSH rdi
- PUSH rsi
- PUSH rdx
+
+.Laput_obj_check_assignability:
+ UNPOISON_HEAP_REF ecx // Unpoison array component type if poisoning is enabled.
+ PUSH_ARG rdi // Save arguments.
+ PUSH_ARG rsi
+ PUSH_ARG rdx
+ movl MIRROR_OBJECT_CLASS_OFFSET(%rdx), %esi // Pass arg2 = value's class.
+ UNPOISON_HEAP_REF esi
+.Laput_obj_check_assignability_call:
+ movl %ecx, %edi // Pass arg1 = array's component type.
SETUP_FP_CALLEE_SAVE_FRAME
-
-#if defined(USE_HEAP_POISONING) || defined(USE_READ_BARRIER)
- // The load of MIRROR_OBJECT_CLASS_OFFSET(%edx) is redundant, eax still holds the value.
- movl %eax, %esi // Pass arg2 = value's class.
- // movq %rax, %rsi
-#else
- // "Uncompress" = do nothing, as already zero-extended on load.
- movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %esi // Pass arg2 = value's class.
-#endif
- movq %rcx, %rdi // Pass arg1 = array's component type.
-
call SYMBOL(artIsAssignableFromCode) // (Class* a, Class* b)
-
- // Exception?
- testq %rax, %rax
- jz .Lthrow_array_store_exception
-
- RESTORE_FP_CALLEE_SAVE_FRAME
- // Restore arguments.
- POP rdx
- POP rsi
- POP rdi
-
+ RESTORE_FP_CALLEE_SAVE_FRAME // Restore FP registers.
+ POP_ARG rdx // Restore arguments.
+ POP_ARG rsi
+ POP_ARG rdi
+ testq %rax, %rax // Check for exception.
+ jz .Laput_obj_throw_array_store_exception
POISON_HEAP_REF edx
- movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
-// movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
+ movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
shrl LITERAL(CARD_TABLE_CARD_SHIFT), %edi
-// shrl LITERAL(CARD_TABLE_CARD_SHIFT), %rdi
- movb %dl, (%rdx, %rdi) // Note: this assumes that top 32b of %rdi are zero
-// movb %dl, (%rdx, %rdi)
+ movb %dl, (%rdx, %rdi)
ret
- CFI_ADJUST_CFA_OFFSET(24 + 4 * 8) // Reset unwind info so following code unwinds.
-.Lthrow_array_store_exception:
- RESTORE_FP_CALLEE_SAVE_FRAME
- // Restore arguments.
- POP rdx
- POP rsi
- POP rdi
+.Laput_obj_throw_array_store_exception:
+#ifdef USE_READ_BARRIER
+ CFI_REMEMBER_STATE
+#endif // USE_READ_BARRIER
SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // Save all registers as basis for long jump context.
-
// Outgoing argument set up.
movq %rdx, %rsi // Pass arg 2 = value.
movq %gs:THREAD_SELF_OFFSET, %rdx // Pass arg 3 = Thread::Current().
// Pass arg 1 = array.
call SYMBOL(artThrowArrayStoreException) // (array, value, Thread*)
UNREACHABLE
+
+#ifdef USE_READ_BARRIER
+ CFI_RESTORE_STATE_AND_DEF_CFA rsp, 8 // Back to the entry state: CFA = RSP + 8 (return address only).
+.Laput_obj_gc_marking:
+ // We need to align stack for `art_quick_read_barrier_mark_regNN`.
+ INCREASE_FRAME 8 // Stack alignment.
+ call SYMBOL(art_quick_read_barrier_mark_reg01) // Mark ECX
+ movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rcx), %ecx
+ UNPOISON_HEAP_REF ecx
+ call SYMBOL(art_quick_read_barrier_mark_reg01) // Mark ECX
+ movl MIRROR_OBJECT_CLASS_OFFSET(%rdx), %eax
+ UNPOISON_HEAP_REF eax
+ call SYMBOL(art_quick_read_barrier_mark_reg00) // Mark EAX
+ DECREASE_FRAME 8 // Remove stack alignment.
+ cmpl %eax, %ecx
+ je .Laput_obj_store
+ // Prepare arguments in line with `.Laput_obj_check_assignability_call` and jump there.
+ PUSH_ARG rdi // Save arguments.
+ PUSH_ARG rsi
+ PUSH_ARG rdx
+ movl %eax, %esi // Pass arg2 - type of the value to be stored.
+ // The arg1 shall be moved at `.Laput_obj_check_assignability_call`.
+ jmp .Laput_obj_check_assignability_call
+#endif // USE_READ_BARRIER
END_FUNCTION art_quick_aput_obj
// TODO: This is quite silly on X86_64 now.
@@ -1855,16 +1810,9 @@
//
// The generated function follows a non-standard runtime calling
// convention:
-// - register `reg` (which may be different from RDI) is used to pass
-// the (sole) argument of this function;
-// - register `reg` (which may be different from RAX) is used to return
-// the result of this function (instead of RAX);
-// - if `reg` is different from `rdi`, RDI is treated like a normal
-// (non-argument) caller-save register;
-// - if `reg` is different from `rax`, RAX is treated like a normal
-// (non-result) caller-save register;
-// - everything else is the same as in the standard runtime calling
-// convention (e.g. standard callee-save registers are preserved).
+// - register `reg` (which may be different from RDI) is used to pass the (sole) argument,
+// - register `reg` (which may be different from RAX) is used to return the result,
+// - all other registers are callee-save (the values they hold are preserved).
MACRO2(READ_BARRIER_MARK_REG, name, reg)
DEFINE_FUNCTION VAR(name)
// Null check so that we can load the lock word.