MIPS: Reduce Baker read barrier code size overhead
Test: booted MIPS64 (with 2nd arch MIPS32R6) in QEMU
Test: test-art-target-gtest
Test: testrunner.py --target --optimizing
Test: same tests as above on CI20
Test: booted MIPS32 and MIPS64 in QEMU with poisoning
in configurations:
- with Baker read barrier thunks
- without Baker read barrier thunks
- ART_READ_BARRIER_TYPE=TABLELOOKUP
Change-Id: I79f320bf8862a04215c76cfeff3118ebc87f7ef2
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 00e3d67..d9abaa0 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -2721,6 +2721,385 @@
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, $s8
// RA (register 31) is reserved.
+// Caller code for the BRB_FIELD_SHORT_OFFSET_ENTRY thunk entries (one entry per `obj` register):
+// Short constant offset/index:
+// R2: | R6:
+// lw $t9, pReadBarrierMarkReg00
+// beqz $t9, skip_call | beqzc $t9, skip_call
+// addiu $t9, $t9, thunk_disp | nop
+// jalr $t9 | jialc $t9, thunk_disp
+// nop |
+// skip_call: | skip_call:
+// lw `out`, ofs(`obj`) | lw `out`, ofs(`obj`)
+// [subu `out`, $zero, `out`] | [subu `out`, $zero, `out`] # Unpoison reference.
+.macro BRB_FIELD_SHORT_OFFSET_ENTRY obj
+1: # Start of this entry; referenced as 1b below.
+ # Explicit null check. May be redundant (for array elements or when the field
+ # offset is larger than the page size, 4KB).
+ # $ra will be adjusted to point to lw's stack map when throwing NPE.
+ beqz \obj, .Lintrospection_throw_npe # The $gp setup that follows sits in the delay slot.
+#if defined(_MIPS_ARCH_MIPS32R6)
+ lapc $gp, .Lintrospection_exits # $gp = address of .Lintrospection_exits.
+#else
+ addiu $gp, $t9, (.Lintrospection_exits - 1b) # $gp = address of .Lintrospection_exits (relies on $t9 = address of 1b, see caller code).
+#endif
+ .set push
+ .set noat # $at is used explicitly as a scratch register below.
+ lw $at, MIRROR_OBJECT_LOCK_WORD_OFFSET(\obj) # $at = lock word of `obj`.
+ sll $at, $at, 31 - LOCK_WORD_READ_BARRIER_STATE_SHIFT # Move barrier state bit
+ # to sign bit.
+ bltz $at, .Lintrospection_field_array # If gray, load reference, mark.
+ move $t8, \obj # Delay slot: move `obj` to $t8 for common code.
+ .set pop
+ jalr $zero, $ra # Otherwise, load-load barrier and return.
+ sync # Delay slot: the load-load barrier. This is the 8th and last instruction of the entry.
+.endm
+
+// Caller code (R2) for the BRB_FIELD_LONG_OFFSET_ENTRY thunk entries (one entry per `obj` register):
+// Long constant offset/index: | Variable index:
+// lw $t9, pReadBarrierMarkReg00
+// lui $t8, ofs_hi | sll $t8, `index`, 2
+// beqz $t9, skip_call | beqz $t9, skip_call
+// addiu $t9, $t9, thunk_disp | addiu $t9, $t9, thunk_disp
+// jalr $t9 | jalr $t9
+// skip_call: | skip_call:
+// addu $t8, $t8, `obj` | addu $t8, $t8, `obj`
+// lw `out`, ofs_lo($t8) | lw `out`, ofs($t8)
+// [subu `out`, $zero, `out`] | [subu `out`, $zero, `out`] # Unpoison reference.
+//
+// Caller code (R6):
+// Long constant offset/index: | Variable index:
+// lw $t9, pReadBarrierMarkReg00
+// beqz $t9, skip_call | beqz $t9, skip_call
+// aui $t8, `obj`, ofs_hi | lsa $t8, `index`, `obj`, 2
+// jialc $t9, thunk_disp | jialc $t9, thunk_disp
+// skip_call: | skip_call:
+// lw `out`, ofs_lo($t8) | lw `out`, ofs($t8)
+// [subu `out`, $zero, `out`] | [subu `out`, $zero, `out`] # Unpoison reference.
+.macro BRB_FIELD_LONG_OFFSET_ENTRY obj
+1: # Start of this entry; referenced as 1b below.
+ # No explicit null check for variable indices or large constant indices/offsets
+ # as it must have been done earlier.
+#if defined(_MIPS_ARCH_MIPS32R6)
+ lapc $gp, .Lintrospection_exits # $gp = address of .Lintrospection_exits.
+#else
+ addiu $gp, $t9, (.Lintrospection_exits - 1b) # $gp = address of .Lintrospection_exits (relies on $t9 = address of 1b, see caller code).
+#endif
+ .set push
+ .set noat # $at is used explicitly as a scratch register below.
+ lw $at, MIRROR_OBJECT_LOCK_WORD_OFFSET(\obj) # $at = lock word of `obj`.
+ sll $at, $at, 31 - LOCK_WORD_READ_BARRIER_STATE_SHIFT # Move barrier state bit
+ # to sign bit.
+ bltz $at, .Lintrospection_field_array # If gray, load reference, mark.
+ nop # Delay slot: nothing to move, $t8 is set up by the caller code shown above.
+ .set pop
+ jalr $zero, $ra # Otherwise, load-load barrier and return.
+ sync # Delay slot: the load-load barrier.
+ break # Padding to 8 instructions.
+.endm
+
+.macro BRB_GC_ROOT_ENTRY root
+1: # Start of this 4-instruction GC root entry; referenced as 1b below.
+#if defined(_MIPS_ARCH_MIPS32R6)
+ lapc $gp, .Lintrospection_exit_\root # $gp = exit point address.
+#else
+ addiu $gp, $t9, (.Lintrospection_exit_\root - 1b) # $gp = exit point address (assumes $t9 = address of 1b on entry).
+#endif
+ bnez \root, .Lintrospection_common # Non-null root: continue in the common mark-check code.
+ move $t8, \root # Delay slot: move reference to $t8 for common code.
+ jalr $zero, $ra # Return if null.
+ # The next instruction (from the following BRB_GC_ROOT_ENTRY) fills the delay slot.
+ # This instruction has no effect (actual NOP for the last entry; otherwise changes $gp,
+ # which is unused after that anyway).
+.endm
+
+.macro BRB_FIELD_EXIT out
+.Lintrospection_exit_\out:
+ jalr $zero, $ra # Exit point (two instructions): return to the address in $ra.
+ move \out, $t8 # Delay slot: return reference in expected register.
+.endm
+
+.macro BRB_FIELD_EXIT_BREAK
+ break # Filler exit slot, same size as BRB_FIELD_EXIT (two instructions);
+ break # used in the exit table for registers that have no BRB_FIELD_EXIT.
+.endm
+
+ENTRY_NO_GP art_quick_read_barrier_mark_introspection
+ # Entry points for offsets/indices not fitting into int16_t and for variable indices (8 instructions each).
+ BRB_FIELD_LONG_OFFSET_ENTRY $v0
+ BRB_FIELD_LONG_OFFSET_ENTRY $v1
+ BRB_FIELD_LONG_OFFSET_ENTRY $a0
+ BRB_FIELD_LONG_OFFSET_ENTRY $a1
+ BRB_FIELD_LONG_OFFSET_ENTRY $a2
+ BRB_FIELD_LONG_OFFSET_ENTRY $a3
+ BRB_FIELD_LONG_OFFSET_ENTRY $t0
+ BRB_FIELD_LONG_OFFSET_ENTRY $t1
+ BRB_FIELD_LONG_OFFSET_ENTRY $t2
+ BRB_FIELD_LONG_OFFSET_ENTRY $t3
+ BRB_FIELD_LONG_OFFSET_ENTRY $t4
+ BRB_FIELD_LONG_OFFSET_ENTRY $t5
+ BRB_FIELD_LONG_OFFSET_ENTRY $t6
+ BRB_FIELD_LONG_OFFSET_ENTRY $t7
+ BRB_FIELD_LONG_OFFSET_ENTRY $s2
+ BRB_FIELD_LONG_OFFSET_ENTRY $s3
+ BRB_FIELD_LONG_OFFSET_ENTRY $s4
+ BRB_FIELD_LONG_OFFSET_ENTRY $s5
+ BRB_FIELD_LONG_OFFSET_ENTRY $s6
+ BRB_FIELD_LONG_OFFSET_ENTRY $s7
+ BRB_FIELD_LONG_OFFSET_ENTRY $s8
+
+ # Entry points for offsets/indices fitting into int16_t (8 instructions each, same register order as above).
+ BRB_FIELD_SHORT_OFFSET_ENTRY $v0
+ BRB_FIELD_SHORT_OFFSET_ENTRY $v1
+ BRB_FIELD_SHORT_OFFSET_ENTRY $a0
+ BRB_FIELD_SHORT_OFFSET_ENTRY $a1
+ BRB_FIELD_SHORT_OFFSET_ENTRY $a2
+ BRB_FIELD_SHORT_OFFSET_ENTRY $a3
+ BRB_FIELD_SHORT_OFFSET_ENTRY $t0
+ BRB_FIELD_SHORT_OFFSET_ENTRY $t1
+ BRB_FIELD_SHORT_OFFSET_ENTRY $t2
+ BRB_FIELD_SHORT_OFFSET_ENTRY $t3
+ BRB_FIELD_SHORT_OFFSET_ENTRY $t4
+ BRB_FIELD_SHORT_OFFSET_ENTRY $t5
+ BRB_FIELD_SHORT_OFFSET_ENTRY $t6
+ BRB_FIELD_SHORT_OFFSET_ENTRY $t7
+ BRB_FIELD_SHORT_OFFSET_ENTRY $s2
+ BRB_FIELD_SHORT_OFFSET_ENTRY $s3
+ BRB_FIELD_SHORT_OFFSET_ENTRY $s4
+ BRB_FIELD_SHORT_OFFSET_ENTRY $s5
+ BRB_FIELD_SHORT_OFFSET_ENTRY $s6
+ BRB_FIELD_SHORT_OFFSET_ENTRY $s7
+ BRB_FIELD_SHORT_OFFSET_ENTRY $s8
+
+ .global art_quick_read_barrier_mark_introspection_gc_roots
+art_quick_read_barrier_mark_introspection_gc_roots:
+ # Entry points for GC roots (4 instructions each; the delay slot of each return is the next instruction in the file).
+ BRB_GC_ROOT_ENTRY $v0
+ BRB_GC_ROOT_ENTRY $v1
+ BRB_GC_ROOT_ENTRY $a0
+ BRB_GC_ROOT_ENTRY $a1
+ BRB_GC_ROOT_ENTRY $a2
+ BRB_GC_ROOT_ENTRY $a3
+ BRB_GC_ROOT_ENTRY $t0
+ BRB_GC_ROOT_ENTRY $t1
+ BRB_GC_ROOT_ENTRY $t2
+ BRB_GC_ROOT_ENTRY $t3
+ BRB_GC_ROOT_ENTRY $t4
+ BRB_GC_ROOT_ENTRY $t5
+ BRB_GC_ROOT_ENTRY $t6
+ BRB_GC_ROOT_ENTRY $t7
+ BRB_GC_ROOT_ENTRY $s2
+ BRB_GC_ROOT_ENTRY $s3
+ BRB_GC_ROOT_ENTRY $s4
+ BRB_GC_ROOT_ENTRY $s5
+ BRB_GC_ROOT_ENTRY $s6
+ BRB_GC_ROOT_ENTRY $s7
+ BRB_GC_ROOT_ENTRY $s8
+ .global art_quick_read_barrier_mark_introspection_end_of_entries
+art_quick_read_barrier_mark_introspection_end_of_entries:
+ nop # Fill the delay slot of the last BRB_GC_ROOT_ENTRY.
+
+.Lintrospection_throw_npe:
+ b art_quick_throw_null_pointer_exception # Reached from the null check in BRB_FIELD_SHORT_OFFSET_ENTRY.
+ addiu $ra, $ra, 4 # Delay slot: skip lw, make $ra point to lw's stack map.
+
+ .set push
+ .set noat # $at is used explicitly as a scratch register until the matching .set pop.
+
+ // Fields and array elements.
+
+.Lintrospection_field_array:
+ // Get the field/element address using $t8 and the offset from the lw instruction.
+ lh $at, 0($ra) # $ra points to lw: $at = field/element offset.
+ addiu $ra, $ra, 4 + HEAP_POISON_INSTR_SIZE # Skip lw(+subu).
+ addu $t8, $t8, $at # $t8 = field/element address.
+
+ // Calculate the address of the exit point, store it in $gp and load the reference into $t8.
+ lb $at, (-HEAP_POISON_INSTR_SIZE - 2)($ra) # $ra-HEAP_POISON_INSTR_SIZE-4 points to
+ # "lw `out`, ...".
+ andi $at, $at, 31 # Extract `out` from lw.
+ sll $at, $at, 3 # Multiply `out` by the exit point size (BRB_FIELD_EXIT* macros).
+
+ lw $t8, 0($t8) # $t8 = reference.
+ UNPOISON_HEAP_REF $t8
+
+ // Return if null reference.
+ bnez $t8, .Lintrospection_common
+ addu $gp, $gp, $at # Delay slot: $gp = address of the exit point.
+
+ // Early return through the exit point.
+.Lintrospection_return_early:
+ jalr $zero, $gp # Move $t8 to `out` and return.
+ nop # Delay slot: nothing to do, $t8 already holds the value to return.
+
+ // Code common for GC roots, fields and array elements.
+
+.Lintrospection_common:
+ // Check lock word for mark bit, if marked return.
+ lw $t9, MIRROR_OBJECT_LOCK_WORD_OFFSET($t8) # $t9 = lock word of the referenced object.
+ sll $at, $t9, 31 - LOCK_WORD_MARK_BIT_SHIFT # Move mark bit to sign bit.
+ bltz $at, .Lintrospection_return_early # The next instruction (sll below) occupies the delay slot.
+#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3)
+ // The below code depends on the lock word state being in the highest bits
+ // and the "forwarding address" state having all bits set.
+#error "Unexpected lock word state shift or forwarding address state value."
+#endif
+ // Test that both the forwarding state bits are 1.
+ sll $at, $t9, 1
+ and $at, $at, $t9 # Sign bit = 1 IFF both bits are 1.
+ bgez $at, .Lintrospection_mark # Not a forwarding address: take the marking path.
+ nop
+
+ .set pop
+
+ // Shift left by the forwarding address shift. This clears out the state bits since they are
+ // in the top 2 bits of the lock word.
+ jalr $zero, $gp # Move $t8 to `out` and return.
+ sll $t8, $t9, LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT # Delay slot: $t8 = forwarding address.
+
+.Lintrospection_mark:
+ // Partially set up the stack frame preserving only $ra.
+ addiu $sp, $sp, -160 # Includes 16 bytes of space for argument registers $a0-$a3.
+ .cfi_adjust_cfa_offset 160
+ sw $ra, 156($sp)
+ .cfi_rel_offset 31, 156
+
+ // Set up $gp, clobbering $ra and using the branch delay slot for a useful instruction.
+ bal 1f
+ sw $gp, 152($sp) # Preserve the exit point address.
+1:
+ .cpload $ra # $ra holds the address of 1b here (set by the bal above).
+
+ // Finalize the stack frame and call.
+ sw $t7, 148($sp)
+ .cfi_rel_offset 15, 148
+ sw $t6, 144($sp)
+ .cfi_rel_offset 14, 144
+ sw $t5, 140($sp)
+ .cfi_rel_offset 13, 140
+ sw $t4, 136($sp)
+ .cfi_rel_offset 12, 136
+ sw $t3, 132($sp)
+ .cfi_rel_offset 11, 132
+ sw $t2, 128($sp)
+ .cfi_rel_offset 10, 128
+ sw $t1, 124($sp)
+ .cfi_rel_offset 9, 124
+ sw $t0, 120($sp)
+ .cfi_rel_offset 8, 120
+ sw $a3, 116($sp)
+ .cfi_rel_offset 7, 116
+ sw $a2, 112($sp)
+ .cfi_rel_offset 6, 112
+ sw $a1, 108($sp)
+ .cfi_rel_offset 5, 108
+ sw $a0, 104($sp)
+ .cfi_rel_offset 4, 104
+ sw $v1, 100($sp)
+ .cfi_rel_offset 3, 100
+ sw $v0, 96($sp)
+ .cfi_rel_offset 2, 96
+
+ la $t9, artReadBarrierMark # $t9 = address of the function called by jalr below.
+
+ sdc1 $f18, 88($sp) # Save the even-numbered FP registers $f0-$f18 at 16..95($sp).
+ sdc1 $f16, 80($sp)
+ sdc1 $f14, 72($sp)
+ sdc1 $f12, 64($sp)
+ sdc1 $f10, 56($sp)
+ sdc1 $f8, 48($sp)
+ sdc1 $f6, 40($sp)
+ sdc1 $f4, 32($sp)
+ sdc1 $f2, 24($sp)
+ sdc1 $f0, 16($sp)
+
+ jalr $t9 # $v0 <- artReadBarrierMark(reference)
+ move $a0, $t8 # Pass reference in $a0.
+ move $t8, $v0 # $t8 = returned reference; must be copied before $v0 is restored below.
+
+ lw $ra, 156($sp)
+ .cfi_restore 31
+ lw $gp, 152($sp) # $gp = address of the exit point.
+ lw $t7, 148($sp)
+ .cfi_restore 15
+ lw $t6, 144($sp)
+ .cfi_restore 14
+ lw $t5, 140($sp)
+ .cfi_restore 13
+ lw $t4, 136($sp)
+ .cfi_restore 12
+ lw $t3, 132($sp)
+ .cfi_restore 11
+ lw $t2, 128($sp)
+ .cfi_restore 10
+ lw $t1, 124($sp)
+ .cfi_restore 9
+ lw $t0, 120($sp)
+ .cfi_restore 8
+ lw $a3, 116($sp)
+ .cfi_restore 7
+ lw $a2, 112($sp)
+ .cfi_restore 6
+ lw $a1, 108($sp)
+ .cfi_restore 5
+ lw $a0, 104($sp)
+ .cfi_restore 4
+ lw $v1, 100($sp)
+ .cfi_restore 3
+ lw $v0, 96($sp)
+ .cfi_restore 2
+
+ ldc1 $f18, 88($sp) # Restore the FP registers saved above.
+ ldc1 $f16, 80($sp)
+ ldc1 $f14, 72($sp)
+ ldc1 $f12, 64($sp)
+ ldc1 $f10, 56($sp)
+ ldc1 $f8, 48($sp)
+ ldc1 $f6, 40($sp)
+ ldc1 $f4, 32($sp)
+ ldc1 $f2, 24($sp)
+ ldc1 $f0, 16($sp)
+
+ // Return through the exit point.
+ jalr $zero, $gp # Move $t8 to `out` and return.
+ addiu $sp, $sp, 160 # Delay slot: pop the 160-byte frame.
+ .cfi_adjust_cfa_offset -160
+
+.Lintrospection_exits: # Table of 32 exit slots, 8 bytes each, indexed by the register number of `out`.
+ BRB_FIELD_EXIT_BREAK # Slot 0 ($zero).
+ BRB_FIELD_EXIT_BREAK # Slot 1 ($at).
+ BRB_FIELD_EXIT $v0
+ BRB_FIELD_EXIT $v1
+ BRB_FIELD_EXIT $a0
+ BRB_FIELD_EXIT $a1
+ BRB_FIELD_EXIT $a2
+ BRB_FIELD_EXIT $a3
+ BRB_FIELD_EXIT $t0
+ BRB_FIELD_EXIT $t1
+ BRB_FIELD_EXIT $t2
+ BRB_FIELD_EXIT $t3
+ BRB_FIELD_EXIT $t4
+ BRB_FIELD_EXIT $t5
+ BRB_FIELD_EXIT $t6
+ BRB_FIELD_EXIT $t7
+ BRB_FIELD_EXIT_BREAK # Slot 16 ($s0).
+ BRB_FIELD_EXIT_BREAK # Slot 17 ($s1).
+ BRB_FIELD_EXIT $s2
+ BRB_FIELD_EXIT $s3
+ BRB_FIELD_EXIT $s4
+ BRB_FIELD_EXIT $s5
+ BRB_FIELD_EXIT $s6
+ BRB_FIELD_EXIT $s7
+ BRB_FIELD_EXIT_BREAK # Slot 24 ($t8).
+ BRB_FIELD_EXIT_BREAK # Slot 25 ($t9).
+ BRB_FIELD_EXIT_BREAK # Slot 26 ($k0).
+ BRB_FIELD_EXIT_BREAK # Slot 27 ($k1).
+ BRB_FIELD_EXIT_BREAK # Slot 28 ($gp).
+ BRB_FIELD_EXIT_BREAK # Slot 29 ($sp).
+ BRB_FIELD_EXIT $s8
+ BRB_FIELD_EXIT_BREAK # Slot 31 ($ra).
+END art_quick_read_barrier_mark_introspection
+
.extern artInvokePolymorphic
ENTRY art_quick_invoke_polymorphic
SETUP_SAVE_REFS_AND_ARGS_FRAME