MIPS: Reintroduce hand-written rosalloc entrypoints
Reintroduced art_quick_alloc_object_resolved_rosalloc and added
art_quick_alloc_object_initialized_rosalloc for MIPS32 and MIPS64.
Test: booted MIPS32 and MIPS64 in QEMU
Test: mma test-art-target on CI20 (MIPS32R2)
Test: mma test-art-target in QEMU (MIPS64R6)
Change-Id: I51c7d0629fd005e61f93cedd3989c53efc5a90e5
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 663cb6c..10b690a 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -1576,9 +1576,87 @@
// Generate the allocation entrypoints for each allocator.
GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
+// A hand-written override for:
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc, RosAlloc)
+.macro ART_QUICK_ALLOC_OBJECT_ROSALLOC c_name, cxx_name
+ENTRY \c_name
+ # Fast path rosalloc allocation
+ # a0: type
+ # s1: Thread::Current
+ # -----------------------------
+ # t1: object size
+ # t2: rosalloc run
+ # t3: thread stack top offset
+ # t4: thread stack bottom offset
+ # v0: free list head
+ #
+ # t5, t6 : temps
+ lw $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1) # Check if thread local allocation
+ lw $t4, THREAD_LOCAL_ALLOC_STACK_END_OFFSET($s1) # stack has any room left.
+ bgeu $t3, $t4, .Lslow_path_\c_name
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc, RosAlloc)
+ lw $t1, MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET($a0) # Load object size (t1).
+ li $t5, ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE # Check if size is for a thread local
+ # allocation. Also does the
+ # initialized and finalizable checks.
+ bgtu $t1, $t5, .Lslow_path_\c_name
+
+ # Compute the rosalloc bracket index from the size. Since the size is already aligned we can
+ # combine the two shifts together.
+ srl $t1, $t1, (ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT - POINTER_SIZE_SHIFT)
+
+ addu $t2, $t1, $s1
+ lw $t2, (THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)($t2) # Load rosalloc run (t2).
+
+ # Load the free list head (v0).
+ # NOTE: this will be the return val.
+ lw $v0, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2)
+ beqz $v0, .Lslow_path_\c_name
+ nop
+
+ # Load the next pointer of the head and update the list head with the next pointer.
+ lw $t5, ROSALLOC_SLOT_NEXT_OFFSET($v0)
+ sw $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2)
+
+ # Store the class pointer in the header. This also overwrites the first pointer. The offsets are
+ # asserted to match.
+
+#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
+#error "Class pointer needs to overwrite next pointer."
+#endif
+
+ POISON_HEAP_REF $a0
+ sw $a0, MIRROR_OBJECT_CLASS_OFFSET($v0)
+
+ # Push the new object onto the thread local allocation stack and increment the thread local
+ # allocation stack top.
+ sw $v0, 0($t3)
+ addiu $t3, $t3, COMPRESSED_REFERENCE_SIZE
+ sw $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1)
+
+ # Decrement the size of the free list.
+ lw $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2)
+ addiu $t5, $t5, -1
+ sw $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2)
+
+ sync # Fence.
+
+ jalr $zero, $ra
+ nop
+
+ .Lslow_path_\c_name:
+ SETUP_SAVE_REFS_ONLY_FRAME
+ la $t9, \cxx_name
+ jalr $t9
+ move $a1, $s1 # Pass self as argument.
+ RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+END \c_name
+.endm
+
+ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc
+ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc
+
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)