Align allocation entrypoint implementations between arm/arm64/x86/x64.
x64:
- Add art_quick_alloc_initialized_rosalloc
x86:
- Add art_quick_alloc_initialized_rosalloc
- Add art_quick_alloc_initialized{_region}_tlab
- Add art_quick_alloc_array_resolved{8,16,32,64}{_region}_tlab
arm32:
- Add art_quick_alloc_initialized_rosalloc
- Add art_quick_alloc_initialized{_region}_tlab
- Add art_quick_alloc_array_resolved{8,16,32,64}{_region}_tlab
arm64:
- Add art_quick_alloc_initialized_rosalloc
- Add art_quick_alloc_initialized{_region}_tlab
- Add art_quick_alloc_array_resolved{8,16,32,64}_tlab
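
For context, the compiler backends now select the array entrypoint from the
element type through CodeGenerator::GetArrayAllocationEntrypoint (see the
code_generator changes below). A rough sketch of that selection, with the
enum names and dispatch assumed rather than taken from this change:

  QuickEntrypointEnum CodeGenerator::GetArrayAllocationEntrypoint(Handle<mirror::Class> array_klass) {
    switch (array_klass->GetComponentType()->GetPrimitiveType()) {
      case Primitive::kPrimBoolean:
      case Primitive::kPrimByte:   return kQuickAllocArrayResolved8;   // 1-byte elements.
      case Primitive::kPrimChar:
      case Primitive::kPrimShort:  return kQuickAllocArrayResolved16;  // 2-byte elements.
      case Primitive::kPrimInt:
      case Primitive::kPrimFloat:  return kQuickAllocArrayResolved32;  // 4-byte elements.
      case Primitive::kPrimLong:
      case Primitive::kPrimDouble: return kQuickAllocArrayResolved64;  // 8-byte elements.
      default:                     return kQuickAllocArrayResolved;    // References, etc.
    }
  }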
Test: test-art-target test-art-host
Bug: 30933338
Change-Id: I0dd8667a2921dd0b3403bea5d05304ba5d40627f
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index f5b6ebe..1893029 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -3993,8 +3993,11 @@
void InstructionCodeGeneratorARM::VisitNewArray(HNewArray* instruction) {
// Note: if heap poisoning is enabled, the entry point takes care
// of poisoning the reference.
- codegen_->InvokeRuntime(kQuickAllocArrayResolved, instruction, instruction->GetDexPc());
+ QuickEntrypointEnum entrypoint =
+ CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
+ codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
+ DCHECK(!codegen_->IsLeafMethod());
}
void LocationsBuilderARM::VisitParameterValue(HParameterValue* instruction) {
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 1b74316..3e795c7 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -4214,7 +4214,9 @@
void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) {
// Note: if heap poisoning is enabled, the entry point takes care
// of poisoning the reference.
- codegen_->InvokeRuntime(kQuickAllocArrayResolved, instruction, instruction->GetDexPc());
+ QuickEntrypointEnum entrypoint =
+ CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
+ codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
DCHECK(!codegen_->IsLeafMethod());
}
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index ed36436..a443a40 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1086,11 +1086,37 @@
DELIVER_PENDING_EXCEPTION_FRAME_READY
END art_quick_resolve_string
+
// Generate the allocation entrypoints for each allocator.
-GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
+GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
+// Comment out allocators that have arm specific asm.
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
+
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_RESOLVED_OBJECT(_rosalloc, RosAlloc).
-ENTRY art_quick_alloc_object_resolved_rosalloc
+.macro ART_QUICK_ALLOC_OBJECT_ROSALLOC c_name, cxx_name
+ENTRY \c_name
// Fast path rosalloc allocation.
// r0: type/return value, r9: Thread::Current
// r1, r2, r3, r12: free.
@@ -1099,13 +1125,13 @@
// TODO: consider using ldrd.
ldr r12, [r9, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
cmp r3, r12
- bhs .Lart_quick_alloc_object_resolved_rosalloc_slow_path
+ bhs .Lslow_path\c_name
ldr r3, [r0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET] // Load the object size (r3)
cmp r3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE // Check if the size is for a thread
// local allocation. Also does the
// initialized and finalizable checks.
- bhs .Lart_quick_alloc_object_resolved_rosalloc_slow_path
+ bhs .Lslow_path\c_name
// Compute the rosalloc bracket index
// from the size. Since the size is
// already aligned we can combine the
@@ -1119,7 +1145,7 @@
// Load the free list head (r3). This
// will be the return val.
ldr r3, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
- cbz r3, .Lart_quick_alloc_object_resolved_rosalloc_slow_path
+ cbz r3, .Lslow_path\c_name
// "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
ldr r1, [r3, #ROSALLOC_SLOT_NEXT_OFFSET] // Load the next pointer of the head
// and update the list head with the
@@ -1164,16 +1190,20 @@
mov r0, r3 // Set the return value and return.
bx lr
-.Lart_quick_alloc_object_resolved_rosalloc_slow_path:
+.Lslow_path\c_name:
SETUP_SAVE_REFS_ONLY_FRAME r2 @ save callee saves in case of GC
mov r1, r9 @ pass Thread::Current
- bl artAllocObjectFromCodeResolvedRosAlloc @ (mirror::Class* cls, Thread*)
+ bl \cxx_name @ (mirror::Class* cls, Thread*)
RESTORE_SAVE_REFS_ONLY_FRAME
RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_alloc_object_resolved_rosalloc
+END \c_name
+.endm
-// The common fast path code for art_quick_alloc_object_resolved_tlab
-// and art_quick_alloc_object_resolved_region_tlab.
+ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc
+ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc
+
+// The common fast path code for art_quick_alloc_object_resolved/initialized_tlab
+// and art_quick_alloc_object_resolved/initialized_region_tlab.
//
// r0: type r9: Thread::Current, r1, r2, r3, r12: free.
// Need to preserve r0 to the slow path.
@@ -1212,41 +1242,173 @@
bx lr
.endm
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_RESOLVED_OBJECT(_tlab, TLAB).
-ENTRY art_quick_alloc_object_resolved_tlab
+// The common code for art_quick_alloc_object_resolved/initialized{_region}_tlab.
+.macro GENERATE_ALLOC_OBJECT_RESOLVED_TLAB name, entrypoint
+ENTRY \name
// Fast path tlab allocation.
// r0: type, r9: Thread::Current
// r1, r2, r3, r12: free.
-#if defined(USE_READ_BARRIER)
- mvn r0, #0 // Read barrier not supported here.
- bx lr // Return -1.
-#endif
- ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_tlab_slow_path
-.Lart_quick_alloc_object_resolved_tlab_slow_path:
+ ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lslow_path\name
+.Lslow_path\name:
SETUP_SAVE_REFS_ONLY_FRAME r2 // Save callee saves in case of GC.
mov r1, r9 // Pass Thread::Current.
- bl artAllocObjectFromCodeResolvedTLAB // (mirror::Class* klass, Thread*)
+ bl \entrypoint // (mirror::Class* klass, Thread*)
RESTORE_SAVE_REFS_ONLY_FRAME
RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_alloc_object_resolved_tlab
+END \name
+.endm
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
-ENTRY art_quick_alloc_object_resolved_region_tlab
- // Fast path tlab allocation.
- // r0: type, r9: Thread::Current, r1, r2, r3, r12: free.
-#if !defined(USE_READ_BARRIER)
- eor r0, r0, r0 // Read barrier must be enabled here.
- sub r0, r0, #1 // Return -1.
- bx lr
+GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB
+GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB
+GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_tlab, artAllocObjectFromCodeResolvedTLAB
+GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_tlab, artAllocObjectFromCodeInitializedTLAB
+
+
+// The common fast path code for art_quick_alloc_array_resolved/initialized_tlab
+// and art_quick_alloc_array_resolved/initialized_region_tlab.
+//
+// r0: type r1: component_count r2: total_size r9: Thread::Current, r3, r12: free.
+// Need to preserve r0 and r1 to the slow path.
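+// r2 arrives holding the unaligned total size (array data + header + OBJECT_ALIGNMENT_MASK)
+// computed by the COMPUTE_ARRAY_SIZE_* macros below; the first instruction rounds it down
+// to 8-byte alignment, which amounts to rounding the real allocation size up.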
+.macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE slowPathLabel
+ and r2, r2, #OBJECT_ALIGNMENT_MASK_TOGGLED // Apply alignment mask
+ // (addr + 7) & ~7.
+
+ // Load thread_local_pos (r3) and
+ // thread_local_end (r12) with ldrd.
+ // Check constraints for ldrd.
+#if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0))
+#error "Thread::thread_local_pos/end must be consecutive and 8 byte aligned for performance"
#endif
- ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path
-.Lart_quick_alloc_object_resolved_region_tlab_slow_path:
- SETUP_SAVE_REFS_ONLY_FRAME r2 // Save callee saves in case of GC.
- mov r1, r9 // Pass Thread::Current.
- bl artAllocObjectFromCodeResolvedRegionTLAB // (mirror::Class* klass, Thread*)
+ ldrd r3, r12, [r9, #THREAD_LOCAL_POS_OFFSET]
+ sub r12, r12, r3 // Compute the remaining buf size.
+ cmp r2, r12 // Check if the total_size fits.
+ bhi \slowPathLabel
+ // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
+ add r2, r2, r3
+ str r2, [r9, #THREAD_LOCAL_POS_OFFSET] // Store new thread_local_pos.
+ ldr r2, [r9, #THREAD_LOCAL_OBJECTS_OFFSET] // Increment thread_local_objects.
+ add r2, r2, #1
+ str r2, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]
+ POISON_HEAP_REF r0
+ str r0, [r3, #MIRROR_OBJECT_CLASS_OFFSET] // Store the class pointer.
+ str r1, [r3, #MIRROR_ARRAY_LENGTH_OFFSET] // Store the array length.
+ // Fence. This is "ish" not "ishst" so
+ // that the code after this allocation
+ // site will see the right values in
+ // the fields of the class.
+ // (Alternatively we could use "ishst"
+ // if we use load-acquire for the
+ // object size load.)
+ mov r0, r3
+ dmb ish
+ bx lr
+.endm
+
+.macro GENERATE_ALLOC_ARRAY_TLAB name, entrypoint, size_setup
+ENTRY \name
+ // Fast path array allocation for TLAB and region TLAB allocation.
+ // r0: mirror::Class* type
+ // r1: int32_t component_count
+ // r9: thread
+ // r2, r3, r12: free.
+ \size_setup .Lslow_path\name
+ ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\name
+.Lslow_path\name:
+ // r0: mirror::Class* klass
+ // r1: int32_t component_count
+ // r2: Thread* self
+ SETUP_SAVE_REFS_ONLY_FRAME r2 // save callee saves in case of GC
+ mov r2, r9 // pass Thread::Current
+ bl \entrypoint
RESTORE_SAVE_REFS_ONLY_FRAME
RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_alloc_object_resolved_region_tlab
+END \name
+.endm
+
+.macro COMPUTE_ARRAY_SIZE_UNKNOWN slow_path
+ bkpt // We should never enter here.
+ // Code below is for reference.
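+ // The compiler backends now emit the size-specialized entrypoints directly
+ // (see the TODO below), so this generic variant should be unreachable; the
+ // bkpt above catches any stale caller.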
+ // Possibly a large object, go slow.
+ // Also does negative array size check.
+ movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_WIDE_ARRAY_DATA_OFFSET) / 8)
+ cmp r1, r2
+ bhi \slow_path
+ // Array classes are never finalizable
+ // or uninitialized, no need to check.
+ ldr r3, [r0, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET] // Load component type
+ UNPOISON_HEAP_REF r3
+ ldr r3, [r3, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
+ lsr r3, r3, #PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT // Component size shift is in high 16
+ // bits.
+ lsl r2, r1, r3 // Calculate data size
+ // Add array data offset and alignment.
+ add r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
+#if MIRROR_WIDE_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
+#error Long array data offset must be 4 greater than int array data offset.
+#endif
+
+ add r3, r3, #1 // Add 4 to the size only if the
+ // component size shift is 3
+ // (for 64 bit alignment).
+ and r3, r3, #4
+ add r2, r2, r3
+.endm
+
+.macro COMPUTE_ARRAY_SIZE_8 slow_path
+ // Possibly a large object, go slow.
+ // Also does negative array size check.
+ movw r2, #(MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET)
+ cmp r1, r2
+ bhi \slow_path
+ // Add array data offset and alignment.
+ add r2, r1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
+.endm
+
+.macro COMPUTE_ARRAY_SIZE_16 slow_path
+ // Possibly a large object, go slow.
+ // Also does negative array size check.
+ movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) / 2)
+ cmp r1, r2
+ bhi \slow_path
+ lsl r2, r1, #1
+ // Add array data offset and alignment.
+ add r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
+.endm
+
+.macro COMPUTE_ARRAY_SIZE_32 slow_path
+ // Possibly a large object, go slow.
+ // Also does negative array size check.
+ movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) / 4)
+ cmp r1, r2
+ bhi \slow_path
+ lsl r2, r1, #2
+ // Add array data offset and alignment.
+ add r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
+.endm
+
+.macro COMPUTE_ARRAY_SIZE_64 slow_path
+ // Possibly a large object, go slow.
+ // Also does negative array size check.
+ movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_LONG_ARRAY_DATA_OFFSET) / 8)
+ cmp r1, r2
+ bhi \slow_path
+ lsl r2, r1, #3
+ // Add array data offset and alignment.
+ add r2, r2, #(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
+.endm
+
+# TODO(ngeoffray): art_quick_alloc_array_resolved_region_tlab is not used for arm, remove
+# the entrypoint once all backends have been updated to use the size variants.
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64
/*
* Called by managed code when the value in rSUSPEND has been decremented to 0.
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 6a2034f..219d8b4 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1626,7 +1626,7 @@
END art_quick_resolve_string
// Generate the allocation entrypoints for each allocator.
-GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
+GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
// Comment out allocators that have arm64 specific asm.
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
@@ -1640,8 +1640,20 @@
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc).
-ENTRY art_quick_alloc_object_resolved_rosalloc
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
+
+.macro ART_QUICK_ALLOC_OBJECT_ROSALLOC c_name, cxx_name
+ENTRY \c_name
// Fast path rosalloc allocation.
// x0: type, xSELF(x19): Thread::Current
// x1-x7: free.
@@ -1650,13 +1662,13 @@
// ldp won't work due to large offset.
ldr x4, [xSELF, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
cmp x3, x4
- bhs .Lart_quick_alloc_object_resolved_rosalloc_slow_path
+ bhs .Lslow_path\c_name
ldr w3, [x0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET] // Load the object size (x3)
cmp x3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE // Check if the size is for a thread
// local allocation. Also does the
// finalizable and initialization
// checks.
- bhs .Lart_quick_alloc_object_resolved_rosalloc_slow_path
+ bhs .Lslow_path\c_name
// Compute the rosalloc bracket index
// from the size. Since the size is
// already aligned we can combine the
@@ -1669,7 +1681,7 @@
// Load the free list head (x3). This
// will be the return val.
ldr x3, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
- cbz x3, .Lart_quick_alloc_object_resolved_rosalloc_slow_path
+ cbz x3, .Lslow_path\c_name
// "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
ldr x1, [x3, #ROSALLOC_SLOT_NEXT_OFFSET] // Load the next pointer of the head
// and update the list head with the
@@ -1713,13 +1725,65 @@
mov x0, x3 // Set the return value and return.
ret
-.Lart_quick_alloc_object_resolved_rosalloc_slow_path:
+.Lslow_path\c_name:
SETUP_SAVE_REFS_ONLY_FRAME // save callee saves in case of GC
mov x1, xSELF // pass Thread::Current
- bl artAllocObjectFromCodeResolvedRosAlloc // (mirror::Class* klass, Thread*)
+ bl \cxx_name
RESTORE_SAVE_REFS_ONLY_FRAME
RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_alloc_object_resolved_rosalloc
+END \c_name
+.endm
+
+ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc
+ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc
+
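+// The common fast path for object TLAB allocation: bump thread_local_pos by the class's
+// pre-computed fast path object size, store the class pointer and fence. Classes that are
+// not eligible for fast path allocation (e.g. finalizable or not yet initialized) store an
+// oversized value at MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET, so the size check
+// sends them to the slow path as well.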
+.macro ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED slowPathLabel
+ ldr x4, [xSELF, #THREAD_LOCAL_POS_OFFSET]
+ ldr x5, [xSELF, #THREAD_LOCAL_END_OFFSET]
+ ldr w7, [x0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET] // Load the object size (x7).
+ add x6, x4, x7 // Add object size to tlab pos.
+ cmp x6, x5 // Check if it fits, overflow works
+ // since the tlab pos and end are 32
+ // bit values.
+ bhi \slowPathLabel
+ str x6, [xSELF, #THREAD_LOCAL_POS_OFFSET] // Store new thread_local_pos.
+ ldr x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET] // Increment thread_local_objects.
+ add x5, x5, #1
+ str x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
+ POISON_HEAP_REF w0
+ str w0, [x4, #MIRROR_OBJECT_CLASS_OFFSET] // Store the class pointer.
+ // Fence. This is "ish" not "ishst" so
+ // that the code after this allocation
+ // site will see the right values in
+ // the fields of the class.
+ // (Alternatively we could use "ishst"
+ // if we use load-acquire for the
+ // object size load.)
+ mov x0, x4
+ dmb ish
+ ret
+.endm
+
+// The common code for art_quick_alloc_object_resolved/initialized{_region}_tlab.
+.macro GENERATE_ALLOC_OBJECT_RESOLVED_TLAB name, entrypoint
+ENTRY \name
+ // Fast path TLAB and region TLAB object allocation.
+ // x0: type, xSELF(x19): Thread::Current
+ // x1-x7: free.
+ ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED .Lslow_path\name
+.Lslow_path\name:
+ SETUP_SAVE_REFS_ONLY_FRAME // Save callee saves in case of GC.
+ mov x1, xSELF // Pass Thread::Current.
+ bl \entrypoint // (mirror::Class*, Thread*)
+ RESTORE_SAVE_REFS_ONLY_FRAME
+ RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+END \name
+.endm
+
+GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB
+GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB
+GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_tlab, artAllocObjectFromCodeResolvedTLAB
+GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_tlab, artAllocObjectFromCodeInitializedTLAB
.macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE slowPathLabel, xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
and \xTemp1, \xTemp1, #OBJECT_ALIGNMENT_MASK_TOGGLED64 // Apply alignment mask
@@ -1759,93 +1823,12 @@
ret
.endm
-// TODO: delete ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED since it is the same as
-// ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED.
-.macro ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED slowPathLabel
- ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED \slowPathLabel
-.endm
-
-.macro ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED slowPathLabel
- ldr x4, [xSELF, #THREAD_LOCAL_POS_OFFSET]
- ldr x5, [xSELF, #THREAD_LOCAL_END_OFFSET]
- ldr w7, [x0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET] // Load the object size (x7).
- add x6, x4, x7 // Add object size to tlab pos.
- cmp x6, x5 // Check if it fits, overflow works
- // since the tlab pos and end are 32
- // bit values.
- bhi \slowPathLabel
- str x6, [xSELF, #THREAD_LOCAL_POS_OFFSET] // Store new thread_local_pos.
- ldr x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET] // Increment thread_local_objects.
- add x5, x5, #1
- str x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
- POISON_HEAP_REF w0
- str w0, [x4, #MIRROR_OBJECT_CLASS_OFFSET] // Store the class pointer.
- // Fence. This is "ish" not "ishst" so
- // that the code after this allocation
- // site will see the right values in
- // the fields of the class.
- // Alternatively we could use "ishst"
- // if we use load-acquire for the
- // object size load.)
- mov x0, x4
- dmb ish
- ret
-.endm
-
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB).
-ENTRY art_quick_alloc_object_resolved_tlab
- // Fast path tlab allocation.
- // x0: type, xSELF(x19): Thread::Current
- // x1-x7: free.
-#if defined(USE_READ_BARRIER)
- mvn x0, xzr // Read barrier not supported here.
- ret // Return -1.
-#endif
- ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_object_resolved_tlab_slow_path
-.Lart_quick_alloc_object_resolved_tlab_slow_path:
- SETUP_SAVE_REFS_ONLY_FRAME // Save callee saves in case of GC.
- mov x1, xSELF // Pass Thread::Current.
- bl artAllocObjectFromCodeResolvedTLAB // (mirror::Class*, Thread*)
- RESTORE_SAVE_REFS_ONLY_FRAME
- RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_alloc_object_resolved_tlab
-
-// The common code for art_quick_alloc_object_*region_tlab
-.macro GENERATE_ALLOC_OBJECT_RESOLVED_REGION_TLAB name, entrypoint, fast_path
-ENTRY \name
- // Fast path region tlab allocation.
- // x0: type, xSELF(x19): Thread::Current
- // x1-x7: free.
-#if !defined(USE_READ_BARRIER)
- mvn x0, xzr // Read barrier must be enabled here.
- ret // Return -1.
-#endif
-.Ldo_allocation\name:
- \fast_path .Lslow_path\name
-.Lslow_path\name:
- SETUP_SAVE_REFS_ONLY_FRAME // Save callee saves in case of GC.
- mov x1, xSELF // Pass Thread::Current.
- bl \entrypoint // (mirror::Class*, Thread*)
- RESTORE_SAVE_REFS_ONLY_FRAME
- RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END \name
-.endm
-
-GENERATE_ALLOC_OBJECT_RESOLVED_REGION_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED
-GENERATE_ALLOC_OBJECT_RESOLVED_REGION_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED
-
-// TODO: We could use this macro for the normal tlab allocator too.
-
-.macro GENERATE_ALLOC_ARRAY_REGION_TLAB name, entrypoint, size_setup
+.macro GENERATE_ALLOC_ARRAY_TLAB name, entrypoint, size_setup
ENTRY \name
// Fast path array allocation for region tlab allocation.
// x0: mirror::Class* type
// x1: int32_t component_count
// x2-x7: free.
-#if !defined(USE_READ_BARRIER)
- mvn x0, xzr // Read barrier must be enabled here.
- ret // Return -1.
-#endif
mov x3, x0
\size_setup x3, w3, x1, w1, x4, w4, x5, w5, x6, w6
ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\name, x3, w3, x1, w1, x4, w4, x5, w5, x6, w6
@@ -1904,17 +1887,21 @@
.macro COMPUTE_ARRAY_SIZE_64 xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
lsl \xTemp1, \xCount, #3
// Add array data offset and alignment.
- // Add 4 to the size for 64 bit alignment.
- add \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK + 4)
+ add \xTemp1, \xTemp1, #(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm
# TODO(ngeoffray): art_quick_alloc_array_resolved_region_tlab is not used for arm64, remove
# the entrypoint once all backends have been updated to use the size variants.
-GENERATE_ALLOC_ARRAY_REGION_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
-GENERATE_ALLOC_ARRAY_REGION_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
-GENERATE_ALLOC_ARRAY_REGION_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
-GENERATE_ALLOC_ARRAY_REGION_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
-GENERATE_ALLOC_ARRAY_REGION_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64
/*
* Called by managed code when the thread has been asked to suspend.
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 2d5eca0..663cb6c 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -1578,6 +1578,7 @@
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index f3629d9..5fee575 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -1534,6 +1534,7 @@
GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S
index 9204d85..2b3525b 100644
--- a/runtime/arch/quick_alloc_entrypoints.S
+++ b/runtime/arch/quick_alloc_entrypoints.S
@@ -145,7 +145,7 @@
// This is to be separately defined for each architecture to allow a hand-written assembly fast path.
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc, RosAlloc)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_rosalloc, RosAlloc)
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 47dc34a..76615e8 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -947,10 +947,37 @@
END_MACRO
// Generate the allocation entrypoints for each allocator.
-GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
+GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
+
+// Comment out allocators that have x86 specific asm.
+// Region TLAB:
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
+// Normal TLAB:
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc).
-DEFINE_FUNCTION art_quick_alloc_object_resolved_rosalloc
+MACRO2(ART_QUICK_ALLOC_OBJECT_ROSALLOC, c_name, cxx_name)
+ DEFINE_FUNCTION VAR(c_name)
// Fast path rosalloc allocation.
// eax: type/return value
// ecx, ebx, edx: free
@@ -959,14 +986,14 @@
// stack has room
movl THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%ebx), %ecx
cmpl THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%ebx), %ecx
- jae .Lart_quick_alloc_object_resolved_rosalloc_slow_path
+ jae .Lslow_path\c_name
movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%eax), %ecx // Load the object size (ecx)
// Check if the size is for a thread
// local allocation. Also does the
// finalizable and initialization check.
cmpl LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %ecx
- ja .Lart_quick_alloc_object_resolved_rosalloc_slow_path
+ ja .Lslow_path\c_name
shrl LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %ecx // Calculate the rosalloc bracket index
// from object size.
// Load thread local rosalloc run (ebx)
@@ -977,7 +1004,7 @@
// Load free_list head (edi),
// this will be the return value.
movl (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%ebx), %ecx
- jecxz .Lart_quick_alloc_object_resolved_rosalloc_slow_path
+ jecxz .Lslow_path\c_name
// Point of no slow path. Won't go to
// the slow path from here on.
// Load the next pointer of the head
@@ -1008,7 +1035,7 @@
// No fence needed for x86.
movl %ecx, %eax // Move object to return register
ret
-.Lart_quick_alloc_object_resolved_rosalloc_slow_path:
+.Lslow_path\c_name:
SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx // save ref containing registers for GC
// Outgoing argument set up
subl LITERAL(8), %esp // alignment padding
@@ -1020,10 +1047,14 @@
CFI_ADJUST_CFA_OFFSET(-16)
RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address
RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // return or deliver exception
-END_FUNCTION art_quick_alloc_object_resolved_rosalloc
+ END_FUNCTION VAR(c_name)
+END_MACRO
-// The common fast path code for art_quick_alloc_object_resolved_tlab
-// and art_quick_alloc_object_resolved_region_tlab.
+ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc
+ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc
+
+// The common fast path code for art_quick_alloc_object_resolved/initialized_tlab
+// and art_quick_alloc_object_resolved/initialized_region_tlab.
//
// EAX: type/return_value
MACRO1(ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH, slowPathLabel)
@@ -1047,8 +1078,8 @@
ret // Fast path succeeded.
END_MACRO
-// The common slow path code for art_quick_alloc_object_resolved_tlab
-// and art_quick_alloc_object_resolved_region_tlab.
+// The common slow path code for art_quick_alloc_object_resolved/initialized_tlab
+// and art_quick_alloc_object_resolved/initialized_region_tlab.
MACRO1(ALLOC_OBJECT_RESOLVED_TLAB_SLOW_PATH, cxx_name)
POP edi
SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx // save ref containing registers for GC
@@ -1065,33 +1096,154 @@
RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // return or deliver exception
END_MACRO
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB). May be called
-// for CC if the GC is not marking.
-DEFINE_FUNCTION art_quick_alloc_object_resolved_tlab
+MACRO2(ART_QUICK_ALLOC_OBJECT_TLAB, c_name, cxx_name)
+ DEFINE_FUNCTION VAR(c_name)
// Fast path tlab allocation.
// EAX: type
// EBX, ECX, EDX: free.
PUSH edi
- ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_tlab_slow_path
-.Lart_quick_alloc_object_resolved_tlab_slow_path:
- ALLOC_OBJECT_RESOLVED_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedTLAB
-END_FUNCTION art_quick_alloc_object_resolved_tlab
+ ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lslow_path\c_name
+.Lslow_path\c_name:
+ ALLOC_OBJECT_RESOLVED_TLAB_SLOW_PATH RAW_VAR(cxx_name)
+ END_FUNCTION VAR(c_name)
+END_MACRO
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB).
-DEFINE_FUNCTION art_quick_alloc_object_resolved_region_tlab
- // Fast path region tlab allocation.
- // EAX: type/return value
- // EBX, ECX, EDX: free.
-#if !defined(USE_READ_BARRIER)
+ART_QUICK_ALLOC_OBJECT_TLAB art_quick_alloc_object_resolved_tlab, artAllocObjectFromCodeResolvedTLAB
+ART_QUICK_ALLOC_OBJECT_TLAB art_quick_alloc_object_initialized_tlab, artAllocObjectFromCodeInitializedTLAB
+ART_QUICK_ALLOC_OBJECT_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB
+ART_QUICK_ALLOC_OBJECT_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB
+
+// The fast path code for art_quick_alloc_array_*_tlab and art_quick_alloc_array_*_region_tlab.
+// Inputs: EAX: the class, ECX: int32_t component_count, EDX: total_size
+// Free temp: EBX
+// Output: EAX: return value.
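+// EDX arrives holding the unaligned total size (array data + header + OBJECT_ALIGNMENT_MASK)
+// from the COMPUTE_ARRAY_SIZE_* macros; it is rounded down to 8 bytes below. EDI has been
+// pushed by GENERATE_ALLOC_ARRAY_TLAB and is used as an extra temp.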
+MACRO1(ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE, slowPathLabel)
+ mov %fs:THREAD_SELF_OFFSET, %ebx // ebx = thread
+ // Mask out the unaligned part to make sure we are 8 byte aligned.
+ andl LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED), %edx
+ movl THREAD_LOCAL_END_OFFSET(%ebx), %edi
+ subl THREAD_LOCAL_POS_OFFSET(%ebx), %edi
+ cmpl %edi, %edx // Check if it fits.
+ ja RAW_VAR(slowPathLabel)
+ movl THREAD_LOCAL_POS_OFFSET(%ebx), %edi
+ addl %edi, %edx // Add the object size.
+ movl %edx, THREAD_LOCAL_POS_OFFSET(%ebx) // Update thread_local_pos_
+ addl LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%ebx) // Increase thread_local_objects.
+ // Store the class pointer in the
+ // header.
+ // No fence needed for x86.
+ POISON_HEAP_REF eax
+ movl %eax, MIRROR_OBJECT_CLASS_OFFSET(%edi)
+ movl %ecx, MIRROR_ARRAY_LENGTH_OFFSET(%edi)
+ movl %edi, %eax
+ POP edi
+ ret // Fast path succeeded.
+END_MACRO
+
+MACRO1(COMPUTE_ARRAY_SIZE_UNKNOWN, slow_path)
+ // We should never enter here. Code is provided for reference.
int3
- int3
+ // Possibly a large object, go slow.
+ // Also does negative array size check.
+ cmpl LITERAL((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_WIDE_ARRAY_DATA_OFFSET) / 8), %ecx
+ ja RAW_VAR(slow_path)
+ PUSH ecx
+ movl %ecx, %edx
+ movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%eax), %ecx // Load component type.
+ UNPOISON_HEAP_REF ecx
+ movl MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET(%ecx), %ecx // Load primitive type.
+ shr MACRO_LITERAL(PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT), %ecx // Get component size shift.
+ sall %cl, %edx // Calculate data size (count << shift).
+ // Add array header + alignment rounding.
+ add MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %edx
+ // Add 4 extra bytes if we are doing a long array.
+ add MACRO_LITERAL(1), %ecx
+ and MACRO_LITERAL(4), %ecx
+#if MIRROR_WIDE_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
+#error Long array data offset must be 4 greater than int array data offset.
#endif
- PUSH edi
- ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path
-.Lart_quick_alloc_object_resolved_region_tlab_slow_path:
- ALLOC_OBJECT_RESOLVED_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedRegionTLAB
-END_FUNCTION art_quick_alloc_object_resolved_region_tlab
+ addl %ecx, %edx
+ POP ecx
+END_MACRO
+MACRO1(COMPUTE_ARRAY_SIZE_8, slow_path)
+ // EAX: mirror::Class* klass, ECX: int32_t component_count
+ // Possibly a large object, go slow.
+ // Also does negative array size check.
+ cmpl LITERAL(MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET), %ecx
+ ja RAW_VAR(slow_path)
+ // Add array header + alignment rounding.
+ leal (MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)(%ecx), %edx
+END_MACRO
+
+MACRO1(COMPUTE_ARRAY_SIZE_16, slow_path)
+ // EAX: mirror::Class* klass, ECX: int32_t component_count
+ // Possibly a large object, go slow.
+ // Also does negative array size check.
+ cmpl LITERAL((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) / 2), %ecx
+ ja RAW_VAR(slow_path)
+ // Add array header + alignment rounding.
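+ // The constant is pre-divided by the element size so that the shift below scales
+ // the count and the header + mask together; any bytes lost to the integer division
+ // are absorbed by the 8-byte round-down in the fast path.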
+ leal ((MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK) / 2)(%ecx), %edx
+ sall MACRO_LITERAL(1), %edx
+END_MACRO
+
+MACRO1(COMPUTE_ARRAY_SIZE_32, slow_path)
+ // EAX: mirror::Class* klass, ECX: int32_t component_count
+ // Possibly a large object, go slow.
+ // Also does negative array size check.
+ cmpl LITERAL((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) / 4), %ecx
+ ja RAW_VAR(slow_path)
+ // Add array header + alignment rounding.
+ leal ((MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK) / 4)(%ecx), %edx
+ sall MACRO_LITERAL(2), %edx
+END_MACRO
+
+MACRO1(COMPUTE_ARRAY_SIZE_64, slow_path)
+ // EAX: mirror::Class* klass, ECX: int32_t component_count
+ // Possibly a large object, go slow.
+ // Also does negative array size check.
+ cmpl LITERAL((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_WIDE_ARRAY_DATA_OFFSET) / 8), %ecx
+ ja RAW_VAR(slow_path)
+ // Add array header + alignment rounding.
+ leal ((MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK) / 8)(%ecx), %edx
+ sall MACRO_LITERAL(3), %edx
+END_MACRO
+
+MACRO3(GENERATE_ALLOC_ARRAY_TLAB, c_entrypoint, cxx_name, size_setup)
+ DEFINE_FUNCTION VAR(c_entrypoint)
+ // EAX: mirror::Class* klass, ECX: int32_t component_count
+ PUSH edi
+ CALL_MACRO(size_setup) .Lslow_path\c_entrypoint
+ ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\c_entrypoint
+.Lslow_path\c_entrypoint:
+ POP edi
+ SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx // save ref containing registers for GC
+ // Outgoing argument set up
+ PUSH eax // alignment padding
+ pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current()
+ CFI_ADJUST_CFA_OFFSET(4)
+ PUSH ecx
+ PUSH eax
+ call CALLVAR(cxx_name) // cxx_name(arg0, arg1, Thread*)
+ addl LITERAL(16), %esp // pop arguments
+ CFI_ADJUST_CFA_OFFSET(-16)
+ RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address
+ RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // return or deliver exception
+ END_FUNCTION VAR(c_entrypoint)
+END_MACRO
+
+
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64
+
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
+GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64
DEFINE_FUNCTION art_quick_resolve_string
SETUP_SAVE_EVERYTHING_FRAME ebx, ebx
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 10f9047..a1ae858 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1006,7 +1006,8 @@
// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc).
-DEFINE_FUNCTION art_quick_alloc_object_resolved_rosalloc
+MACRO2(ART_QUICK_ALLOC_OBJECT_ROSALLOC, c_name, cxx_name)
+ DEFINE_FUNCTION VAR(c_name)
// Fast path rosalloc allocation.
// RDI: mirror::Class*, RAX: return value
// RSI, RDX, RCX, R8, R9: free.
@@ -1015,14 +1016,14 @@
movq %gs:THREAD_SELF_OFFSET, %r8 // r8 = thread
movq THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8), %rcx // rcx = alloc stack top.
cmpq THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%r8), %rcx
- jae .Lart_quick_alloc_object_resolved_rosalloc_slow_path
+ jae .Lslow_path\c_name
// Load the object size
movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %eax
// Check if the size is for a thread
// local allocation. Also does the
// initialized and finalizable checks.
cmpl LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %eax
- ja .Lart_quick_alloc_object_resolved_rosalloc_slow_path
+ ja .Lslow_path\c_name
// Compute the rosalloc bracket index
// from the size.
shrq LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %rax
@@ -1036,7 +1037,7 @@
// will be the return val.
movq (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9), %rax
testq %rax, %rax
- jz .Lart_quick_alloc_object_resolved_rosalloc_slow_path
+ jz .Lslow_path\c_name
// "Point of no slow path". Won't go to the slow path from here on. OK to clobber rdi and rsi.
// Push the new object onto the thread
// local allocation stack and
@@ -1063,25 +1064,19 @@
decl (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(%r9)
// No fence necessary for x86.
ret
-.Lart_quick_alloc_object_resolved_rosalloc_slow_path:
+.Lslow_path\c_name:
SETUP_SAVE_REFS_ONLY_FRAME // save ref containing registers for GC
// Outgoing argument set up
movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current()
- call SYMBOL(artAllocObjectFromCodeResolvedRosAlloc) // cxx_name(arg0, Thread*)
+ call CALLVAR(cxx_name) // cxx_name(arg0, Thread*)
RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address
RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // return or deliver exception
-END_FUNCTION art_quick_alloc_object_rosalloc
-
-// The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
-//
-// RDI: type_idx, RSI: ArtMethod*, RDX/EDX: the class, RAX: return value.
-// RCX: scratch, r8: Thread::Current().
-MACRO1(ALLOC_OBJECT_TLAB_FAST_PATH, slowPathLabel)
- testl %edx, %edx // Check null class
- jz RAW_VAR(slowPathLabel)
- ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH(RAW_VAR(slowPathLabel))
+ END_FUNCTION VAR(c_name)
END_MACRO
+ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc
+ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc
+
// The common fast path code for art_quick_alloc_object_resolved_region_tlab.
// TODO: delete ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH since it is the same as
// ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH.
@@ -1220,12 +1215,7 @@
movq %rsi, %r9
salq MACRO_LITERAL(3), %r9
// Add array header + alignment rounding.
- // Add 4 extra bytes for array data alignment
- addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK + 4), %r9
-END_MACRO
-
-// The slow path code for art_quick_alloc_array_*tlab.
-MACRO1(ALLOC_ARRAY_TLAB_SLOW_PATH, cxx_name)
+ addq MACRO_LITERAL(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO
MACRO3(GENERATE_ALLOC_ARRAY_TLAB, c_entrypoint, cxx_name, size_setup)