diff options
-rw-r--r-- | runtime/arch/x86_64/quick_entrypoints_x86_64.S | 112 |
1 file changed, 81 insertions, 31 deletions
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 562ee2d810..8064ed696f 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -894,57 +894,107 @@ DEFINE_FUNCTION art_quick_alloc_object_rosalloc RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // return or deliver exception END_FUNCTION art_quick_alloc_object_rosalloc -// A handle-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB). -DEFINE_FUNCTION art_quick_alloc_object_tlab - // Fast path tlab allocation. - // RDI: uint32_t type_idx, RSI: ArtMethod* - // RDX, RCX, R8, R9: free. RAX: return val. - // TODO: Add read barrier when this function is used. - // Note this function can/should implement read barrier fast path only - // (no read barrier slow path) because this is the fast path of tlab allocation. - // We can fall back to the allocation slow path to do the read barrier slow path. -#if defined(USE_READ_BARRIER) - int3 - int3 -#endif - // Might need a special macro since rsi and edx is 32b/64b mismatched. - movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx // Load dex cache resolved types array - // TODO: Add read barrier when this function is used. - // Might need to break down into multiple instructions to get the base address in a register. - // Load the class - movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx +// The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab. +// +// RDI: type_idx, RSI: ArtMethod*, RDX/EDX: the class, RAX: return value. +// RCX: scratch, r8: Thread::Current(). +MACRO1(ALLOC_OBJECT_TLAB_FAST_PATH, slowPathLabel) testl %edx, %edx // Check null class - jz .Lart_quick_alloc_object_tlab_slow_path + jz RAW_VAR(slowPathLabel) // Check class status. 
cmpl LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%rdx) - jne .Lart_quick_alloc_object_tlab_slow_path - // Check access flags has kAccClassIsFinalizable + jne RAW_VAR(slowPathLabel) + // No fake dependence needed on x86 + // between status and flags load, + // since each load is a load-acquire, + // no loads reordering. + // Check access flags has + // kAccClassIsFinalizable testl LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%rdx) - jnz .Lart_quick_alloc_object_tlab_slow_path + jnz RAW_VAR(slowPathLabel) + movq %gs:THREAD_SELF_OFFSET, %r8 // r8 = thread + movq THREAD_LOCAL_END_OFFSET(%r8), %rax // Load thread_local_end. + subq THREAD_LOCAL_POS_OFFSET(%r8), %rax // Compute the remaining buffer size. movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%rdx), %ecx // Load the object size. + cmpq %rax, %rcx // Check if it fits. OK to do this + // before rounding up the object size + // assuming the buf size alignment. + ja RAW_VAR(slowPathLabel) addl LITERAL(OBJECT_ALIGNMENT_MASK), %ecx // Align the size by 8. (addr + 7) & ~7. andl LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED), %ecx - movq %gs:THREAD_SELF_OFFSET, %r8 // r8 = thread - movq THREAD_LOCAL_POS_OFFSET(%r8), %rax // Load thread_local_pos. + movq THREAD_LOCAL_POS_OFFSET(%r8), %rax // Load thread_local_pos + // as allocated object. addq %rax, %rcx // Add the object size. - cmpq THREAD_LOCAL_END_OFFSET(%r8), %rcx // Check if it fits. - ja .Lart_quick_alloc_object_tlab_slow_path movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8) // Update thread_local_pos. - addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%r8) // Increment thread_local_objects. + addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%r8) // Increase thread_local_objects. // Store the class pointer in the header. // No fence needed for x86. + POISON_HEAP_REF edx movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%rax) ret // Fast path succeeded. 
-.Lart_quick_alloc_object_tlab_slow_path: +END_MACRO + +// The common slow path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab. +MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name) SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save ref containing registers for GC // Outgoing argument set up movq %gs:THREAD_SELF_OFFSET, %rdx // pass Thread::Current() - call SYMBOL(artAllocObjectFromCodeTLAB) // cxx_name(arg0, arg1, Thread*) + call VAR(cxx_name) // cxx_name(arg0, arg1, Thread*) RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // return or deliver exception +END_MACRO + +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB). +DEFINE_FUNCTION art_quick_alloc_object_tlab + // Fast path tlab allocation. + // RDI: uint32_t type_idx, RSI: ArtMethod* + // RDX, RCX, R8, R9: free. RAX: return val. +#if defined(USE_READ_BARRIER) + int3 + int3 +#endif + // Might need a special macro since rsi and edx is 32b/64b mismatched. + movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx // Load dex cache resolved types array + // Might need to break down into multiple instructions to get the base address in a register. + // Load the class + movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx + ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path +.Lart_quick_alloc_object_tlab_slow_path: + ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeTLAB END_FUNCTION art_quick_alloc_object_tlab -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB) +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB). +DEFINE_FUNCTION art_quick_alloc_object_region_tlab + // Fast path region tlab allocation. + // RDI: uint32_t type_idx, RSI: ArtMethod* + // RDX, RCX, R8, R9: free. RAX: return val. +#if !defined(USE_READ_BARRIER) + int3 + int3 +#endif + // Might need a special macro since rsi and edx is 32b/64b mismatched. 
+ movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx // Load dex cache resolved types array + // Might need to break down into multiple instructions to get the base address in a register. + // Load the class + movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx + cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET + jne .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path +.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit: + ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path +.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path: + // The read barrier slow path. Mark the class. + PUSH rdi + PUSH rsi + // Outgoing argument set up + movq %rdx, %rdi // Pass the class as the first param. + call SYMBOL(artReadBarrierMark) // cxx_name(mirror::Object* obj) + movq %rax, %rdx + POP rsi + POP rdi + jmp .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit +.Lart_quick_alloc_object_region_tlab_slow_path: + ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeRegionTLAB +END_FUNCTION art_quick_alloc_object_region_tlab ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER |