| author | 2016-08-31 19:41:03 +0000 |
|---|---|
| committer | 2016-08-31 19:41:04 +0000 |
| commit | 43ac11487e34eb78fca230f9bad40d42a5b1c6c4 (patch) |
| tree | 13a13ce9a1beeda836dba4bb280c5ceeb7a66b33 |
| parent | e534a160337bb188fa946b671d34fdcfd87ef2da (diff) |
| parent | b6ec5d7d257e00c0d119da48b85f8f5a1f0b09a9 (diff) |
Merge "Fix unnecessary read barrier for resolved/initialized allocations"
| -rw-r--r-- | runtime/arch/arm64/quick_entrypoints_arm64.S | 29 |
| -rw-r--r-- | runtime/arch/x86_64/quick_entrypoints_x86_64.S | 43 |
2 files changed, 25 insertions, 47 deletions
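
As context for the patch below, the allocation fast path that both entrypoint files implement can be read as the following C++ sketch. It is illustrative only: the helpers and fields (`ReadBarrierMark`, `TlabFastPathAllocate`, `SlowPathAllocate`, `gc_is_marking`, `mark_bit`) are hypothetical stand-ins for the assembly and offsets in the diff, not the real ART runtime API; the `read_barrier` flag models the macro parameter this change introduces.

```cpp
// Illustrative sketch of the control flow GENERATE_ALLOC_OBJECT_REGION_TLAB emits,
// with the new `read_barrier` parameter gating the class read barrier.
// All helpers below are hypothetical stand-ins, not the real ART runtime API.
#include <cstdio>

struct Class {
  bool mark_bit = false;   // stand-in for the mark bit in the object's lock word
};

namespace sketch {

bool gc_is_marking = false;                       // stand-in for the THREAD_IS_GC_MARKING flag

Class* ReadBarrierMark(Class* cls) {              // stand-in for artReadBarrierMark (simplified)
  cls->mark_bit = true;                           // pretend the class was marked / forwarded
  return cls;
}

void* TlabFastPathAllocate(Class*) { return nullptr; }   // pretend the TLAB is full
void* SlowPathAllocate(Class*) { return new char[16]; }  // stand-in for artAllocObjectFromCode*

void* AllocObjectRegionTlab(Class* cls, bool read_barrier) {
  if (cls == nullptr) {
    return SlowPathAllocate(cls);                 // null check must happen before the lock word read
  }
  if (read_barrier && gc_is_marking) {
    // GC is marking: if the class is not marked yet, run the read barrier slow path
    // so the allocation observes a to-space class reference.
    if (!cls->mark_bit) {
      cls = ReadBarrierMark(cls);
    }
  }
  if (void* obj = TlabFastPathAllocate(cls)) {
    return obj;
  }
  return SlowPathAllocate(cls);
}

}  // namespace sketch

int main() {
  Class c;
  sketch::gc_is_marking = true;
  void* obj = sketch::AllocObjectRegionTlab(&c, /*read_barrier=*/true);
  std::printf("allocated %p, class marked: %d\n", obj, c.mark_bit);
}
```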
```diff
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 202846a679..3f87a1443a 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -2107,7 +2107,7 @@ ENTRY art_quick_alloc_object_tlab
 END art_quick_alloc_object_tlab
 
 // The common code for art_quick_alloc_object_*region_tlab
-.macro GENERATE_ALLOC_OBJECT_REGION_TLAB name, entrypoint, fast_path, is_resolved
+.macro GENERATE_ALLOC_OBJECT_REGION_TLAB name, entrypoint, fast_path, is_resolved, read_barrier
 ENTRY \name
     // Fast path region tlab allocation.
     // x0: type_idx/resolved class/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
@@ -2123,16 +2123,19 @@ ENTRY \name
     ldr    x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
     // Load the class (x2)
     ldr    w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+    // If the class is null, go slow path. The check is required to read the lock word.
+    cbz    w2, .Lslow_path\name
 .endif
+.if \read_barrier
     // Most common case: GC is not marking.
     ldr    w3, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
     cbnz   x3, .Lmarking\name
+.endif
 .Ldo_allocation\name:
     \fast_path .Lslow_path\name
 .Lmarking\name:
+.if \read_barrier
     // GC is marking, check the lock word of the class for the mark bit.
-    // If the class is null, go slow path. The check is required to read the lock word.
-    cbz    w2, .Lslow_path\name
     // Class is not null, check mark bit in lock word.
     ldr    w3, [x2, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
     // If the bit is not zero, do the allocation.
@@ -2140,14 +2143,23 @@ ENTRY \name
                                                // The read barrier slow path. Mark
                                                // the class.
     stp    x0, x1, [sp, #-32]!                 // Save registers (x0, x1, lr).
+    .cfi_adjust_cfa_offset 32
+    .cfi_rel_offset x0, 0
+    .cfi_rel_offset x1, 8
     str    xLR, [sp, #16]                      // Align sp by 16 bytes.
+    .cfi_rel_offset xLR, 16
     mov    x0, x2                              // Pass the class as the first param.
     bl     artReadBarrierMark
     mov    x2, x0                              // Get the (marked) class back.
     ldp    x0, x1, [sp, #0]                    // Restore registers.
+    .cfi_restore x0
+    .cfi_restore x1
     ldr    xLR, [sp, #16]
+    .cfi_restore xLR
     add    sp, sp, #32
+    .cfi_adjust_cfa_offset -32
     b      .Ldo_allocation\name
+.endif
 .Lslow_path\name:
     SETUP_SAVE_REFS_ONLY_FRAME                 // Save callee saves in case of GC.
     mov    x2, xSELF                           // Pass Thread::Current.
@@ -2157,9 +2169,14 @@ ENTRY \name
 END \name
 .endm
 
-GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_region_tlab, artAllocObjectFromCodeRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH, 0
-GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED, 1
-GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED, 1
+// Use ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED since the null check is already done in GENERATE_ALLOC_OBJECT_TLAB.
+GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_region_tlab, artAllocObjectFromCodeRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED, 0, 1
+// No read barrier for the resolved or initialized cases since the caller is responsible for the
+// read barrier due to the to-space invariant.
+GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED, 1, 0
+GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED, 1, 0
+
+// TODO: We could use this macro for the normal tlab allocator too.
 
 // The common code for art_quick_alloc_array_*region_tlab
 .macro GENERATE_ALLOC_ARRAY_REGION_TLAB name, entrypoint, fast_path, is_resolved
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index f941c521da..06ff7ab802 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1268,28 +1268,9 @@ DEFINE_FUNCTION art_quick_alloc_object_resolved_region_tlab
     int3
     int3
 #endif
+    // No read barrier since the caller is responsible for that.
     movq %rdi, %rdx
-    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
-    jne .Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_marking
-.Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_slow_path_exit:
     ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path
-.Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_marking:
-    // Check the mark bit, if it is 1 avoid the read barrier.
-    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
-    jnz .Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_slow_path_exit
-.Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_slow_path:
-    // The read barrier slow path. Mark the class.
-    PUSH rdi
-    PUSH rsi
-    subq LITERAL(8), %rsp // 16 byte alignment
-    // Outgoing argument set up
-    movq %rdx, %rdi                            // Pass the class as the first param.
-    call SYMBOL(artReadBarrierMark)            // cxx_name(mirror::Object* obj)
-    movq %rax, %rdx
-    addq LITERAL(8), %rsp
-    POP rsi
-    POP rdi
-    jmp .Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_slow_path_exit
 .Lart_quick_alloc_object_resolved_region_tlab_slow_path:
     ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedRegionTLAB
 END_FUNCTION art_quick_alloc_object_resolved_region_tlab
@@ -1303,29 +1284,9 @@ DEFINE_FUNCTION art_quick_alloc_object_initialized_region_tlab
     int3
     int3
 #endif
-    // Might need a special macro since rsi and edx is 32b/64b mismatched.
     movq %rdi, %rdx
-    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
-    jne .Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_marking
-.Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_slow_path_exit:
+    // No read barrier since the caller is responsible for that.
     ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_region_tlab_slow_path
-.Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_marking:
-    // Check the mark bit, if it is 1 avoid the read barrier.
-    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
-    jnz .Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_slow_path
-.Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_slow_path:
-    // The read barrier slow path. Mark the class.
-    PUSH rdi
-    PUSH rsi
-    subq LITERAL(8), %rsp // 16 byte alignment
-    // Outgoing argument set up
-    movq %rdx, %rdi                            // Pass the class as the first param.
-    call SYMBOL(artReadBarrierMark)            // cxx_name(mirror::Object* obj)
-    movq %rax, %rdx
-    addq LITERAL(8), %rsp
-    POP rsi
-    POP rdi
-    jmp .Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_slow_path_exit
 .Lart_quick_alloc_object_initialized_region_tlab_slow_path:
     ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedRegionTLAB
 END_FUNCTION art_quick_alloc_object_initialized_region_tlab
```
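
The reason the resolved and initialized variants can now pass `read_barrier = 0` is stated in the ARM64 hunk: the caller is responsible for the read barrier due to the to-space invariant. A minimal caller-side sketch of that argument, assuming hypothetical names (`ReadBarrier`, `LoadResolvedClass`, `AllocResolved`) that stand in for the compiler-emitted barrier and the entrypoint:

```cpp
// Illustrative only: why a resolved/initialized allocation entrypoint may skip the
// class read barrier. In ART the barrier is emitted by the compiler at the class
// load site; the names below are hypothetical stand-ins, not the runtime's API.
#include <cstdio>

struct Class { bool marked = false; };

Class* ReadBarrier(Class* cls) {      // stand-in for the compiler-emitted read barrier
  cls->marked = true;                 // pretend the reference was forwarded to to-space
  return cls;
}

// Stand-in for compiled code loading a resolved class from the dex cache:
// the read barrier runs here, in the caller, before the entrypoint is reached.
Class* LoadResolvedClass(Class** dex_cache_slot) {
  return ReadBarrier(*dex_cache_slot);
}

// Stand-in for art_quick_alloc_object_{resolved,initialized}_region_tlab: it can
// rely on `cls` already satisfying the to-space invariant and skip the barrier.
void* AllocResolved(Class* cls) {
  return cls->marked ? new char[16] : nullptr;
}

int main() {
  Class c;
  Class* slot = &c;
  Class* cls = LoadResolvedClass(&slot);   // barrier happens at the load site
  void* obj = AllocResolved(cls);          // entrypoint uses the class directly
  std::printf("allocated: %p\n", obj);
}
```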