| author | 2016-11-30 11:52:19 +0000 |
|---|---|
| committer | 2016-11-30 11:52:19 +0000 |
| commit | 96172e0172c5fca6e9a5ad4b857a24d8c7b064e5 (patch) |
| tree | 04bb8fe644c9bc961c8ac33e609c1b162ec45222 |
| parent | 626b839f881f09a1481377a76712d08580c47a16 (diff) |
Revert CC related changes.
Revert: "X86_64: Add allocation entrypoint switching for CC is_marking"
Revert: "Fix mips build in InitEntryPoints"
Revert: "Fix mac build in ResetQuickAllocEntryPoints"
Test: test-art-target-run-test
Change-Id: If38d44edf8c5def5c4d8c9419e4af0cd8d3be724
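
For orientation: the change being reverted switched allocation entrypoints at runtime based on whether the concurrent-copying (CC) collector was marking; with the revert, the RegionTLAB allocator always installs its own entrypoints again (see the quick_alloc_entrypoints.cc hunk below). The following is a minimal, self-contained C++ sketch of that dispatch — the types and setter names are simplified stand-ins, not the actual ART declarations:

```cpp
// Simplified sketch (not the ART sources) of the allocator dispatch this
// commit reverts. Before the revert, ResetQuickAllocEntryPoints() took an
// is_marking flag and installed the plain TLAB entrypoints while the CC GC
// was not marking; after the revert it always installs the RegionTLAB
// entrypoints for that allocator.
#include <cstdio>

enum class AllocatorType { kDlMalloc, kRosAlloc, kBumpPointer, kTLAB, kRegion, kRegionTLAB };

// Stand-ins for the per-allocator entrypoint tables.
static void SetEntryPointsTlab()       { std::puts("installed _tlab entrypoints"); }
static void SetEntryPointsRegionTlab() { std::puts("installed _region_tlab entrypoints"); }

// Pre-revert behaviour (what this commit removes).
void ResetQuickAllocEntryPointsWithSwitching(AllocatorType allocator, bool is_marking) {
  if (allocator == AllocatorType::kRegionTLAB) {
    if (is_marking) {
      SetEntryPointsRegionTlab();  // read-barrier-aware fast paths
    } else {
      SetEntryPointsTlab();        // no read barrier needed while not marking
    }
  }
}

// Post-revert behaviour (what this commit restores).
void ResetQuickAllocEntryPoints(AllocatorType allocator) {
  if (allocator == AllocatorType::kRegionTLAB) {
    SetEntryPointsRegionTlab();    // entrypoints check is_gc_marking themselves
  }
}

int main() {
  ResetQuickAllocEntryPointsWithSwitching(AllocatorType::kRegionTLAB, /*is_marking=*/false);
  ResetQuickAllocEntryPoints(AllocatorType::kRegionTLAB);
  return 0;
}
```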
| -rw-r--r-- | runtime/arch/mips/entrypoints_init_mips.cc | 2 |
| -rw-r--r-- | runtime/arch/quick_alloc_entrypoints.S | 35 |
| -rw-r--r-- | runtime/arch/x86/quick_entrypoints_x86.S | 7 |
| -rw-r--r-- | runtime/arch/x86_64/quick_entrypoints_x86_64.S | 150 |
| -rw-r--r-- | runtime/entrypoints/quick/quick_alloc_entrypoints.cc | 10 |
| -rw-r--r-- | runtime/entrypoints/quick/quick_alloc_entrypoints.h | 4 |
| -rw-r--r-- | runtime/entrypoints/quick/quick_default_init_entrypoints.h | 2 |
| -rw-r--r-- | runtime/gc/heap-inl.h | 124 |
| -rw-r--r-- | runtime/gc/heap.cc | 69 |
| -rw-r--r-- | runtime/gc/heap.h | 17 |
| -rw-r--r-- | runtime/instrumentation.cc | 2 |
| -rw-r--r-- | runtime/thread.cc | 14 |
| -rw-r--r-- | runtime/thread.h | 2 |
13 files changed, 170 insertions, 268 deletions
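
The x86_64 hunks below restore fast paths that consult the thread-local is_gc_marking flag (%gs:THREAD_IS_GC_MARKING_OFFSET) inline instead of relying on entrypoint switching: the class is only passed to artReadBarrierMark when the GC is marking and the lock word's mark bit is still clear. A rough, self-contained C++ sketch of that control flow, with illustrative stand-in names and constants (not the hand-written assembly itself):

```cpp
#include <cstdio>
#include <cstdint>

struct Object { uint32_t lock_word; };

// Placeholder for LOCK_WORD_MARK_BIT_MASK_SHIFTED; the real bit position comes
// from generated asm constants, this value is for illustration only.
constexpr uint32_t kLockWordMarkBitMaskShifted = 1u << 29;

// Stand-in for the artReadBarrierMark(mirror::Object*) runtime call.
Object* ReadBarrierMark(Object* klass) {
  klass->lock_word |= kLockWordMarkBitMaskShifted;  // pretend the class is now marked
  return klass;
}

// Mirrors the restored fast-path decision: only take the read-barrier slow
// path while the GC is marking and the class is not yet marked.
Object* ResolveClassForAllocation(Object* klass, bool is_gc_marking) {
  if (klass == nullptr) {
    return nullptr;  // the real code falls through to the allocation slow path
  }
  if (is_gc_marking && (klass->lock_word & kLockWordMarkBitMaskShifted) == 0) {
    klass = ReadBarrierMark(klass);
  }
  return klass;  // continue with the TLAB bump-pointer allocation
}

int main() {
  Object klass{0};
  ResolveClassForAllocation(&klass, /*is_gc_marking=*/true);
  std::printf("marked: %d\n", (klass.lock_word & kLockWordMarkBitMaskShifted) != 0);
  return 0;
}
```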
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc index 5c569232ac..6a442a55bf 100644 --- a/runtime/arch/mips/entrypoints_init_mips.cc +++ b/runtime/arch/mips/entrypoints_init_mips.cc @@ -71,7 +71,7 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub; // Alloc - ResetQuickAllocEntryPoints(qpoints, /*is_marking*/ false); + ResetQuickAllocEntryPoints(qpoints); // Cast qpoints->pInstanceofNonTrivial = artInstanceOfFromCode; diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S index db2fdcabea..fa86bf4087 100644 --- a/runtime/arch/quick_alloc_entrypoints.S +++ b/runtime/arch/quick_alloc_entrypoints.S @@ -107,28 +107,7 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB) .endm -.macro GENERATE_ALLOC_ENTRYPOINTS_FOR_TLAB_ALLOCATOR -// This is to be separately defined for each architecture to allow a hand-written assembly fast path. -// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB) -GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB) -GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB) -.endm - .macro GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS -GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS -GENERATE_ALLOC_ENTRYPOINTS_FOR_TLAB_ALLOCATOR -.endm - -.macro GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc, DlMalloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc, DlMalloc) @@ -208,6 +187,20 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_bump_pointer_instrumented, B GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer_instrumented, BumpPointerInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer_instrumented, BumpPointerInstrumented) +// This is to be separately defined for each architecture to allow a hand-written assembly fast path. 
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB) +GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB) +GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB) + GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab_instrumented, TLABInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab_instrumented, TLABInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab_instrumented, TLABInstrumented) diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index 6fbc9547e6..fb405fac0f 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -1085,12 +1085,15 @@ MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name) RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // return or deliver exception END_MACRO -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB). May be called -// for CC if the GC is not marking. +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB). DEFINE_FUNCTION art_quick_alloc_object_tlab // Fast path tlab allocation. // EAX: uint32_t type_idx/return value, ECX: ArtMethod*. // EBX, EDX: free. +#if defined(USE_READ_BARRIER) + int3 + int3 +#endif PUSH esi PUSH edi movl ART_METHOD_DEX_CACHE_TYPES_OFFSET_32(%ecx), %edx // Load dex cache resolved types array diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index f8066e45fb..860b77efe3 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -18,13 +18,6 @@ #include "arch/quick_alloc_entrypoints.S" -MACRO0(ASSERT_USE_READ_BARRIER) -#if !defined(USE_READ_BARRIER) - int3 - int3 -#endif -END_MACRO - MACRO0(SETUP_FP_CALLEE_SAVE_FRAME) // Create space for ART FP callee-saved registers subq MACRO_LITERAL(4 * 8), %rsp @@ -979,10 +972,8 @@ MACRO0(RETURN_OR_DELIVER_PENDING_EXCEPTION) END_MACRO // Generate the allocation entrypoints for each allocator. -GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS - +GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS // Comment out allocators that have x86_64 specific asm. 
-// Region TLAB: // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB) // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB) // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB) @@ -995,19 +986,6 @@ GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB) -// Normal TLAB: -// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB) -// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB) -// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB) -// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB) -// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB) -GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB) -GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB) // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc). DEFINE_FUNCTION art_quick_alloc_object_rosalloc @@ -1184,11 +1162,16 @@ MACRO1(ALLOC_ARRAY_TLAB_SLOW_PATH, cxx_name) RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // return or deliver exception END_MACRO -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB). May be -// called with CC if the GC is not active. +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB). DEFINE_FUNCTION art_quick_alloc_object_tlab + // Fast path tlab allocation. // RDI: uint32_t type_idx, RSI: ArtMethod* // RDX, RCX, R8, R9: free. RAX: return val. +#if defined(USE_READ_BARRIER) + int3 + int3 +#endif + // Might need a special macro since rsi and edx is 32b/64b mismatched. movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx // Load dex cache resolved types array // Might need to break down into multiple instructions to get the base address in a register. // Load the class @@ -1198,69 +1181,29 @@ DEFINE_FUNCTION art_quick_alloc_object_tlab ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeTLAB END_FUNCTION art_quick_alloc_object_tlab -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB). May be -// called with CC if the GC is not active. -DEFINE_FUNCTION art_quick_alloc_object_resolved_tlab - // RDI: mirror::Class* klass, RSI: ArtMethod* - // RDX, RCX, R8, R9: free. RAX: return val. - movq %rdi, %rdx - ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_tlab_slow_path -.Lart_quick_alloc_object_resolved_tlab_slow_path: - ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedTLAB -END_FUNCTION art_quick_alloc_object_resolved_tlab - -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB). -// May be called with CC if the GC is not active. -DEFINE_FUNCTION art_quick_alloc_object_initialized_tlab - // RDI: mirror::Class* klass, RSI: ArtMethod* - // RDX, RCX, R8, R9: free. RAX: return val. 
- movq %rdi, %rdx - ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_tlab_slow_path -.Lart_quick_alloc_object_initialized_tlab_slow_path: - ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedTLAB -END_FUNCTION art_quick_alloc_object_initialized_tlab - -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB). -DEFINE_FUNCTION art_quick_alloc_array_tlab - // RDI: uint32_t type_idx, RSI: int32_t component_count, RDX: ArtMethod* - // RCX: klass, R8, R9: free. RAX: return val. - movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rdx), %rcx // Load dex cache resolved types array - movl 0(%rcx, %rdi, COMPRESSED_REFERENCE_SIZE), %ecx // Load the class - testl %ecx, %ecx - jz .Lart_quick_alloc_array_tlab_slow_path - ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_array_tlab_slow_path -.Lart_quick_alloc_array_tlab_slow_path: - ALLOC_ARRAY_TLAB_SLOW_PATH artAllocArrayFromCodeTLAB -END_FUNCTION art_quick_alloc_array_tlab - -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB). -DEFINE_FUNCTION art_quick_alloc_array_resolved_tlab - // RDI: mirror::Class* klass, RSI: int32_t component_count, RDX: ArtMethod* - // RCX: mirror::Class* klass, R8, R9: free. RAX: return val. - movq %rdi, %rcx - // Already resolved, no null check. - ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_array_resolved_tlab_slow_path -.Lart_quick_alloc_array_resolved_tlab_slow_path: - ALLOC_ARRAY_TLAB_SLOW_PATH artAllocArrayFromCodeResolvedTLAB -END_FUNCTION art_quick_alloc_array_resolved_tlab - // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB). DEFINE_FUNCTION art_quick_alloc_array_region_tlab // Fast path region tlab allocation. // RDI: uint32_t type_idx, RSI: int32_t component_count, RDX: ArtMethod* // RCX: klass, R8, R9: free. RAX: return val. - ASSERT_USE_READ_BARRIER +#if !defined(USE_READ_BARRIER) + int3 + int3 +#endif movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rdx), %rcx // Load dex cache resolved types array movl 0(%rcx, %rdi, COMPRESSED_REFERENCE_SIZE), %ecx // Load the class // Null check so that we can load the lock word. testl %ecx, %ecx jz .Lart_quick_alloc_array_region_tlab_slow_path - // Since we have allocation entrypoint switching, we know the GC is marking. - // Check the mark bit, if it is 0, do the read barrier mark. - testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx) - jz .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path + + cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET + jne .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_marking .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path_exit: ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_array_region_tlab_slow_path +.Lart_quick_alloc_array_region_tlab_class_load_read_barrier_marking: + // Check the mark bit, if it is 1 return. + testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx) + jnz .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path_exit .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path: // The read barrier slow path. Mark the class. PUSH rdi @@ -1283,11 +1226,33 @@ DEFINE_FUNCTION art_quick_alloc_array_resolved_region_tlab // Fast path region tlab allocation. // RDI: mirror::Class* klass, RSI: int32_t component_count, RDX: ArtMethod* // RCX: mirror::Class* klass, R8, R9: free. RAX: return val. 
- ASSERT_USE_READ_BARRIER +#if !defined(USE_READ_BARRIER) + int3 + int3 +#endif movq %rdi, %rcx - // Caller is responsible for read barrier. // Already resolved, no null check. + cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET + jne .Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_marking +.Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_slow_path_exit: ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_array_resolved_region_tlab_slow_path +.Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_marking: + // Check the mark bit, if it is 1 return. + testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx) + jnz .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path_exit +.Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_slow_path: + // The read barrier slow path. Mark the class. + PUSH rdi + PUSH rsi + PUSH rdx + // Outgoing argument set up + movq %rcx, %rdi // Pass the class as the first param. + call SYMBOL(artReadBarrierMark) // cxx_name(mirror::Object* obj) + movq %rax, %rcx + POP rdx + POP rsi + POP rdi + jmp .Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_slow_path_exit .Lart_quick_alloc_array_resolved_region_tlab_slow_path: ALLOC_ARRAY_TLAB_SLOW_PATH artAllocArrayFromCodeResolvedRegionTLAB END_FUNCTION art_quick_alloc_array_resolved_region_tlab @@ -1297,19 +1262,24 @@ DEFINE_FUNCTION art_quick_alloc_object_region_tlab // Fast path region tlab allocation. // RDI: uint32_t type_idx, RSI: ArtMethod* // RDX, RCX, R8, R9: free. RAX: return val. - ASSERT_USE_READ_BARRIER +#if !defined(USE_READ_BARRIER) + int3 + int3 +#endif movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx // Load dex cache resolved types array movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx // Load the class // Null check so that we can load the lock word. testl %edx, %edx jz .Lart_quick_alloc_object_region_tlab_slow_path - // Since we have allocation entrypoint switching, we know the GC is marking. - // Check the mark bit, if it is 0, do the read barrier mark. - testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) - jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path + // Test if the GC is marking. + cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET + jne .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_marking .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit: - // Use resolved one since we already did the null check. - ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path + ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path +.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_marking: + // Check the mark bit, if it is 1 avoid the read barrier. + testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) + jnz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path: // The read barrier slow path. Mark the class. PUSH rdi @@ -1332,7 +1302,10 @@ DEFINE_FUNCTION art_quick_alloc_object_resolved_region_tlab // Fast path region tlab allocation. // RDI: mirror::Class* klass, RSI: ArtMethod* // RDX, RCX, R8, R9: free. RAX: return val. - ASSERT_USE_READ_BARRIER +#if !defined(USE_READ_BARRIER) + int3 + int3 +#endif // No read barrier since the caller is responsible for that. 
movq %rdi, %rdx ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path @@ -1345,7 +1318,10 @@ DEFINE_FUNCTION art_quick_alloc_object_initialized_region_tlab // Fast path region tlab allocation. // RDI: mirror::Class* klass, RSI: ArtMethod* // RDX, RCX, R8, R9: free. RAX: return val. - ASSERT_USE_READ_BARRIER +#if !defined(USE_READ_BARRIER) + int3 + int3 +#endif movq %rdi, %rdx // No read barrier since the caller is responsible for that. ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_region_tlab_slow_path diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc index 82bb8e53c6..397655a895 100644 --- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc @@ -292,7 +292,7 @@ void SetQuickAllocEntryPointsInstrumented(bool instrumented) { entry_points_instrumented = instrumented; } -void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints, bool is_marking) { +void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints) { #if !defined(__APPLE__) || !defined(__LP64__) switch (entry_points_allocator) { case gc::kAllocatorTypeDlMalloc: { @@ -320,12 +320,7 @@ void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints, bool is_marking) { } case gc::kAllocatorTypeRegionTLAB: { CHECK(kMovingCollector); - if (is_marking) { - SetQuickAllocEntryPoints_region_tlab(qpoints, entry_points_instrumented); - } else { - // Not marking means we need no read barriers and can just use the normal TLAB case. - SetQuickAllocEntryPoints_tlab(qpoints, entry_points_instrumented); - } + SetQuickAllocEntryPoints_region_tlab(qpoints, entry_points_instrumented); return; } default: @@ -333,7 +328,6 @@ void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints, bool is_marking) { } #else UNUSED(qpoints); - UNUSED(is_marking); #endif UNIMPLEMENTED(FATAL); UNREACHABLE(); diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.h b/runtime/entrypoints/quick/quick_alloc_entrypoints.h index bd1e295e48..14a8e0428b 100644 --- a/runtime/entrypoints/quick/quick_alloc_entrypoints.h +++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.h @@ -23,9 +23,7 @@ namespace art { -// is_marking is only used for CC, if the GC is marking the allocation entrypoint is the marking -// one. -void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints, bool is_marking); +void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints); // Runtime shutdown lock is necessary to prevent races in thread initialization. When the thread is // starting it doesn't hold the mutator lock until after it has been added to the thread list. 
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h index 78dad94dfe..df23f94a31 100644 --- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h +++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h @@ -31,7 +31,7 @@ void DefaultInitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub; // Alloc - ResetQuickAllocEntryPoints(qpoints, /* is_marking */ true); + ResetQuickAllocEntryPoints(qpoints); // DexCache qpoints->pInitializeStaticStorage = art_quick_initialize_static_storage; diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h index 54f221056a..97129e8b19 100644 --- a/runtime/gc/heap-inl.h +++ b/runtime/gc/heap-inl.h @@ -247,7 +247,7 @@ inline mirror::Object* Heap::TryToAllocate(Thread* self, if (allocator_type != kAllocatorTypeTLAB && allocator_type != kAllocatorTypeRegionTLAB && allocator_type != kAllocatorTypeRosAlloc && - UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type, alloc_size, kGrow))) { + UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) { return nullptr; } mirror::Object* ret; @@ -267,9 +267,8 @@ inline mirror::Object* Heap::TryToAllocate(Thread* self, if (kInstrumented && UNLIKELY(is_running_on_memory_tool_)) { // If running on valgrind or asan, we should be using the instrumented path. size_t max_bytes_tl_bulk_allocated = rosalloc_space_->MaxBytesBulkAllocatedFor(alloc_size); - if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type, - max_bytes_tl_bulk_allocated, - kGrow))) { + if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, + max_bytes_tl_bulk_allocated))) { return nullptr; } ret = rosalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size, @@ -278,18 +277,14 @@ inline mirror::Object* Heap::TryToAllocate(Thread* self, DCHECK(!is_running_on_memory_tool_); size_t max_bytes_tl_bulk_allocated = rosalloc_space_->MaxBytesBulkAllocatedForNonvirtual(alloc_size); - if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type, - max_bytes_tl_bulk_allocated, - kGrow))) { + if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, + max_bytes_tl_bulk_allocated))) { return nullptr; } if (!kInstrumented) { DCHECK(!rosalloc_space_->CanAllocThreadLocal(self, alloc_size)); } - ret = rosalloc_space_->AllocNonvirtual(self, - alloc_size, - bytes_allocated, - usable_size, + ret = rosalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size, bytes_tl_bulk_allocated); } break; @@ -297,34 +292,22 @@ inline mirror::Object* Heap::TryToAllocate(Thread* self, case kAllocatorTypeDlMalloc: { if (kInstrumented && UNLIKELY(is_running_on_memory_tool_)) { // If running on valgrind, we should be using the instrumented path. 
- ret = dlmalloc_space_->Alloc(self, - alloc_size, - bytes_allocated, - usable_size, + ret = dlmalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size, bytes_tl_bulk_allocated); } else { DCHECK(!is_running_on_memory_tool_); - ret = dlmalloc_space_->AllocNonvirtual(self, - alloc_size, - bytes_allocated, - usable_size, + ret = dlmalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size, bytes_tl_bulk_allocated); } break; } case kAllocatorTypeNonMoving: { - ret = non_moving_space_->Alloc(self, - alloc_size, - bytes_allocated, - usable_size, + ret = non_moving_space_->Alloc(self, alloc_size, bytes_allocated, usable_size, bytes_tl_bulk_allocated); break; } case kAllocatorTypeLOS: { - ret = large_object_space_->Alloc(self, - alloc_size, - bytes_allocated, - usable_size, + ret = large_object_space_->Alloc(self, alloc_size, bytes_allocated, usable_size, bytes_tl_bulk_allocated); // Note that the bump pointer spaces aren't necessarily next to // the other continuous spaces like the non-moving alloc space or @@ -332,38 +315,80 @@ inline mirror::Object* Heap::TryToAllocate(Thread* self, DCHECK(ret == nullptr || large_object_space_->Contains(ret)); break; } + case kAllocatorTypeTLAB: { + DCHECK_ALIGNED(alloc_size, space::BumpPointerSpace::kAlignment); + if (UNLIKELY(self->TlabSize() < alloc_size)) { + const size_t new_tlab_size = alloc_size + kDefaultTLABSize; + if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, new_tlab_size))) { + return nullptr; + } + // Try allocating a new thread local buffer, if the allocaiton fails the space must be + // full so return null. + if (!bump_pointer_space_->AllocNewTlab(self, new_tlab_size)) { + return nullptr; + } + *bytes_tl_bulk_allocated = new_tlab_size; + } else { + *bytes_tl_bulk_allocated = 0; + } + // The allocation can't fail. + ret = self->AllocTlab(alloc_size); + DCHECK(ret != nullptr); + *bytes_allocated = alloc_size; + *usable_size = alloc_size; + break; + } case kAllocatorTypeRegion: { DCHECK(region_space_ != nullptr); alloc_size = RoundUp(alloc_size, space::RegionSpace::kAlignment); - ret = region_space_->AllocNonvirtual<false>(alloc_size, - bytes_allocated, - usable_size, + ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size, bytes_tl_bulk_allocated); break; } - case kAllocatorTypeTLAB: - FALLTHROUGH_INTENDED; case kAllocatorTypeRegionTLAB: { - DCHECK_ALIGNED(alloc_size, kObjectAlignment); - static_assert(space::RegionSpace::kAlignment == space::BumpPointerSpace::kAlignment, - "mismatched alignments"); - static_assert(kObjectAlignment == space::BumpPointerSpace::kAlignment, - "mismatched alignments"); + DCHECK(region_space_ != nullptr); + DCHECK_ALIGNED(alloc_size, space::RegionSpace::kAlignment); if (UNLIKELY(self->TlabSize() < alloc_size)) { - // kAllocatorTypeTLAB may be the allocator for region space TLAB if the GC is not marking, - // that is why the allocator is not passed down. - return AllocWithNewTLAB(self, - alloc_size, - kGrow, - bytes_allocated, - usable_size, - bytes_tl_bulk_allocated); + if (space::RegionSpace::kRegionSize >= alloc_size) { + // Non-large. Check OOME for a tlab. + if (LIKELY(!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, space::RegionSpace::kRegionSize))) { + // Try to allocate a tlab. + if (!region_space_->AllocNewTlab(self)) { + // Failed to allocate a tlab. Try non-tlab. 
+ ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); + return ret; + } + *bytes_tl_bulk_allocated = space::RegionSpace::kRegionSize; + // Fall-through. + } else { + // Check OOME for a non-tlab allocation. + if (!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size)) { + ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); + return ret; + } else { + // Neither tlab or non-tlab works. Give up. + return nullptr; + } + } + } else { + // Large. Check OOME. + if (LIKELY(!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) { + ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); + return ret; + } else { + return nullptr; + } + } + } else { + *bytes_tl_bulk_allocated = 0; // Allocated in an existing buffer. } // The allocation can't fail. ret = self->AllocTlab(alloc_size); DCHECK(ret != nullptr); *bytes_allocated = alloc_size; - *bytes_tl_bulk_allocated = 0; // Allocated in an existing buffer. *usable_size = alloc_size; break; } @@ -383,16 +408,15 @@ inline bool Heap::ShouldAllocLargeObject(ObjPtr<mirror::Class> c, size_t byte_co return byte_count >= large_object_threshold_ && (c->IsPrimitiveArray() || c->IsStringClass()); } -inline bool Heap::IsOutOfMemoryOnAllocation(AllocatorType allocator_type, - size_t alloc_size, - bool grow) { +template <bool kGrow> +inline bool Heap::IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t alloc_size) { size_t new_footprint = num_bytes_allocated_.LoadSequentiallyConsistent() + alloc_size; if (UNLIKELY(new_footprint > max_allowed_footprint_)) { if (UNLIKELY(new_footprint > growth_limit_)) { return true; } if (!AllocatorMayHaveConcurrentGC(allocator_type) || !IsGcConcurrent()) { - if (!grow) { + if (!kGrow) { return true; } // TODO: Grow for allocation is racy, fix it. diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index ae9741ffa5..f0e619dd35 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -1819,7 +1819,7 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, break; } // Try to transition the heap if the allocation failure was due to the space being full. - if (!IsOutOfMemoryOnAllocation(allocator, alloc_size, /*grow*/ false)) { + if (!IsOutOfMemoryOnAllocation<false>(allocator, alloc_size)) { // If we aren't out of memory then the OOM was probably from the non moving space being // full. Attempt to disable compaction and turn the main space into a non moving space. DisableMovingGc(); @@ -4225,72 +4225,5 @@ void Heap::RemoveGcPauseListener() { gc_pause_listener_.StoreRelaxed(nullptr); } -mirror::Object* Heap::AllocWithNewTLAB(Thread* self, - size_t alloc_size, - bool grow, - size_t* bytes_allocated, - size_t* usable_size, - size_t* bytes_tl_bulk_allocated) { - const AllocatorType allocator_type = GetCurrentAllocator(); - if (allocator_type == kAllocatorTypeTLAB) { - DCHECK(bump_pointer_space_ != nullptr); - const size_t new_tlab_size = alloc_size + kDefaultTLABSize; - if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type, new_tlab_size, grow))) { - return nullptr; - } - // Try allocating a new thread local buffer, if the allocation fails the space must be - // full so return null. 
- if (!bump_pointer_space_->AllocNewTlab(self, new_tlab_size)) { - return nullptr; - } - *bytes_tl_bulk_allocated = new_tlab_size; - } else { - DCHECK(allocator_type == kAllocatorTypeRegionTLAB); - DCHECK(region_space_ != nullptr); - if (space::RegionSpace::kRegionSize >= alloc_size) { - // Non-large. Check OOME for a tlab. - if (LIKELY(!IsOutOfMemoryOnAllocation(allocator_type, - space::RegionSpace::kRegionSize, - grow))) { - // Try to allocate a tlab. - if (!region_space_->AllocNewTlab(self)) { - // Failed to allocate a tlab. Try non-tlab. - return region_space_->AllocNonvirtual<false>(alloc_size, - bytes_allocated, - usable_size, - bytes_tl_bulk_allocated); - } - *bytes_tl_bulk_allocated = space::RegionSpace::kRegionSize; - // Fall-through to using the TLAB below. - } else { - // Check OOME for a non-tlab allocation. - if (!IsOutOfMemoryOnAllocation(allocator_type, alloc_size, grow)) { - return region_space_->AllocNonvirtual<false>(alloc_size, - bytes_allocated, - usable_size, - bytes_tl_bulk_allocated); - } - // Neither tlab or non-tlab works. Give up. - return nullptr; - } - } else { - // Large. Check OOME. - if (LIKELY(!IsOutOfMemoryOnAllocation(allocator_type, alloc_size, grow))) { - return region_space_->AllocNonvirtual<false>(alloc_size, - bytes_allocated, - usable_size, - bytes_tl_bulk_allocated); - } - return nullptr; - } - } - // Refilled TLAB, return. - mirror::Object* ret = self->AllocTlab(alloc_size); - DCHECK(ret != nullptr); - *bytes_allocated = alloc_size; - *usable_size = alloc_size; - return ret; -} - } // namespace gc } // namespace art diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h index 3a8e29b08a..0c671d269d 100644 --- a/runtime/gc/heap.h +++ b/runtime/gc/heap.h @@ -854,10 +854,6 @@ class Heap { allocator_type != kAllocatorTypeRegionTLAB; } static ALWAYS_INLINE bool AllocatorMayHaveConcurrentGC(AllocatorType allocator_type) { - if (kUseReadBarrier) { - // Read barrier may have the TLAB allocator but is always concurrent. TODO: clean this up. - return true; - } return allocator_type != kAllocatorTypeBumpPointer && allocator_type != kAllocatorTypeTLAB; @@ -927,20 +923,11 @@ class Heap { size_t* bytes_tl_bulk_allocated) REQUIRES_SHARED(Locks::mutator_lock_); - mirror::Object* AllocWithNewTLAB(Thread* self, - size_t alloc_size, - bool grow, - size_t* bytes_allocated, - size_t* usable_size, - size_t* bytes_tl_bulk_allocated) - REQUIRES_SHARED(Locks::mutator_lock_); - void ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType allocator_type) REQUIRES_SHARED(Locks::mutator_lock_); - ALWAYS_INLINE bool IsOutOfMemoryOnAllocation(AllocatorType allocator_type, - size_t alloc_size, - bool grow); + template <bool kGrow> + ALWAYS_INLINE bool IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t alloc_size); // Run the finalizers. If timeout is non zero, then we use the VMRuntime version. 
void RunFinalization(JNIEnv* env, uint64_t timeout); diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc index 870d1ae9b5..d4c322eb84 100644 --- a/runtime/instrumentation.cc +++ b/runtime/instrumentation.cc @@ -630,7 +630,7 @@ void Instrumentation::ConfigureStubs(const char* key, InstrumentationLevel desir } static void ResetQuickAllocEntryPointsForThread(Thread* thread, void* arg ATTRIBUTE_UNUSED) { - thread->ResetQuickAllocEntryPointsForThread(kUseReadBarrier && thread->GetIsGcMarking()); + thread->ResetQuickAllocEntryPointsForThread(); } void Instrumentation::SetEntrypointsInstrumented(bool instrumented) { diff --git a/runtime/thread.cc b/runtime/thread.cc index c92e38b6e8..65c86815b5 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -122,27 +122,21 @@ void Thread::SetIsGcMarkingAndUpdateEntrypoints(bool is_marking) { CHECK(kUseReadBarrier); tls32_.is_gc_marking = is_marking; UpdateReadBarrierEntrypoints(&tlsPtr_.quick_entrypoints, is_marking); - if (kRuntimeISA == kX86_64) { - // Entrypoint switching is only implemented for X86_64. - ResetQuickAllocEntryPointsForThread(is_marking); - } } void Thread::InitTlsEntryPoints() { // Insert a placeholder so we can easily tell if we call an unimplemented entry point. uintptr_t* begin = reinterpret_cast<uintptr_t*>(&tlsPtr_.jni_entrypoints); - uintptr_t* end = reinterpret_cast<uintptr_t*>( - reinterpret_cast<uint8_t*>(&tlsPtr_.quick_entrypoints) + sizeof(tlsPtr_.quick_entrypoints)); + uintptr_t* end = reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(&tlsPtr_.quick_entrypoints) + + sizeof(tlsPtr_.quick_entrypoints)); for (uintptr_t* it = begin; it != end; ++it) { *it = reinterpret_cast<uintptr_t>(UnimplementedEntryPoint); } InitEntryPoints(&tlsPtr_.jni_entrypoints, &tlsPtr_.quick_entrypoints); } -void Thread::ResetQuickAllocEntryPointsForThread(bool is_marking) { - // Entrypoint switching is currnetly only faster for X86_64 since other archs don't have TLAB - // fast path for non region space entrypoints. - ResetQuickAllocEntryPoints(&tlsPtr_.quick_entrypoints, is_marking); +void Thread::ResetQuickAllocEntryPointsForThread() { + ResetQuickAllocEntryPoints(&tlsPtr_.quick_entrypoints); } class DeoptimizationContextRecord { diff --git a/runtime/thread.h b/runtime/thread.h index 35226f2230..97093a6350 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -1007,7 +1007,7 @@ class Thread { tls32_.state_and_flags.as_atomic_int.FetchAndAndSequentiallyConsistent(-1 ^ flag); } - void ResetQuickAllocEntryPointsForThread(bool is_marking); + void ResetQuickAllocEntryPointsForThread(); // Returns the remaining space in the TLAB. size_t TlabSize() const; |
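
Finally, the heap-inl.h and heap.h hunks above restore IsOutOfMemoryOnAllocation as a function templated on kGrow rather than one taking a runtime bool, and fold the TLAB refill logic back into Heap::TryToAllocate. A minimal sketch of the restored compile-time-flag pattern, simplified (the concurrent-GC condition is omitted) and using made-up footprint numbers:

```cpp
#include <cstddef>
#include <cstdio>

// Illustrative footprint numbers, not real heap state.
static size_t num_bytes_allocated = 96;
static size_t max_allowed_footprint = 128;
static const size_t growth_limit = 256;

template <bool kGrow>
bool IsOutOfMemoryOnAllocation(size_t alloc_size) {
  size_t new_footprint = num_bytes_allocated + alloc_size;
  if (new_footprint > max_allowed_footprint) {
    if (new_footprint > growth_limit) {
      return true;  // past the hard growth limit: always out of memory
    }
    if (!kGrow) {
      return true;  // this call site is not allowed to grow the heap
    }
    max_allowed_footprint = new_footprint;  // grow (racy in the real code; see its TODO)
  }
  return false;
}

int main() {
  std::printf("kGrow=false -> OOM? %d\n", IsOutOfMemoryOnAllocation<false>(64));
  std::printf("kGrow=true  -> OOM? %d\n", IsOutOfMemoryOnAllocation<true>(64));
  return 0;
}
```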