diff options
| -rw-r--r-- | runtime/arch/arm/quick_entrypoints_arm.S | 48 | ||||
| -rw-r--r-- | runtime/arch/arm64/quick_entrypoints_arm64.S | 48 | ||||
| -rw-r--r-- | runtime/arch/x86/quick_entrypoints_x86.S | 35 | ||||
| -rw-r--r-- | runtime/arch/x86_64/quick_entrypoints_x86_64.S | 28 | ||||
| -rw-r--r-- | runtime/mirror/class-inl.h | 12 | ||||
| -rw-r--r-- | runtime/mirror/class.cc | 11 | ||||
| -rw-r--r-- | runtime/mirror/class.h | 6 |
7 files changed, 63 insertions, 125 deletions
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index bc4c999735..c51c336d28 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -1086,25 +1086,6 @@ ENTRY art_quick_alloc_object_rosalloc // Load the class (r2) ldr r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT] cbz r2, .Lart_quick_alloc_object_rosalloc_slow_path // Check null class - // Check class status. - ldr r3, [r2, #MIRROR_CLASS_STATUS_OFFSET] - cmp r3, #MIRROR_CLASS_STATUS_INITIALIZED - bne .Lart_quick_alloc_object_rosalloc_slow_path - // Add a fake dependence from the - // following access flag and size - // loads to the status load. - // This is to prevent those loads - // from being reordered above the - // status load and reading wrong - // values (an alternative is to use - // a load-acquire for the status). - eor r3, r3, r3 - add r2, r2, r3 - // Check access flags has - // kAccClassIsFinalizable - ldr r3, [r2, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET] - tst r3, #ACCESS_FLAGS_CLASS_IS_FINALIZABLE - bne .Lart_quick_alloc_object_rosalloc_slow_path ldr r3, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET] // Check if the thread local // allocation stack has room. @@ -1113,22 +1094,21 @@ ENTRY art_quick_alloc_object_rosalloc cmp r3, r12 bhs .Lart_quick_alloc_object_rosalloc_slow_path - ldr r3, [r2, #MIRROR_CLASS_OBJECT_SIZE_OFFSET] // Load the object size (r3) + ldr r3, [r2, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET] // Load the object size (r3) cmp r3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE // Check if the size is for a thread - // local allocation + // local allocation. Also does the + // initialized and finalizable checks. bhs .Lart_quick_alloc_object_rosalloc_slow_path // Compute the rosalloc bracket index - // from the size. - // Align up the size by the rosalloc - // bracket quantum size and divide - // by the quantum size and subtract - // by 1. This code is a shorter but - // equivalent version. 
- sub r3, r3, #1 - lsr r3, r3, #ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT + // from the size. Since the size is + // already aligned we can combine the + // two shifts together. + add r12, r9, r3, lsr #(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT - POINTER_SIZE_SHIFT) + // Subtract pointer size since there + // are no runs for 0 byte allocations + // and the size is already aligned. // Load the rosalloc run (r12) - add r12, r9, r3, lsl #POINTER_SIZE_SHIFT - ldr r12, [r12, #THREAD_ROSALLOC_RUNS_OFFSET] + ldr r12, [r12, #(THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)] // Load the free list head (r3). This // will be the return val. ldr r3, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)] @@ -1153,7 +1133,7 @@ ENTRY art_quick_alloc_object_rosalloc // to later accesses to the class // object. Alternatively we could use // "ishst" if we use load-acquire for - // the class status load.) + // the object size load. // Needs to be done before pushing on // allocation since Heap::VisitObjects // relies on seeing the class pointer. @@ -1200,9 +1180,7 @@ END art_quick_alloc_object_rosalloc ldrd r12, r3, [r9, #THREAD_LOCAL_POS_OFFSET] sub r12, r3, r12 // Compute the remaining buf size. ldr r3, [r2, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET] // Load the object size (r3). - cmp r3, r12 // Check if it fits. OK to do this - // before rounding up the object size - // assuming the buf size alignment. + cmp r3, r12 // Check if it fits. bhi \slowPathLabel // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1. 
// Reload old thread_local_pos (r0) diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index 82dac9ca40..03768af0a4 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -1860,47 +1860,27 @@ ENTRY art_quick_alloc_object_rosalloc // Load the class (x2) ldr w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT] cbz x2, .Lart_quick_alloc_object_rosalloc_slow_path // Check null class - // Check class status. - ldr w3, [x2, #MIRROR_CLASS_STATUS_OFFSET] - cmp x3, #MIRROR_CLASS_STATUS_INITIALIZED - bne .Lart_quick_alloc_object_rosalloc_slow_path - // Add a fake dependence from the - // following access flag and size - // loads to the status load. - // This is to prevent those loads - // from being reordered above the - // status load and reading wrong - // values (an alternative is to use - // a load-acquire for the status). - eor x3, x3, x3 - add x2, x2, x3 - // Check access flags has - // kAccClassIsFinalizable - ldr w3, [x2, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET] - tst x3, #ACCESS_FLAGS_CLASS_IS_FINALIZABLE - bne .Lart_quick_alloc_object_rosalloc_slow_path ldr x3, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET] // Check if the thread local // allocation stack has room. // ldp won't work due to large offset. ldr x4, [xSELF, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET] cmp x3, x4 bhs .Lart_quick_alloc_object_rosalloc_slow_path - ldr w3, [x2, #MIRROR_CLASS_OBJECT_SIZE_OFFSET] // Load the object size (x3) + ldr w3, [x2, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET] // Load the object size (x3) cmp x3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE // Check if the size is for a thread - // local allocation + // local allocation. Also does the + // finalizable and initialization + // checks. bhs .Lart_quick_alloc_object_rosalloc_slow_path // Compute the rosalloc bracket index - // from the size. 
- // Align up the size by the rosalloc - // bracket quantum size and divide - // by the quantum size and subtract - // by 1. This code is a shorter but - // equivalent version. - sub x3, x3, #1 - lsr x3, x3, #ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT - // Load the rosalloc run (x4) - add x4, xSELF, x3, lsl #POINTER_SIZE_SHIFT - ldr x4, [x4, #THREAD_ROSALLOC_RUNS_OFFSET] + // from the size. Since the size is + // already aligned we can combine the + // two shifts together. + add x4, xSELF, x3, lsr #(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT - POINTER_SIZE_SHIFT) + // Subtract pointer size since there + // are no runs for 0 byte allocations + // and the size is already aligned. + ldr x4, [x4, #(THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)] // Load the free list head (x3). This // will be the return val. ldr x3, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)] @@ -1921,11 +1901,11 @@ ENTRY art_quick_alloc_object_rosalloc str w2, [x3, #MIRROR_OBJECT_CLASS_OFFSET] // Fence. This is "ish" not "ishst" so // that it also ensures ordering of - // the class status load with respect + // the object size load with respect // to later accesses to the class // object. Alternatively we could use // "ishst" if we use load-acquire for - // the class status load.) + // the object size load. // Needs to be done before pushing on // allocation since Heap::VisitObjects // relies on seeing the class pointer. 
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index 9c22245e0d..67ebf50843 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -897,17 +897,6 @@ DEFINE_FUNCTION art_quick_alloc_object_rosalloc movl 0(%edx, %eax, COMPRESSED_REFERENCE_SIZE), %edx testl %edx, %edx // Check null class jz .Lart_quick_alloc_object_rosalloc_slow_path - // Check class status - cmpl LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%edx) - jne .Lart_quick_alloc_object_rosalloc_slow_path - // No fake dependence needed on x86 - // between status and flags load, - // since each load is a load-acquire, - // no loads reordering. - // Check access flags has - // kAccClassIsFinalizable - testl LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%edx) - jnz .Lart_quick_alloc_object_rosalloc_slow_path movl %fs:THREAD_SELF_OFFSET, %ebx // ebx = thread // Check if the thread local allocation @@ -916,21 +905,19 @@ DEFINE_FUNCTION art_quick_alloc_object_rosalloc cmpl THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%ebx), %edi jae .Lart_quick_alloc_object_rosalloc_slow_path - movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%edx), %edi // Load the object size (edi) + movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%edx), %edi // Load the object size (edi) // Check if the size is for a thread - // local allocation + // local allocation. Also does the + // finalizable and initialization check. cmpl LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %edi ja .Lart_quick_alloc_object_rosalloc_slow_path - decl %edi shrl LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %edi // Calculate the rosalloc bracket index // from object size. - // Align up the size by the rosalloc - // bracket quantum size and divide - // by the quantum size and subtract - // by 1. This code is a shorter but - // equivalent version. 
// Load thread local rosalloc run (ebx) - movl THREAD_ROSALLOC_RUNS_OFFSET(%ebx, %edi, __SIZEOF_POINTER__), %ebx + // Subtract __SIZEOF_POINTER__ to subtract + // one from edi as there is no 0 byte run + // and the size is already aligned. + movl (THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)(%ebx, %edi, __SIZEOF_POINTER__), %ebx // Load free_list head (edi), // this will be the return value. movl (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%ebx), %edi @@ -990,17 +977,11 @@ END_FUNCTION art_quick_alloc_object_rosalloc MACRO1(ALLOC_OBJECT_TLAB_FAST_PATH, slowPathLabel) testl %edx, %edx // Check null class jz VAR(slowPathLabel) - // No fake dependence needed on x86 - // between status and flags load, - // since each load is a load-acquire, - // no loads reordering. movl %fs:THREAD_SELF_OFFSET, %ebx // ebx = thread movl THREAD_LOCAL_END_OFFSET(%ebx), %edi // Load thread_local_end. subl THREAD_LOCAL_POS_OFFSET(%ebx), %edi // Compute the remaining buffer size. movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%edx), %esi // Load the object size. - cmpl %edi, %esi // Check if it fits. OK to do this - // before rounding up the object size - // assuming the buf size alignment. + cmpl %edi, %esi // Check if it fits. ja VAR(slowPathLabel) movl THREAD_LOCAL_POS_OFFSET(%ebx), %eax // Load thread_local_pos // as allocated object. diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index c568715002..b8057031f3 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -935,17 +935,6 @@ DEFINE_FUNCTION art_quick_alloc_object_rosalloc movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx testl %edx, %edx // Check null class jz .Lart_quick_alloc_object_rosalloc_slow_path - // Check class status. 
- cmpl LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%rdx) - jne .Lart_quick_alloc_object_rosalloc_slow_path - // We don't need a fence (between the - // the status and the access flag - // loads) here because every load is - // a load acquire on x86. - // Check access flags has - // kAccClassIsFinalizable - testl LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%rdx) - jnz .Lart_quick_alloc_object_rosalloc_slow_path // Check if the thread local // allocation stack has room. movq %gs:THREAD_SELF_OFFSET, %r8 // r8 = thread @@ -953,22 +942,21 @@ DEFINE_FUNCTION art_quick_alloc_object_rosalloc cmpq THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%r8), %rcx jae .Lart_quick_alloc_object_rosalloc_slow_path // Load the object size - movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%rdx), %eax + movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdx), %eax // Check if the size is for a thread - // local allocation + // local allocation. Also does the + // initialized and finalizable checks. cmpl LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %eax ja .Lart_quick_alloc_object_rosalloc_slow_path // Compute the rosalloc bracket index // from the size. - // Align up the size by the rosalloc - // bracket quantum size and divide - // by the quantum size and subtract - // by 1. This code is a shorter but - // equivalent version. - subq LITERAL(1), %rax shrq LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %rax // Load the rosalloc run (r9) - movq THREAD_ROSALLOC_RUNS_OFFSET(%r8, %rax, __SIZEOF_POINTER__), %r9 + // Subtract __SIZEOF_POINTER__ to + // subtract one from rax as there is no + // 0 byte run and the size is already + // aligned. + movq (THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)(%r8, %rax, __SIZEOF_POINTER__), %r9 // Load the free list head (rax). This // will be the return val. 
movq (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9), %rax diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h index 26b8e8a8f4..d1d8caab00 100644 --- a/runtime/mirror/class-inl.h +++ b/runtime/mirror/class-inl.h @@ -42,11 +42,19 @@ namespace mirror { template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption> inline uint32_t Class::GetObjectSize() { // Note: Extra parentheses to avoid the comma being interpreted as macro parameter separator. - DCHECK((!IsVariableSize<kVerifyFlags, kReadBarrierOption>())) << " class=" << PrettyTypeOf(this); + DCHECK((!IsVariableSize<kVerifyFlags, kReadBarrierOption>())) << "class=" << PrettyTypeOf(this); return GetField32(ObjectSizeOffset()); } template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption> +inline uint32_t Class::GetObjectSizeAllocFastPath() { + // Note: Extra parentheses to avoid the comma being interpreted as macro parameter separator. + DCHECK((!IsVariableSize<kVerifyFlags, kReadBarrierOption>())) << "class=" << PrettyTypeOf(this); + return GetField32(ObjectSizeAllocFastPathOffset()); +} + + +template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption> inline Class* Class::GetSuperClass() { // Can only get super class for loaded classes (hack for when runtime is // initializing) @@ -862,7 +870,7 @@ inline void Class::InitializeClassVisitor::operator()( klass->SetDexClassDefIndex(DexFile::kDexNoIndex16); // Default to no valid class def index. klass->SetDexTypeIndex(DexFile::kDexNoIndex16); // Default to no valid type index. // Default to force slow path until initialized. 
- klass->SetObjectSizeAllocFastPath(std::numeric_limits<int32_t>::max()); + klass->SetObjectSizeAllocFastPath(std::numeric_limits<uint32_t>::max()); } inline void Class::SetAccessFlags(uint32_t new_access_flags) { diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc index b60c5731ae..c979c28138 100644 --- a/runtime/mirror/class.cc +++ b/runtime/mirror/class.cc @@ -109,12 +109,11 @@ void Class::SetStatus(Handle<Class> h_this, Status new_status, Thread* self) { // alloc path sees a valid object size, we would know that it's initialized as long as it has a // load-acquire/fake dependency. if (new_status == kStatusInitialized && !h_this->IsVariableSize()) { - uint32_t object_size = RoundUp(h_this->GetObjectSize(), kObjectAlignment); - if (h_this->IsFinalizable()) { - // Finalizable objects must always go slow path. - object_size = std::numeric_limits<int32_t>::max(); + DCHECK_EQ(h_this->GetObjectSizeAllocFastPath(), std::numeric_limits<uint32_t>::max()); + // Finalizable objects must always go slow path. + if (!h_this->IsFinalizable()) { + h_this->SetObjectSizeAllocFastPath(RoundUp(h_this->GetObjectSize(), kObjectAlignment)); } - h_this->SetObjectSizeAllocFastPath(object_size); } if (!class_linker_initialized) { @@ -149,7 +148,7 @@ void Class::SetClassSize(uint32_t new_class_size) { if (kIsDebugBuild && new_class_size < GetClassSize()) { DumpClass(LOG(INTERNAL_FATAL), kDumpClassFullDetail); LOG(INTERNAL_FATAL) << new_class_size << " vs " << GetClassSize(); - LOG(FATAL) << " class=" << PrettyTypeOf(this); + LOG(FATAL) << "class=" << PrettyTypeOf(this); } // Not called within a transaction. 
SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, class_size_), new_class_size); diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h index f8f414b3d9..99b7769b03 100644 --- a/runtime/mirror/class.h +++ b/runtime/mirror/class.h @@ -598,6 +598,10 @@ class MANAGED Class FINAL : public Object { void SetObjectSizeAllocFastPath(uint32_t new_object_size) REQUIRES_SHARED(Locks::mutator_lock_); + template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, + ReadBarrierOption kReadBarrierOption = kWithReadBarrier> + uint32_t GetObjectSizeAllocFastPath() REQUIRES_SHARED(Locks::mutator_lock_); + void SetObjectSizeWithoutChecks(uint32_t new_object_size) REQUIRES_SHARED(Locks::mutator_lock_) { // Not called within a transaction. @@ -1462,7 +1466,7 @@ class MANAGED Class FINAL : public Object { // See also class_size_. uint32_t object_size_; - // Aligned object size for allocation fast path. The value is max int if the object is + // Aligned object size for allocation fast path. The value is max uint32_t if the object is // uninitialized or finalizable. Not currently used for variable sized objects. uint32_t object_size_alloc_fast_path_; |