 runtime/arch/arm/quick_entrypoints_arm.S       | 27
 runtime/arch/arm64/quick_entrypoints_arm64.S   | 42
 runtime/arch/x86/quick_entrypoints_x86.S       | 13
 runtime/arch/x86_64/quick_entrypoints_x86_64.S | 22
 runtime/asm_support.h                          |  7
 runtime/class_linker_test.cc                   |  2
 runtime/mirror/class-inl.h                     |  2
 runtime/mirror/class.cc                        | 24
 runtime/mirror/class.h                         |  9
 9 files changed, 58 insertions(+), 90 deletions(-)
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index e25e93fcb0..bc4c999735 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1191,25 +1191,6 @@ END art_quick_alloc_object_rosalloc
 // Need to preserve r0 and r1 to the slow path.
 .macro ALLOC_OBJECT_TLAB_FAST_PATH slowPathLabel
     cbz    r2, \slowPathLabel                          // Check null class
-                                                       // Check class status.
-    ldr    r3, [r2, #MIRROR_CLASS_STATUS_OFFSET]
-    cmp    r3, #MIRROR_CLASS_STATUS_INITIALIZED
-    bne    \slowPathLabel
-                                                       // Add a fake dependence from the
-                                                       // following access flag and size
-                                                       // loads to the status load.
-                                                       // This is to prevent those loads
-                                                       // from being reordered above the
-                                                       // status load and reading wrong
-                                                       // values (an alternative is to use
-                                                       // a load-acquire for the status).
-    eor    r3, r3, r3
-    add    r2, r2, r3
-                                                       // Check access flags has
-                                                       // kAccClassIsFinalizable.
-    ldr    r3, [r2, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
-    tst    r3, #ACCESS_FLAGS_CLASS_IS_FINALIZABLE
-    bne    \slowPathLabel
                                                        // Load thread_local_pos (r12) and
                                                        // thread_local_end (r3) with ldrd.
                                                        // Check constraints for ldrd.
@@ -1218,16 +1199,12 @@ END art_quick_alloc_object_rosalloc
 #endif
     ldrd   r12, r3, [r9, #THREAD_LOCAL_POS_OFFSET]
     sub    r12, r3, r12                                // Compute the remaining buf size.
-    ldr    r3, [r2, #MIRROR_CLASS_OBJECT_SIZE_OFFSET]  // Load the object size (r3).
+    ldr    r3, [r2, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (r3).
     cmp    r3, r12                                     // Check if it fits. OK to do this
                                                        // before rounding up the object size
                                                        // assuming the buf size alignment.
     bhi    \slowPathLabel
     // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
-                                                       // Round up the object size by the
-                                                       // object alignment. (addr + 7) & ~7.
-    add    r3, r3, #OBJECT_ALIGNMENT_MASK
-    and    r3, r3, #OBJECT_ALIGNMENT_MASK_TOGGLED
                                                        // Reload old thread_local_pos (r0)
                                                        // for the return value.
     ldr    r0, [r9, #THREAD_LOCAL_POS_OFFSET]
@@ -1244,7 +1221,7 @@ END art_quick_alloc_object_rosalloc
                                                        // the fields of the class.
                                                        // Alternatively we could use "ishst"
                                                        // if we use load-acquire for the
-                                                       // class status load.)
+                                                       // object size load.)
     dmb    ish
     bx     lr
 .endm
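
The net effect on ARM: the fast path is now a null-class check, one load of the
pre-computed size, and a bounds check. A rough C++ sketch of that logic, with
illustrative names (Tlab, ClassInfo, and AllocFastPath are not ART APIs):

    #include <cstddef>
    #include <cstdint>

    // Illustrative stand-ins for Thread's TLAB fields and the new Class field.
    struct ClassInfo {
      uint32_t object_size_alloc_fast_path;  // Pre-aligned size, or max-int sentinel.
    };

    struct Tlab {
      uintptr_t pos;   // thread_local_pos
      uintptr_t end;   // thread_local_end
      size_t objects;  // thread_local_objects
    };

    // Returns the allocated address, or 0 to mean "take the slow path".
    uintptr_t AllocFastPath(Tlab* tlab, const ClassInfo* klass) {
      if (klass == nullptr) return 0;                      // cbz r2, \slowPathLabel
      uint32_t size = klass->object_size_alloc_fast_path;  // One load replaces the old
                                                           // status/flags/size sequence.
      if (size > tlab->end - tlab->pos) return 0;          // bhi \slowPathLabel
      uintptr_t obj = tlab->pos;                           // Old pos is the return value.
      tlab->pos += size;                                   // Bump the allocation pointer.
      tlab->objects += 1;
      return obj;  // Caller still stores the class pointer and issues dmb ish.
    }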
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 3f87a1443a..76e503cf94 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -2027,48 +2027,24 @@ END art_quick_alloc_object_rosalloc
     ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED \slowPathLabel
 .endm
 
+// TODO: delete ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED since it is the same as
+// ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED.
 .macro ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED slowPathLabel
-    ldr    w3, [x2, #MIRROR_CLASS_STATUS_OFFSET]       // Check class status.
-    cmp    x3, #MIRROR_CLASS_STATUS_INITIALIZED
-    bne    \slowPathLabel
-                                                       // Add a fake dependence from the
-                                                       // following access flag and size
-                                                       // loads to the status load.
-                                                       // This is to prevent those loads
-                                                       // from being reordered above the
-                                                       // status load and reading wrong
-                                                       // values (an alternative is to use
-                                                       // a load-acquire for the status).
-    eor    x3, x3, x3
-    add    x2, x2, x3
     ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED \slowPathLabel
 .endm
 
 .macro ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED slowPathLabel
-                                                       // Check access flags has
-                                                       // kAccClassIsFinalizable.
-    ldr    w3, [x2, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
-    tbnz   x3, #ACCESS_FLAGS_CLASS_IS_FINALIZABLE_BIT, \slowPathLabel
-                                                       // Load thread_local_pos (x4) and
-                                                       // thread_local_end (x5).
     ldr    x4, [xSELF, #THREAD_LOCAL_POS_OFFSET]
     ldr    x5, [xSELF, #THREAD_LOCAL_END_OFFSET]
-    sub    x6, x5, x4                                  // Compute the remaining buf size.
-    ldr    w7, [x2, #MIRROR_CLASS_OBJECT_SIZE_OFFSET]  // Load the object size (x7).
-    cmp    x7, x6                                      // Check if it fits. OK to do this
-                                                       // before rounding up the object size
-                                                       // assuming the buf size alignment.
+    ldr    w7, [x2, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (x7).
+    add    x6, x4, x7                                  // Add object size to tlab pos.
+    cmp    x6, x5                                      // Check if it fits, overflow works
+                                                       // since the tlab pos and end are 32
+                                                       // bit values.
     bhi    \slowPathLabel
     // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
-                                                       // Round up the object size by the
-                                                       // object alignment. (addr + 7) & ~7.
-    add    x7, x7, #OBJECT_ALIGNMENT_MASK
-    and    x7, x7, #OBJECT_ALIGNMENT_MASK_TOGGLED
-                                                       // Move old thread_local_pos to x0
-                                                       // for the return value.
     mov    x0, x4
-    add    x5, x0, x7
-    str    x5, [xSELF, #THREAD_LOCAL_POS_OFFSET]       // Store new thread_local_pos.
+    str    x6, [xSELF, #THREAD_LOCAL_POS_OFFSET]       // Store new thread_local_pos.
     ldr    x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]   // Increment thread_local_objects.
     add    x5, x5, #1
     str    x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
@@ -2080,7 +2056,7 @@ END art_quick_alloc_object_rosalloc
                                                        // the fields of the class.
                                                        // Alternatively we could use "ishst"
                                                        // if we use load-acquire for the
-                                                       // class status load.)
+                                                       // object size load.)
     dmb    ish
     ret
 .endm
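
The arm64 version also folds the bounds check into the add: instead of computing
the remaining buffer size, it adds the size to thread_local_pos and compares
against thread_local_end. That is safe without an explicit overflow check because
the stored size never exceeds the max-int sentinel, so the 64-bit add cannot
wrap. A minimal sketch of that reasoning (addresses are made up):

    #include <cassert>
    #include <cstdint>

    bool FitsInTlab(uint64_t pos, uint64_t end, uint32_t size) {
      uint64_t new_pos = pos + size;  // add x6, x4, x7 -- cannot overflow uint64_t,
                                      // since size <= INT32_MAX and pos is a pointer.
      return new_pos <= end;          // cmp x6, x5; bhi \slowPathLabel
    }

    int main() {
      // The sentinel always fails the check, so uninitialized and finalizable
      // classes reach the slow path without dedicated status/flags branches.
      assert(!FitsInTlab(/*pos=*/0x70000000, /*end=*/0x70100000, INT32_MAX));
      assert(FitsInTlab(/*pos=*/0x70000000, /*end=*/0x70100000, /*size=*/64));
      return 0;
    }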
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 282f10d410..67df081456 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -990,32 +990,23 @@ END_FUNCTION art_quick_alloc_object_rosalloc
 MACRO1(ALLOC_OBJECT_TLAB_FAST_PATH, slowPathLabel)
     testl %edx, %edx                                   // Check null class
     jz    VAR(slowPathLabel)
-                                                       // Check class status.
-    cmpl LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%edx)
-    jne   VAR(slowPathLabel)
                                                        // No fake dependence needed on x86
                                                        // between status and flags load,
                                                        // since each load is a load-acquire,
                                                        // no loads reordering.
-                                                       // Check access flags has
-                                                       // kAccClassIsFinalizable
-    testl LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%edx)
-    jnz   VAR(slowPathLabel)
     movl %fs:THREAD_SELF_OFFSET, %ebx                  // ebx = thread
     movl THREAD_LOCAL_END_OFFSET(%ebx), %edi           // Load thread_local_end.
     subl THREAD_LOCAL_POS_OFFSET(%ebx), %edi           // Compute the remaining buffer size.
-    movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%edx), %esi   // Load the object size.
+    movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%edx), %esi  // Load the object size.
     cmpl %edi, %esi                                    // Check if it fits. OK to do this
                                                        // before rounding up the object size
                                                        // assuming the buf size alignment.
     ja    VAR(slowPathLabel)
-    addl LITERAL(OBJECT_ALIGNMENT_MASK), %esi          // Align the size by 8. (addr + 7) & ~7.
-    andl LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED), %esi
     movl THREAD_LOCAL_POS_OFFSET(%ebx), %eax           // Load thread_local_pos
                                                        // as allocated object.
     addl %eax, %esi                                    // Add the object size.
     movl %esi, THREAD_LOCAL_POS_OFFSET(%ebx)           // Update thread_local_pos.
-    addl LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%ebx) // Increase thread_local_objects.
+    incl THREAD_LOCAL_OBJECTS_OFFSET(%ebx)             // Increase thread_local_objects.
                                                        // Store the class pointer in the header.
                                                        // No fence needed for x86.
     POISON_HEAP_REF edx
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 06ff7ab802..c568715002 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1020,21 +1020,12 @@ MACRO1(ALLOC_OBJECT_TLAB_FAST_PATH, slowPathLabel)
 END_MACRO
 
 // The common fast path code for art_quick_alloc_object_resolved_region_tlab.
+// TODO: delete ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH since it is the same as
+// ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH.
 //
 // RDI: type_idx, RSI: ArtMethod*, RDX/EDX: the class, RAX: return value.
 // RCX: scratch, r8: Thread::Current().
 MACRO1(ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH, slowPathLabel)
-                                                       // Check class status.
-    cmpl LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%rdx)
-    jne   RAW_VAR(slowPathLabel)
-                                                       // No fake dependence needed on x86
-                                                       // between status and flags load,
-                                                       // since each load is a load-acquire,
-                                                       // no loads reordering.
-                                                       // Check access flags has
-                                                       // kAccClassIsFinalizable
-    testl LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%rdx)
-    jnz   RAW_VAR(slowPathLabel)
     ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH(RAW_VAR(slowPathLabel))
 END_MACRO
 
@@ -1044,19 +1035,16 @@ END_MACRO
 // RCX: scratch, r8: Thread::Current().
 MACRO1(ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH, slowPathLabel)
     movq %gs:THREAD_SELF_OFFSET, %r8                   // r8 = thread
-    movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%rdx), %ecx   // Load the object size.
+    movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdx), %ecx  // Load the object size.
     movq THREAD_LOCAL_POS_OFFSET(%r8), %rax
-    leaq OBJECT_ALIGNMENT_MASK(%rax, %rcx), %rcx       // Add size to pos, note that these
+    addq %rax, %rcx                                    // Add size to pos, note that these
                                                        // are both 32 bit ints, overflow
                                                        // will cause the add to be past the
                                                        // end of the thread local region.
-                                                       // Also sneak in alignment mask add.
-    andq LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED64), %rcx  // Align the size by 8. (addr + 7) &
-                                                       // ~7.
     cmpq THREAD_LOCAL_END_OFFSET(%r8), %rcx            // Check if it fits.
     ja    RAW_VAR(slowPathLabel)
     movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8)            // Update thread_local_pos.
-    addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%r8)  // Increase thread_local_objects.
+    incq THREAD_LOCAL_OBJECTS_OFFSET(%r8)              // Increase thread_local_objects.
                                                        // Store the class pointer in the
                                                        // header.
                                                        // No fence needed for x86.
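
On all four architectures the inline (addr + 7) & ~7 rounding disappears from
the hot path because the stored value is pre-aligned when the class becomes
initialized (see the class.cc hunk below). The identity being relied on, checked
at compile time (kObjectAlignment = 8 mirrors OBJECT_ALIGNMENT_MASK = 7 and
OBJECT_ALIGNMENT_MASK_TOGGLED = ~7):

    #include <cstdint>

    constexpr uint32_t kObjectAlignment = 8;

    constexpr uint32_t RoundUpObjectSize(uint32_t size) {
      return (size + (kObjectAlignment - 1)) & ~(kObjectAlignment - 1);
    }

    static_assert(RoundUpObjectSize(1) == 8, "rounds up");
    static_assert(RoundUpObjectSize(8) == 8, "already aligned stays put");
    static_assert(RoundUpObjectSize(13) == 16, "next multiple of 8");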
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 102b993686..f4addf72a0 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -178,10 +178,13 @@ ADD_TEST_EQ(MIRROR_CLASS_ACCESS_FLAGS_OFFSET,
 #define MIRROR_CLASS_OBJECT_SIZE_OFFSET (96 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_OBJECT_SIZE_OFFSET,
             art::mirror::Class::ObjectSizeOffset().Int32Value())
-#define MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET (100 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET (100 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET,
+            art::mirror::Class::ObjectSizeAllocFastPathOffset().Int32Value())
+#define MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET (104 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET,
             art::mirror::Class::PrimitiveTypeOffset().Int32Value())
-#define MIRROR_CLASS_STATUS_OFFSET (108 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_STATUS_OFFSET (112 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_STATUS_OFFSET,
             art::mirror::Class::StatusOffset().Int32Value())
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index e0ff3ddc0f..3be39a1f79 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -586,6 +586,8 @@ struct ClassOffsets : public CheckOffsets<mirror::Class> {
     addOffset(OFFSETOF_MEMBER(mirror::Class, num_reference_static_fields_),
               "numReferenceStaticFields");
     addOffset(OFFSETOF_MEMBER(mirror::Class, object_size_), "objectSize");
+    addOffset(OFFSETOF_MEMBER(mirror::Class, object_size_alloc_fast_path_),
+              "objectSizeAllocFastPath");
     addOffset(OFFSETOF_MEMBER(mirror::Class, primitive_type_), "primitiveType");
     addOffset(OFFSETOF_MEMBER(mirror::Class, reference_instance_offsets_),
               "referenceInstanceOffsets");
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 0f2aac2790..26b8e8a8f4 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -861,6 +861,8 @@ inline void Class::InitializeClassVisitor::operator()(
   klass->SetPrimitiveType(Primitive::kPrimNot);  // Default to not being primitive.
   klass->SetDexClassDefIndex(DexFile::kDexNoIndex16);  // Default to no valid class def index.
   klass->SetDexTypeIndex(DexFile::kDexNoIndex16);  // Default to no valid type index.
+  // Default to force slow path until initialized.
+  klass->SetObjectSizeAllocFastPath(std::numeric_limits<int32_t>::max());
 }
 
 inline void Class::SetAccessFlags(uint32_t new_access_flags) {
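
The asm_support.h hunk shows why the literals shift: inserting
object_size_alloc_fast_path_ after object_size_ pushes primitive_type_ and
status_ down by four bytes, and each ADD_TEST_EQ pins the assembly-visible
literal to the real mirror::Class offset. A stand-in for that kind of check
(FakeClass and its offset literal are invented for illustration):

    #include <cstddef>
    #include <cstdint>

    struct FakeClass {
      uint32_t object_size_;
      uint32_t object_size_alloc_fast_path_;  // Newly inserted field.
      uint32_t primitive_type_;               // Everything after shifts by 4 bytes.
      uint32_t status_;
    };

    #define FAKE_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET 4
    static_assert(offsetof(FakeClass, object_size_alloc_fast_path_) ==
                      FAKE_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET,
                  "assembly offset out of sync with C++ layout");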
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 96b3345fab..b60c5731ae 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -100,9 +100,21 @@ void Class::SetStatus(Handle<Class> h_this, Status new_status, Thread* self) {
   }
   static_assert(sizeof(Status) == sizeof(uint32_t), "Size of status not equal to uint32");
   if (Runtime::Current()->IsActiveTransaction()) {
-    h_this->SetField32Volatile<true>(OFFSET_OF_OBJECT_MEMBER(Class, status_), new_status);
+    h_this->SetField32Volatile<true>(StatusOffset(), new_status);
   } else {
-    h_this->SetField32Volatile<false>(OFFSET_OF_OBJECT_MEMBER(Class, status_), new_status);
+    h_this->SetField32Volatile<false>(StatusOffset(), new_status);
+  }
+
+  // Setting the object size alloc fast path needs to be after the status write so that if the
+  // alloc path sees a valid object size, we would know that it's initialized as long as it has a
+  // load-acquire/fake dependency.
+  if (new_status == kStatusInitialized && !h_this->IsVariableSize()) {
+    uint32_t object_size = RoundUp(h_this->GetObjectSize(), kObjectAlignment);
+    if (h_this->IsFinalizable()) {
+      // Finalizable objects must always go slow path.
+      object_size = std::numeric_limits<int32_t>::max();
+    }
+    h_this->SetObjectSizeAllocFastPath(object_size);
   }
 
   if (!class_linker_initialized) {
@@ -1209,5 +1221,13 @@ int32_t Class::GetInnerClassFlags(Handle<Class> h_this, int32_t default_value) {
   return flags;
 }
 
+void Class::SetObjectSizeAllocFastPath(uint32_t new_object_size) {
+  if (Runtime::Current()->IsActiveTransaction()) {
+    SetField32Volatile<true>(ObjectSizeAllocFastPathOffset(), new_object_size);
+  } else {
+    SetField32Volatile<false>(ObjectSizeAllocFastPathOffset(), new_object_size);
+  }
+}
+
 }  // namespace mirror
 }  // namespace art
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 1751f32c3e..f8f414b3d9 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -586,6 +586,9 @@ class MANAGED Class FINAL : public Object {
   static MemberOffset ObjectSizeOffset() {
     return OFFSET_OF_OBJECT_MEMBER(Class, object_size_);
   }
+  static MemberOffset ObjectSizeAllocFastPathOffset() {
+    return OFFSET_OF_OBJECT_MEMBER(Class, object_size_alloc_fast_path_);
+  }
 
   void SetObjectSize(uint32_t new_object_size) REQUIRES_SHARED(Locks::mutator_lock_) {
     DCHECK(!IsVariableSize());
@@ -593,6 +596,8 @@ class MANAGED Class FINAL : public Object {
     return SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, object_size_), new_object_size);
   }
 
+  void SetObjectSizeAllocFastPath(uint32_t new_object_size) REQUIRES_SHARED(Locks::mutator_lock_);
+
   void SetObjectSizeWithoutChecks(uint32_t new_object_size)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     // Not called within a transaction.
@@ -1457,6 +1462,10 @@ class MANAGED Class FINAL : public Object {
   // See also class_size_.
   uint32_t object_size_;
 
+  // Aligned object size for allocation fast path. The value is max int if the object is
+  // uninitialized or finalizable. Not currently used for variable sized objects.
+  uint32_t object_size_alloc_fast_path_;
+
   // The lower 16 bits contains a Primitive::Type value. The upper 16
   // bits contains the size shift of the primitive type.
   uint32_t primitive_type_;
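
The ordering comment in SetStatus is the heart of the change: the status store
happens first, so a reader that observes a non-sentinel fast-path size also
observes an initialized class. A sketch of the publication protocol with
std::atomic standing in for SetField32Volatile (PublishedClass and the function
names are illustrative, not ART types):

    #include <atomic>
    #include <cstdint>
    #include <limits>

    constexpr uint32_t kForceSlowPath =
        static_cast<uint32_t>(std::numeric_limits<int32_t>::max());

    struct PublishedClass {
      std::atomic<uint32_t> status{0};                       // 0 = not initialized.
      std::atomic<uint32_t> size_fast_path{kForceSlowPath};  // Default: slow path.
    };

    // Writer side, mirroring SetStatus(): status first, then the usable size.
    void MarkInitialized(PublishedClass& k, uint32_t aligned_size, bool finalizable) {
      k.status.store(1, std::memory_order_release);
      k.size_fast_path.store(finalizable ? kForceSlowPath : aligned_size,
                             std::memory_order_release);
    }

    // Reader side (the allocation fast path): an acquire load (or the assembly's
    // address dependency) makes the class's initialized fields visible.
    bool TryFastPathSize(const PublishedClass& k, uint32_t* out_size) {
      uint32_t size = k.size_fast_path.load(std::memory_order_acquire);
      if (size == kForceSlowPath) return false;  // Uninitialized or finalizable.
      *out_size = size;
      return true;
    }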