diff options
author | 2017-01-06 14:40:07 +0000 | |
---|---|---|
committer | 2017-01-11 10:34:10 +0000 | |
commit | 2b615ba29c4dfcf54aaf44955f2eac60f5080b2e (patch) | |
tree | 0a2fe5f9243645a054d4aa094bff5a69cc1abb88 | |
parent | c9a060f2688599d4a402ee6234db46c2e9b7463f (diff) |
Make object allocation entrypoints only take a class.
Change motivated by:
- Dex cache compression: having the allocation fast path do a
dex cache lookup will be too expensive. So instead, rely on the
compiler having direct access to the class (either through BSS for
AOT, or JIT tables for JIT).
- Inlining: the entrypoints relied on the caller of the allocation to
have the same dex cache as the outer method (stored at the bottom of
the stack). This meant we could not inline methods from a different
dex file that do allocations. By avoiding the dex cache lookup in
the entrypoint, we can now remove this restriction.
Code expansion on average for Docs/Gms/FB/Framework (go/lem numbers):
- Around 0.8% on arm64
- Around 1% for x64, arm
- Around 1.5% on x86
Test: test-art-host, test-art-target, ART_USE_READ_BARRIER=true/false
Test: test-art-host, test-art-target, ART_DEFAULT_GC_TYPE=SS ART_USE_TLAB=true
Change-Id: I41f3748bb4d251996aaf6a90fae4c50176f9295f
37 files changed, 301 insertions, 904 deletions
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index 86d92ff0b5..4180e0e6c9 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -487,7 +487,7 @@ TEST_F(OatTest, OatHeaderSizeCheck) { EXPECT_EQ(72U, sizeof(OatHeader)); EXPECT_EQ(4U, sizeof(OatMethodOffsets)); EXPECT_EQ(20U, sizeof(OatQuickMethodHeader)); - EXPECT_EQ(164 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)), + EXPECT_EQ(163 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)), sizeof(QuickEntryPoints)); } diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 8a7f6d3a33..1dd526f404 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -3936,7 +3936,6 @@ void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) { } else { InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } locations->SetOut(Location::RegisterLocation(R0)); } @@ -3954,7 +3953,7 @@ void InstructionCodeGeneratorARM::VisitNewInstance(HNewInstance* instruction) { codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); } else { codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); } } diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 5c33fe1a7d..240e39df4b 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -4738,7 +4738,6 @@ void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) { locations->AddTemp(LocationFrom(kArtMethodRegister)); } else { locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); } locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); } @@ -4756,7 +4755,7 @@ void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); } else { codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); } } diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 00ad3e34b7..cf4d94deea 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -3948,7 +3948,6 @@ void LocationsBuilderARMVIXL::VisitNewInstance(HNewInstance* instruction) { } else { InvokeRuntimeCallingConventionARMVIXL calling_convention; locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); } locations->SetOut(LocationFrom(r0)); } @@ -3970,7 +3969,7 @@ void InstructionCodeGeneratorARMVIXL::VisitNewInstance(HNewInstance* instruction codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); } else { codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); } } diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 01e0dac33e..29f8b2aa3c 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -5900,7 +5900,6 @@ void LocationsBuilderMIPS::VisitNewInstance(HNewInstance* instruction) { locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument)); } else { locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); } @@ -5917,7 +5916,7 @@ void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) { codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); } else { codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); } } diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 36690c0569..dd3f0fee5a 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -3841,7 +3841,6 @@ void LocationsBuilderMIPS64::VisitNewInstance(HNewInstance* instruction) { locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument)); } else { locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); } @@ -3859,7 +3858,7 @@ void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); } else { codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); } } diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 0abe85540c..786bc50345 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -4150,7 +4150,6 @@ void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) { } else { InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } } @@ -4166,7 +4165,7 @@ void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) { codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); } else { codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); DCHECK(!codegen_->IsLeafMethod()); } } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 903844fcdb..06b48c489c 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -4038,7 +4038,6 @@ void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) { locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument)); } else { locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } locations->SetOut(Location::RegisterLocation(RAX)); } @@ -4055,7 +4054,7 @@ void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); } else { codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); DCHECK(!codegen_->IsLeafMethod()); } } diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index e5d05e9e6d..c970e5cbba 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -1428,15 +1428,6 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, return false; } - if (current->IsNewInstance() && - (current->AsNewInstance()->GetEntrypoint() == kQuickAllocObjectWithAccessCheck)) { - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be inlined because it is using an entrypoint" - << " with access checks"; - // Allocation entrypoint does not handle inlined frames. - return false; - } - if (current->IsNewArray() && (current->AsNewArray()->GetEntrypoint() == kQuickAllocArrayWithAccessCheck)) { VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index 768b1d80a1..009d549547 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -917,11 +917,11 @@ bool HInstructionBuilder::BuildNewInstance(dex::TypeIndex type_index, uint32_t d bool finalizable; bool needs_access_check = NeedsAccessCheck(type_index, dex_cache, &finalizable); - // Only the non-resolved entrypoint handles the finalizable class case. If we + // Only the access check entrypoint handles the finalizable class case. If we // need access checks, then we haven't resolved the method and the class may // again be finalizable. QuickEntrypointEnum entrypoint = (finalizable || needs_access_check) - ? kQuickAllocObject + ? kQuickAllocObjectWithChecks : kQuickAllocObjectInitialized; if (outer_dex_cache.Get() != dex_cache.Get()) { @@ -946,7 +946,6 @@ bool HInstructionBuilder::BuildNewInstance(dex::TypeIndex type_index, uint32_t d AppendInstruction(new (arena_) HNewInstance( cls, - graph_->GetCurrentMethod(), dex_pc, type_index, *dex_compilation_unit_->GetDexFile(), diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index e1e0b7da2d..db1b277990 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -3774,10 +3774,9 @@ class HCompare FINAL : public HBinaryOperation { DISALLOW_COPY_AND_ASSIGN(HCompare); }; -class HNewInstance FINAL : public HExpression<2> { +class HNewInstance FINAL : public HExpression<1> { public: HNewInstance(HInstruction* cls, - HCurrentMethod* current_method, uint32_t dex_pc, dex::TypeIndex type_index, const DexFile& dex_file, @@ -3791,7 +3790,6 @@ class HNewInstance FINAL : public HExpression<2> { SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check); SetPackedFlag<kFlagFinalizable>(finalizable); SetRawInputAt(0, cls); - SetRawInputAt(1, current_method); } dex::TypeIndex GetTypeIndex() const { return type_index_; } diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index f9ac3a0f72..db7c1fbb06 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -134,39 +134,6 @@ void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { } } -void PrepareForRegisterAllocation::VisitNewInstance(HNewInstance* instruction) { - HLoadClass* load_class = instruction->InputAt(0)->AsLoadClass(); - const bool has_only_one_use = load_class->HasOnlyOneNonEnvironmentUse(); - // Change the entrypoint to kQuickAllocObject if either: - // - the class is finalizable (only kQuickAllocObject handles finalizable classes), - // - the class needs access checks (we do not know if it's finalizable), - // - or the load class has only one use. - if (instruction->IsFinalizable() || has_only_one_use || load_class->NeedsAccessCheck()) { - instruction->SetEntrypoint(kQuickAllocObject); - instruction->ReplaceInput(GetGraph()->GetIntConstant(load_class->GetTypeIndex().index_), 0); - if (has_only_one_use) { - // We've just removed the only use of the HLoadClass. Since we don't run DCE after this pass, - // do it manually if possible. - if (!load_class->CanThrow()) { - // If the load class can not throw, it has no side effects and can be removed if there is - // only one use. - load_class->GetBlock()->RemoveInstruction(load_class); - } else if (!instruction->GetEnvironment()->IsFromInlinedInvoke() && - CanMoveClinitCheck(load_class, instruction)) { - // The allocation entry point that deals with access checks does not work with inlined - // methods, so we need to check whether this allocation comes from an inlined method. - // We also need to make the same check as for moving clinit check, whether the HLoadClass - // has the clinit check responsibility or not (HLoadClass can throw anyway). - // If it needed access checks, we delegate the access check to the allocation. - if (load_class->NeedsAccessCheck()) { - instruction->SetEntrypoint(kQuickAllocObjectWithAccessCheck); - } - load_class->GetBlock()->RemoveInstruction(load_class); - } - } - } -} - bool PrepareForRegisterAllocation::CanEmitConditionAt(HCondition* condition, HInstruction* user) const { if (condition->GetNext() != user) { diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index a6791482a7..c128227654 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -44,7 +44,6 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { void VisitClinitCheck(HClinitCheck* check) OVERRIDE; void VisitCondition(HCondition* condition) OVERRIDE; void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE; - void VisitNewInstance(HNewInstance* instruction) OVERRIDE; bool CanMoveClinitCheck(HInstruction* input, HInstruction* user) const; bool CanEmitConditionAt(HCondition* condition, HInstruction* user) const; diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index ab4f9e944c..a3fce02970 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -5610,7 +5610,7 @@ const char* const VixlJniHelpersResults[] = { " 214: ecbd 8a10 vpop {s16-s31}\n", " 218: e8bd 8de0 ldmia.w sp!, {r5, r6, r7, r8, sl, fp, pc}\n", " 21c: 4660 mov r0, ip\n", - " 21e: f8d9 c2b0 ldr.w ip, [r9, #688] ; 0x2b0\n", + " 21e: f8d9 c2ac ldr.w ip, [r9, #684] ; 0x2ac\n", " 222: 47e0 blx ip\n", nullptr }; diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index a71ab4b53c..4d4ebdcad8 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -1124,28 +1124,23 @@ END art_quick_resolve_string // Generate the allocation entrypoints for each allocator. GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc). -ENTRY art_quick_alloc_object_rosalloc +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_RESOLVED_OBJECT(_rosalloc, RosAlloc). +ENTRY art_quick_alloc_object_resolved_rosalloc // Fast path rosalloc allocation. - // r0: type_idx/return value, r1: ArtMethod*, r9: Thread::Current - // r2, r3, r12: free. - ldr r2, [r1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_32] // Load dex cache resolved types array - // Load the class (r2) - ldr r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT] - cbz r2, .Lart_quick_alloc_object_rosalloc_slow_path // Check null class - + // r0: type/return value, r9: Thread::Current + // r1, r2, r3, r12: free. ldr r3, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET] // Check if the thread local // allocation stack has room. // TODO: consider using ldrd. ldr r12, [r9, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET] cmp r3, r12 - bhs .Lart_quick_alloc_object_rosalloc_slow_path + bhs .Lart_quick_alloc_object_resolved_rosalloc_slow_path - ldr r3, [r2, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET] // Load the object size (r3) + ldr r3, [r0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET] // Load the object size (r3) cmp r3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE // Check if the size is for a thread // local allocation. Also does the // initialized and finalizable checks. - bhs .Lart_quick_alloc_object_rosalloc_slow_path + bhs .Lart_quick_alloc_object_resolved_rosalloc_slow_path // Compute the rosalloc bracket index // from the size. Since the size is // already aligned we can combine the @@ -1159,7 +1154,7 @@ ENTRY art_quick_alloc_object_rosalloc // Load the free list head (r3). This // will be the return val. ldr r3, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)] - cbz r3, .Lart_quick_alloc_object_rosalloc_slow_path + cbz r3, .Lart_quick_alloc_object_resolved_rosalloc_slow_path // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1. ldr r1, [r3, #ROSALLOC_SLOT_NEXT_OFFSET] // Load the next pointer of the head // and update the list head with the @@ -1172,8 +1167,8 @@ ENTRY art_quick_alloc_object_rosalloc #if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET #error "Class pointer needs to overwrite next pointer." #endif - POISON_HEAP_REF r2 - str r2, [r3, #MIRROR_OBJECT_CLASS_OFFSET] + POISON_HEAP_REF r0 + str r0, [r3, #MIRROR_OBJECT_CLASS_OFFSET] // Fence. This is "ish" not "ishst" so // that it also ensures ordering of // the class status load with respect @@ -1204,20 +1199,20 @@ ENTRY art_quick_alloc_object_rosalloc mov r0, r3 // Set the return value and return. bx lr -.Lart_quick_alloc_object_rosalloc_slow_path: +.Lart_quick_alloc_object_resolved_rosalloc_slow_path: SETUP_SAVE_REFS_ONLY_FRAME r2 @ save callee saves in case of GC - mov r2, r9 @ pass Thread::Current - bl artAllocObjectFromCodeRosAlloc @ (uint32_t type_idx, Method* method, Thread*) + mov r1, r9 @ pass Thread::Current + bl artAllocObjectFromCodeResolvedRosAlloc @ (mirror::Class* cls, Thread*) RESTORE_SAVE_REFS_ONLY_FRAME RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER -END art_quick_alloc_object_rosalloc +END art_quick_alloc_object_resolved_rosalloc -// The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab. +// The common fast path code for art_quick_alloc_object_resolved_tlab +// and art_quick_alloc_object_resolved_region_tlab. // -// r0: type_idx/return value, r1: ArtMethod*, r2: class, r9: Thread::Current, r3, r12: free. -// Need to preserve r0 and r1 to the slow path. -.macro ALLOC_OBJECT_TLAB_FAST_PATH slowPathLabel - cbz r2, \slowPathLabel // Check null class +// r0: type r9: Thread::Current, r1, r2, r3, r12: free. +// Need to preserve r0 to the slow path. +.macro ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH slowPathLabel // Load thread_local_pos (r12) and // thread_local_end (r3) with ldrd. // Check constraints for ldrd. @@ -1232,14 +1227,14 @@ END art_quick_alloc_object_rosalloc // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1. // Reload old thread_local_pos (r0) // for the return value. - ldr r0, [r9, #THREAD_LOCAL_POS_OFFSET] - add r1, r0, r3 + ldr r2, [r9, #THREAD_LOCAL_POS_OFFSET] + add r1, r2, r3 str r1, [r9, #THREAD_LOCAL_POS_OFFSET] // Store new thread_local_pos. ldr r1, [r9, #THREAD_LOCAL_OBJECTS_OFFSET] // Increment thread_local_objects. add r1, r1, #1 str r1, [r9, #THREAD_LOCAL_OBJECTS_OFFSET] - POISON_HEAP_REF r2 - str r2, [r0, #MIRROR_OBJECT_CLASS_OFFSET] // Store the class pointer. + POISON_HEAP_REF r0 + str r0, [r2, #MIRROR_OBJECT_CLASS_OFFSET] // Store the class pointer. // Fence. This is "ish" not "ishst" so // that the code after this allocation // site will see the right values in @@ -1247,71 +1242,46 @@ END art_quick_alloc_object_rosalloc // Alternatively we could use "ishst" // if we use load-acquire for the // object size load.) + mov r0, r2 dmb ish bx lr .endm -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB). -ENTRY art_quick_alloc_object_tlab +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_RESOLVED_OBJECT(_tlab, TLAB). +ENTRY art_quick_alloc_object_resolved_tlab // Fast path tlab allocation. - // r0: type_idx/return value, r1: ArtMethod*, r9: Thread::Current - // r2, r3, r12: free. + // r0: type, r9: Thread::Current + // r1, r2, r3, r12: free. #if defined(USE_READ_BARRIER) mvn r0, #0 // Read barrier not supported here. bx lr // Return -1. #endif - ldr r2, [r1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_32] // Load dex cache resolved types array - // Load the class (r2) - ldr r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT] - ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path -.Lart_quick_alloc_object_tlab_slow_path: + ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_tlab_slow_path +.Lart_quick_alloc_object_resolved_tlab_slow_path: SETUP_SAVE_REFS_ONLY_FRAME r2 // Save callee saves in case of GC. - mov r2, r9 // Pass Thread::Current. - bl artAllocObjectFromCodeTLAB // (uint32_t type_idx, Method* method, Thread*) + mov r1, r9 // Pass Thread::Current. + bl artAllocObjectFromCodeResolvedTLAB // (mirror::Class* klass, Thread*) RESTORE_SAVE_REFS_ONLY_FRAME RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER -END art_quick_alloc_object_tlab +END art_quick_alloc_object_resolved_tlab -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB) -ENTRY art_quick_alloc_object_region_tlab +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB) +ENTRY art_quick_alloc_object_resolved_region_tlab // Fast path tlab allocation. - // r0: type_idx/return value, r1: ArtMethod*, r9: Thread::Current, r2, r3, r12: free. + // r0: type, r9: Thread::Current, r1, r2, r3, r12: free. #if !defined(USE_READ_BARRIER) eor r0, r0, r0 // Read barrier must be enabled here. sub r0, r0, #1 // Return -1. bx lr #endif - ldr r2, [r1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_32] // Load dex cache resolved types array - // Load the class (r2) - ldr r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT] - // Read barrier for class load. - ldr r3, [r9, #THREAD_IS_GC_MARKING_OFFSET] - cbnz r3, .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_marking -.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit: - ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path -.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_marking: - cbz r2, .Lart_quick_alloc_object_region_tlab_slow_path // Null check for loading lock word. - // Check lock word for mark bit, if marked do the allocation. - ldr r3, [r2, MIRROR_OBJECT_LOCK_WORD_OFFSET] - ands r3, #LOCK_WORD_MARK_BIT_MASK_SHIFTED - bne .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit -.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path: - // The read barrier slow path. Mark - // the class. - push {r0, r1, r3, lr} // Save registers. r3 is pushed only - // to align sp by 16 bytes. - mov r0, r2 // Pass the class as the first param. - bl artReadBarrierMark - mov r2, r0 // Get the (marked) class back. - pop {r0, r1, r3, lr} - b .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit -.Lart_quick_alloc_object_region_tlab_slow_path: + ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path +.Lart_quick_alloc_object_resolved_region_tlab_slow_path: SETUP_SAVE_REFS_ONLY_FRAME r2 // Save callee saves in case of GC. - mov r2, r9 // Pass Thread::Current. - bl artAllocObjectFromCodeRegionTLAB // (uint32_t type_idx, Method* method, Thread*) + mov r1, r9 // Pass Thread::Current. + bl artAllocObjectFromCodeResolvedRegionTLAB // (mirror::Class* klass, Thread*) RESTORE_SAVE_REFS_ONLY_FRAME RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER -END art_quick_alloc_object_region_tlab +END art_quick_alloc_object_resolved_region_tlab /* * Called by managed code when the value in rSUSPEND has been decremented to 0. diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index b88515f21f..8b1e0388c6 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -1669,7 +1669,6 @@ END art_quick_resolve_string // Generate the allocation entrypoints for each allocator. GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS // Comment out allocators that have arm64 specific asm. -// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB) implemented in asm // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB) // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB) @@ -1682,27 +1681,23 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB) -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc). -ENTRY art_quick_alloc_object_rosalloc +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc). +ENTRY art_quick_alloc_object_resolved_rosalloc // Fast path rosalloc allocation. - // x0: type_idx/return value, x1: ArtMethod*, xSELF(x19): Thread::Current - // x2-x7: free. - ldr x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64] // Load dex cache resolved types array - // Load the class (x2) - ldr w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT] - cbz x2, .Lart_quick_alloc_object_rosalloc_slow_path // Check null class + // x0: type, xSELF(x19): Thread::Current + // x1-x7: free. ldr x3, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET] // Check if the thread local // allocation stack has room. // ldp won't work due to large offset. ldr x4, [xSELF, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET] cmp x3, x4 - bhs .Lart_quick_alloc_object_rosalloc_slow_path - ldr w3, [x2, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET] // Load the object size (x3) + bhs .Lart_quick_alloc_object_resolved_rosalloc_slow_path + ldr w3, [x0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET] // Load the object size (x3) cmp x3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE // Check if the size is for a thread // local allocation. Also does the // finalizable and initialization // checks. - bhs .Lart_quick_alloc_object_rosalloc_slow_path + bhs .Lart_quick_alloc_object_resolved_rosalloc_slow_path // Compute the rosalloc bracket index // from the size. Since the size is // already aligned we can combine the @@ -1715,7 +1710,7 @@ ENTRY art_quick_alloc_object_rosalloc // Load the free list head (x3). This // will be the return val. ldr x3, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)] - cbz x3, .Lart_quick_alloc_object_rosalloc_slow_path + cbz x3, .Lart_quick_alloc_object_resolved_rosalloc_slow_path // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1. ldr x1, [x3, #ROSALLOC_SLOT_NEXT_OFFSET] // Load the next pointer of the head // and update the list head with the @@ -1728,8 +1723,8 @@ ENTRY art_quick_alloc_object_rosalloc #if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET #error "Class pointer needs to overwrite next pointer." #endif - POISON_HEAP_REF w2 - str w2, [x3, #MIRROR_OBJECT_CLASS_OFFSET] + POISON_HEAP_REF w0 + str w0, [x3, #MIRROR_OBJECT_CLASS_OFFSET] // Fence. This is "ish" not "ishst" so // that it also ensures ordering of // the object size load with respect @@ -1759,13 +1754,13 @@ ENTRY art_quick_alloc_object_rosalloc mov x0, x3 // Set the return value and return. ret -.Lart_quick_alloc_object_rosalloc_slow_path: - SETUP_SAVE_REFS_ONLY_FRAME // save callee saves in case of GC - mov x2, xSELF // pass Thread::Current - bl artAllocObjectFromCodeRosAlloc // (uint32_t type_idx, Method* method, Thread*) +.Lart_quick_alloc_object_resolved_rosalloc_slow_path: + SETUP_SAVE_REFS_ONLY_FRAME // save callee saves in case of GC + mov x1, xSELF // pass Thread::Current + bl artAllocObjectFromCodeResolvedRosAlloc // (mirror::Class* klass, Thread*) RESTORE_SAVE_REFS_ONLY_FRAME RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER -END art_quick_alloc_object_rosalloc +END art_quick_alloc_object_resolved_rosalloc // The common fast path code for art_quick_alloc_array_region_tlab. @@ -1834,16 +1829,6 @@ END art_quick_alloc_object_rosalloc ret .endm -// The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab. -// -// x0: type_idx/return value, x1: ArtMethod*, x2: Class*, xSELF(x19): Thread::Current -// x3-x7: free. -// Need to preserve x0 and x1 to the slow path. -.macro ALLOC_OBJECT_TLAB_FAST_PATH slowPathLabel - cbz x2, \slowPathLabel // Check null class - ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED \slowPathLabel -.endm - // TODO: delete ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED since it is the same as // ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED. .macro ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED slowPathLabel @@ -1853,20 +1838,18 @@ END art_quick_alloc_object_rosalloc .macro ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED slowPathLabel ldr x4, [xSELF, #THREAD_LOCAL_POS_OFFSET] ldr x5, [xSELF, #THREAD_LOCAL_END_OFFSET] - ldr w7, [x2, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET] // Load the object size (x7). + ldr w7, [x0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET] // Load the object size (x7). add x6, x4, x7 // Add object size to tlab pos. cmp x6, x5 // Check if it fits, overflow works // since the tlab pos and end are 32 // bit values. bhi \slowPathLabel - // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1. - mov x0, x4 str x6, [xSELF, #THREAD_LOCAL_POS_OFFSET] // Store new thread_local_pos. ldr x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET] // Increment thread_local_objects. add x5, x5, #1 str x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET] - POISON_HEAP_REF w2 - str w2, [x0, #MIRROR_OBJECT_CLASS_OFFSET] // Store the class pointer. + POISON_HEAP_REF w0 + str w0, [x4, #MIRROR_OBJECT_CLASS_OFFSET] // Store the class pointer. // Fence. This is "ish" not "ishst" so // that the code after this allocation // site will see the right values in @@ -1874,91 +1857,52 @@ END art_quick_alloc_object_rosalloc // Alternatively we could use "ishst" // if we use load-acquire for the // object size load.) + mov x0, x4 dmb ish ret .endm -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB). -ENTRY art_quick_alloc_object_tlab +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB). +ENTRY art_quick_alloc_object_resolved_tlab // Fast path tlab allocation. - // x0: type_idx/return value, x1: ArtMethod*, xSELF(x19): Thread::Current - // x2-x7: free. + // x0: type, xSELF(x19): Thread::Current + // x1-x7: free. #if defined(USE_READ_BARRIER) mvn x0, xzr // Read barrier not supported here. ret // Return -1. #endif - ldr x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64] // Load dex cache resolved types array - // Load the class (x2) - ldr w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT] - ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path -.Lart_quick_alloc_object_tlab_slow_path: - SETUP_SAVE_REFS_ONLY_FRAME // Save callee saves in case of GC. - mov x2, xSELF // Pass Thread::Current. - bl artAllocObjectFromCodeTLAB // (uint32_t type_idx, Method* method, Thread*) + ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_object_resolved_tlab_slow_path +.Lart_quick_alloc_object_resolved_tlab_slow_path: + SETUP_SAVE_REFS_ONLY_FRAME // Save callee saves in case of GC. + mov x1, xSELF // Pass Thread::Current. + bl artAllocObjectFromCodeResolvedTLAB // (mirror::Class*, Thread*) RESTORE_SAVE_REFS_ONLY_FRAME RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER -END art_quick_alloc_object_tlab +END art_quick_alloc_object_resolved_tlab // The common code for art_quick_alloc_object_*region_tlab -.macro GENERATE_ALLOC_OBJECT_REGION_TLAB name, entrypoint, fast_path, is_resolved, read_barrier +.macro GENERATE_ALLOC_OBJECT_RESOLVED_REGION_TLAB name, entrypoint, fast_path ENTRY \name // Fast path region tlab allocation. - // x0: type_idx/resolved class/return value, x1: ArtMethod*, xSELF(x19): Thread::Current - // If is_resolved is 1 then x0 is the resolved type, otherwise it is the index. - // x2-x7: free. + // x0: type, xSELF(x19): Thread::Current + // x1-x7: free. #if !defined(USE_READ_BARRIER) mvn x0, xzr // Read barrier must be enabled here. ret // Return -1. #endif -.if \is_resolved - mov x2, x0 // class is actually stored in x0 already -.else - ldr x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64] // Load dex cache resolved types array - // Load the class (x2) - ldr w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT] - // If the class is null, go slow path. The check is required to read the lock word. - cbz w2, .Lslow_path\name -.endif -.if \read_barrier - // Most common case: GC is not marking. - ldr w3, [xSELF, #THREAD_IS_GC_MARKING_OFFSET] - cbnz x3, .Lmarking\name -.endif .Ldo_allocation\name: \fast_path .Lslow_path\name -.Lmarking\name: -.if \read_barrier - // GC is marking, check the lock word of the class for the mark bit. - // Class is not null, check mark bit in lock word. - ldr w3, [x2, #MIRROR_OBJECT_LOCK_WORD_OFFSET] - // If the bit is not zero, do the allocation. - tbnz w3, #LOCK_WORD_MARK_BIT_SHIFT, .Ldo_allocation\name - // The read barrier slow path. Mark - // the class. - SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 32 // Save registers (x0, x1, lr). - SAVE_REG xLR, 24 // Align sp by 16 bytes. - mov x0, x2 // Pass the class as the first param. - bl artReadBarrierMark - mov x2, x0 // Get the (marked) class back. - RESTORE_REG xLR, 24 - RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32 // Restore registers. - b .Ldo_allocation\name -.endif .Lslow_path\name: SETUP_SAVE_REFS_ONLY_FRAME // Save callee saves in case of GC. - mov x2, xSELF // Pass Thread::Current. - bl \entrypoint // (uint32_t type_idx, Method* method, Thread*) + mov x1, xSELF // Pass Thread::Current. + bl \entrypoint // (mirror::Class*, Thread*) RESTORE_SAVE_REFS_ONLY_FRAME RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER END \name .endm -// Use ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED since the null check is already done in GENERATE_ALLOC_OBJECT_TLAB. -GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_region_tlab, artAllocObjectFromCodeRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED, 0, 1 -// No read barrier for the resolved or initialized cases since the caller is responsible for the -// read barrier due to the to-space invariant. -GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED, 1, 0 -GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED, 1, 0 +GENERATE_ALLOC_OBJECT_RESOLVED_REGION_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED +GENERATE_ALLOC_OBJECT_RESOLVED_REGION_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED // TODO: We could use this macro for the normal tlab allocator too. diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S index 3e8cdc9374..964ea563b0 100644 --- a/runtime/arch/mips/quick_entrypoints_mips.S +++ b/runtime/arch/mips/quick_entrypoints_mips.S @@ -1831,116 +1831,10 @@ END \name // Generate the allocation entrypoints for each allocator. GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc). -ENTRY art_quick_alloc_object_rosalloc - - # Fast path rosalloc allocation - # a0: type_idx - # a1: ArtMethod* - # s1: Thread::Current - # ----------------------------- - # t0: class - # t1: object size - # t2: rosalloc run - # t3: thread stack top offset - # t4: thread stack bottom offset - # v0: free list head - # - # t5, t6 : temps - - lw $t0, ART_METHOD_DEX_CACHE_TYPES_OFFSET_32($a1) # Load dex cache resolved types - # array. - - sll $t5, $a0, COMPRESSED_REFERENCE_SIZE_SHIFT # Shift the value. - addu $t5, $t0, $t5 # Compute the index. - lw $t0, 0($t5) # Load class (t0). - beqz $t0, .Lart_quick_alloc_object_rosalloc_slow_path - - li $t6, MIRROR_CLASS_STATUS_INITIALIZED - lw $t5, MIRROR_CLASS_STATUS_OFFSET($t0) # Check class status. - bne $t5, $t6, .Lart_quick_alloc_object_rosalloc_slow_path - - # Add a fake dependence from the following access flag and size loads to the status load. This - # is to prevent those loads from being reordered above the status load and reading wrong values. - xor $t5, $t5, $t5 - addu $t0, $t0, $t5 - - lw $t5, MIRROR_CLASS_ACCESS_FLAGS_OFFSET($t0) # Check if access flags has - li $t6, ACCESS_FLAGS_CLASS_IS_FINALIZABLE # kAccClassIsFinalizable. - and $t6, $t5, $t6 - bnez $t6, .Lart_quick_alloc_object_rosalloc_slow_path - - lw $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1) # Check if thread local allocation - lw $t4, THREAD_LOCAL_ALLOC_STACK_END_OFFSET($s1) # stack has any room left. - bgeu $t3, $t4, .Lart_quick_alloc_object_rosalloc_slow_path - - lw $t1, MIRROR_CLASS_OBJECT_SIZE_OFFSET($t0) # Load object size (t1). - li $t5, ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE # Check if size is for a thread local - # allocation. - bgtu $t1, $t5, .Lart_quick_alloc_object_rosalloc_slow_path - - # Compute the rosalloc bracket index from the size. Allign up the size by the rosalloc bracket - # quantum size and divide by the quantum size and subtract by 1. - - addiu $t1, $t1, -1 # Decrease obj size and shift right - srl $t1, $t1, ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT # by quantum. - - sll $t2, $t1, POINTER_SIZE_SHIFT - addu $t2, $t2, $s1 - lw $t2, THREAD_ROSALLOC_RUNS_OFFSET($t2) # Load rosalloc run (t2). - - # Load the free list head (v0). - # NOTE: this will be the return val. - - lw $v0, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2) - beqz $v0, .Lart_quick_alloc_object_rosalloc_slow_path - nop - - # Load the next pointer of the head and update the list head with the next pointer. - - lw $t5, ROSALLOC_SLOT_NEXT_OFFSET($v0) - sw $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2) - - # Store the class pointer in the header. This also overwrites the first pointer. The offsets are - # asserted to match. - -#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET -#error "Class pointer needs to overwrite next pointer." -#endif - - POISON_HEAP_REF $t0 - sw $t0, MIRROR_OBJECT_CLASS_OFFSET($v0) - - # Push the new object onto the thread local allocation stack and increment the thread local - # allocation stack top. - - sw $v0, 0($t3) - addiu $t3, $t3, COMPRESSED_REFERENCE_SIZE - sw $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1) - - # Decrement the size of the free list. - - lw $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2) - addiu $t5, $t5, -1 - sw $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2) - - sync # Fence. - - jalr $zero, $ra - nop - - .Lart_quick_alloc_object_rosalloc_slow_path: - - SETUP_SAVE_REFS_ONLY_FRAME - la $t9, artAllocObjectFromCodeRosAlloc - jalr $t9 - move $a2, $s1 # Pass self as argument. - RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER - -END art_quick_alloc_object_rosalloc -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB) /* * Entry from managed code to resolve a string, this stub will allocate a String and deliver an diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S index 0861d2d73e..2a18d53aea 100644 --- a/runtime/arch/mips64/quick_entrypoints_mips64.S +++ b/runtime/arch/mips64/quick_entrypoints_mips64.S @@ -1775,107 +1775,9 @@ END \name // Generate the allocation entrypoints for each allocator. GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc). -ENTRY art_quick_alloc_object_rosalloc - - # Fast path rosalloc allocation - # a0: type_idx - # a1: ArtMethod* - # s1: Thread::Current - # ----------------------------- - # t0: class - # t1: object size - # t2: rosalloc run - # t3: thread stack top offset - # a4: thread stack bottom offset - # v0: free list head - # - # a5, a6 : temps - - ld $t0, ART_METHOD_DEX_CACHE_TYPES_OFFSET_64($a1) # Load dex cache resolved types array. - - dsll $a5, $a0, COMPRESSED_REFERENCE_SIZE_SHIFT # Shift the value. - daddu $a5, $t0, $a5 # Compute the index. - lwu $t0, 0($a5) # Load class (t0). - beqzc $t0, .Lart_quick_alloc_object_rosalloc_slow_path - - li $a6, MIRROR_CLASS_STATUS_INITIALIZED - lwu $a5, MIRROR_CLASS_STATUS_OFFSET($t0) # Check class status. - bnec $a5, $a6, .Lart_quick_alloc_object_rosalloc_slow_path - - # Add a fake dependence from the following access flag and size loads to the status load. This - # is to prevent those loads from being reordered above the status load and reading wrong values. - xor $a5, $a5, $a5 - daddu $t0, $t0, $a5 - - lwu $a5, MIRROR_CLASS_ACCESS_FLAGS_OFFSET($t0) # Check if access flags has - li $a6, ACCESS_FLAGS_CLASS_IS_FINALIZABLE # kAccClassIsFinalizable. - and $a6, $a5, $a6 - bnezc $a6, .Lart_quick_alloc_object_rosalloc_slow_path - - ld $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1) # Check if thread local allocation stack - ld $a4, THREAD_LOCAL_ALLOC_STACK_END_OFFSET($s1) # has any room left. - bgeuc $t3, $a4, .Lart_quick_alloc_object_rosalloc_slow_path - - lwu $t1, MIRROR_CLASS_OBJECT_SIZE_OFFSET($t0) # Load object size (t1). - li $a5, ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE # Check if size is for a thread local - # allocation. - bltuc $a5, $t1, .Lart_quick_alloc_object_rosalloc_slow_path - - # Compute the rosalloc bracket index from the size. Allign up the size by the rosalloc bracket - # quantum size and divide by the quantum size and subtract by 1. - daddiu $t1, $t1, -1 # Decrease obj size and shift right by - dsrl $t1, $t1, ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT # quantum. - - dsll $t2, $t1, POINTER_SIZE_SHIFT - daddu $t2, $t2, $s1 - ld $t2, THREAD_ROSALLOC_RUNS_OFFSET($t2) # Load rosalloc run (t2). - - # Load the free list head (v0). - # NOTE: this will be the return val. - ld $v0, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2) - beqzc $v0, .Lart_quick_alloc_object_rosalloc_slow_path - - # Load the next pointer of the head and update the list head with the next pointer. - ld $a5, ROSALLOC_SLOT_NEXT_OFFSET($v0) - sd $a5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2) - - # Store the class pointer in the header. This also overwrites the first pointer. The offsets are - # asserted to match. - -#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET -#error "Class pointer needs to overwrite next pointer." -#endif - - POISON_HEAP_REF $t0 - sw $t0, MIRROR_OBJECT_CLASS_OFFSET($v0) - - # Push the new object onto the thread local allocation stack and increment the thread local - # allocation stack top. - sd $v0, 0($t3) - daddiu $t3, $t3, COMPRESSED_REFERENCE_SIZE - sd $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1) - - # Decrement the size of the free list. - lw $a5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2) - addiu $a5, $a5, -1 - sw $a5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2) - - sync # Fence. - - jalr $zero, $ra - .cpreturn # Restore gp from t8 in branch delay slot. - -.Lart_quick_alloc_object_rosalloc_slow_path: - SETUP_SAVE_REFS_ONLY_FRAME - jal artAllocObjectFromCodeRosAlloc - move $a2 ,$s1 # Pass self as argument. - RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER - -END art_quick_alloc_object_rosalloc - -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB) /* * Entry from managed code to resolve a string, this stub will allocate a String and deliver an diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S index db2fdcabea..abd9046174 100644 --- a/runtime/arch/quick_alloc_entrypoints.S +++ b/runtime/arch/quick_alloc_entrypoints.S @@ -15,15 +15,13 @@ */ .macro GENERATE_ALLOC_ENTRYPOINTS c_suffix, cxx_suffix -// Called by managed code to allocate an object. -TWO_ARG_DOWNCALL art_quick_alloc_object\c_suffix, artAllocObjectFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // Called by managed code to allocate an object of a resolved class. -TWO_ARG_DOWNCALL art_quick_alloc_object_resolved\c_suffix, artAllocObjectFromCodeResolved\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +ONE_ARG_DOWNCALL art_quick_alloc_object_resolved\c_suffix, artAllocObjectFromCodeResolved\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // Called by managed code to allocate an object of an initialized class. -TWO_ARG_DOWNCALL art_quick_alloc_object_initialized\c_suffix, artAllocObjectFromCodeInitialized\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +ONE_ARG_DOWNCALL art_quick_alloc_object_initialized\c_suffix, artAllocObjectFromCodeInitialized\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // Called by managed code to allocate an object when the caller doesn't know whether it has access // to the created type. -TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check\c_suffix, artAllocObjectFromCodeWithAccessCheck\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +ONE_ARG_DOWNCALL art_quick_alloc_object_with_checks\c_suffix, artAllocObjectFromCodeWithChecks\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // Called by managed code to allocate an array. THREE_ARG_DOWNCALL art_quick_alloc_array\c_suffix, artAllocArrayFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // Called by managed code to allocate an array of a resolve class. @@ -61,14 +59,12 @@ GENERATE_ALLOC_ENTRYPOINTS _region_tlab_instrumented, RegionTLABInstrumented // Generate the allocation entrypoints for each allocator. This is used as an alternative to // GNERATE_ALL_ALLOC_ENTRYPOINTS for selectively implementing allocation fast paths in // hand-written assembly. -#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(c_suffix, cxx_suffix) \ - TWO_ARG_DOWNCALL art_quick_alloc_object ## c_suffix, artAllocObjectFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER #define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(c_suffix, cxx_suffix) \ - TWO_ARG_DOWNCALL art_quick_alloc_object_resolved ## c_suffix, artAllocObjectFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER + ONE_ARG_DOWNCALL art_quick_alloc_object_resolved ## c_suffix, artAllocObjectFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER #define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(c_suffix, cxx_suffix) \ - TWO_ARG_DOWNCALL art_quick_alloc_object_initialized ## c_suffix, artAllocObjectFromCodeInitialized ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER + ONE_ARG_DOWNCALL art_quick_alloc_object_initialized ## c_suffix, artAllocObjectFromCodeInitialized ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER #define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \ - TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check ## c_suffix, artAllocObjectFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER + ONE_ARG_DOWNCALL art_quick_alloc_object_with_checks ## c_suffix, artAllocObjectFromCodeWithChecks ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER #define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(c_suffix, cxx_suffix) \ THREE_ARG_DOWNCALL art_quick_alloc_array ## c_suffix, artAllocArrayFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER #define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(c_suffix, cxx_suffix) \ @@ -93,8 +89,7 @@ GENERATE_ALLOC_ENTRYPOINTS_FOR_REGION_TLAB_ALLOCATOR .macro GENERATE_ALLOC_ENTRYPOINTS_FOR_REGION_TLAB_ALLOCATOR // This is to be separately defined for each architecture to allow a hand-written assembly fast path. -// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB) +// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB) @@ -109,8 +104,7 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB) .macro GENERATE_ALLOC_ENTRYPOINTS_FOR_TLAB_ALLOCATOR // This is to be separately defined for each architecture to allow a hand-written assembly fast path. -// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB) +// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB) @@ -129,7 +123,6 @@ GENERATE_ALLOC_ENTRYPOINTS_FOR_TLAB_ALLOCATOR .endm .macro GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc, DlMalloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc, DlMalloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc) @@ -142,7 +135,6 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_dlmalloc, DlMalloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc, DlMalloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc, DlMalloc) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc_instrumented, DlMallocInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc_instrumented, DlMallocInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented) @@ -156,8 +148,7 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc_instrumented, DlMal GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc_instrumented, DlMallocInstrumented) // This is to be separately defined for each architecture to allow a hand-written assembly fast path. -// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc) +// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc, RosAlloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc, RosAlloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_rosalloc, RosAlloc) @@ -169,7 +160,6 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_rosalloc, RosAlloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_rosalloc, RosAlloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_rosalloc, RosAlloc) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc_instrumented, RosAllocInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc_instrumented, RosAllocInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented) @@ -182,7 +172,6 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_rosalloc_instrumented, RosAl GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_rosalloc_instrumented, RosAllocInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_rosalloc_instrumented, RosAllocInstrumented) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer, BumpPointer) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer, BumpPointer) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer, BumpPointer) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer) @@ -195,7 +184,6 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_bump_pointer, BumpPointer) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer, BumpPointer) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer, BumpPointer) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer_instrumented, BumpPointerInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer_instrumented, BumpPointerInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented) @@ -208,7 +196,6 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_bump_pointer_instrumented, B GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer_instrumented, BumpPointerInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer_instrumented, BumpPointerInstrumented) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab_instrumented, TLABInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab_instrumented, TLABInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab_instrumented, TLABInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented) @@ -221,7 +208,6 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab_instrumented, TLABInstr GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab_instrumented, TLABInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab_instrumented, TLABInstrumented) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region, Region) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region, Region) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region, Region) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region, Region) @@ -234,7 +220,6 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region, Region) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region, Region) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region, Region) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_instrumented, RegionInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_instrumented, RegionInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_instrumented, RegionInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented) @@ -247,7 +232,6 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_instrumented, RegionI GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_instrumented, RegionInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_instrumented, RegionInstrumented) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab_instrumented, RegionTLABInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab_instrumented, RegionTLABInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab_instrumented, RegionTLABInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented) diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc index 9e385f839f..ee65fa8ab0 100644 --- a/runtime/arch/stub_test.cc +++ b/runtime/arch/stub_test.cc @@ -1062,12 +1062,8 @@ TEST_F(StubTest, AllocObject) { EXPECT_FALSE(self->IsExceptionPending()); { - // Use an arbitrary method from c to use as referrer - size_t result = Invoke3(static_cast<size_t>(c->GetDexTypeIndex().index_), // type_idx - // arbitrary - reinterpret_cast<size_t>(c->GetVirtualMethod(0, kRuntimePointerSize)), - 0U, - StubTest::GetEntrypoint(self, kQuickAllocObject), + size_t result = Invoke3(reinterpret_cast<size_t>(c.Get()), 0u, 0U, + StubTest::GetEntrypoint(self, kQuickAllocObjectWithChecks), self); EXPECT_FALSE(self->IsExceptionPending()); @@ -1078,8 +1074,6 @@ TEST_F(StubTest, AllocObject) { } { - // We can use null in the second argument as we do not need a method here (not used in - // resolved/initialized cases) size_t result = Invoke3(reinterpret_cast<size_t>(c.Get()), 0u, 0U, StubTest::GetEntrypoint(self, kQuickAllocObjectResolved), self); @@ -1092,8 +1086,6 @@ TEST_F(StubTest, AllocObject) { } { - // We can use null in the second argument as we do not need a method here (not used in - // resolved/initialized cases) size_t result = Invoke3(reinterpret_cast<size_t>(c.Get()), 0u, 0U, StubTest::GetEntrypoint(self, kQuickAllocObjectInitialized), self); diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index c6f4c0346f..62c29cf268 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -956,52 +956,42 @@ END_MACRO // Generate the allocation entrypoints for each allocator. GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc). -DEFINE_FUNCTION art_quick_alloc_object_rosalloc +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc). +DEFINE_FUNCTION art_quick_alloc_object_resolved_rosalloc // Fast path rosalloc allocation. - // eax: uint32_t type_idx/return value, ecx: ArtMethod* - // ebx, edx: free - PUSH edi - movl ART_METHOD_DEX_CACHE_TYPES_OFFSET_32(%ecx), %edx // Load dex cache resolved types array - // Load the class (edx) - movl 0(%edx, %eax, COMPRESSED_REFERENCE_SIZE), %edx - testl %edx, %edx // Check null class - jz .Lart_quick_alloc_object_rosalloc_slow_path - + // eax: type/return value + // ecx, ebx, edx: free movl %fs:THREAD_SELF_OFFSET, %ebx // ebx = thread // Check if the thread local allocation // stack has room - movl THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%ebx), %edi - cmpl THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%ebx), %edi - jae .Lart_quick_alloc_object_rosalloc_slow_path + movl THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%ebx), %ecx + cmpl THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%ebx), %ecx + jae .Lart_quick_alloc_object_resolved_rosalloc_slow_path - movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%edx), %edi // Load the object size (edi) + movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%eax), %ecx // Load the object size (ecx) // Check if the size is for a thread // local allocation. Also does the // finalizable and initialization check. - cmpl LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %edi - ja .Lart_quick_alloc_object_rosalloc_slow_path - shrl LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %edi // Calculate the rosalloc bracket index + cmpl LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %ecx + ja .Lart_quick_alloc_object_resolved_rosalloc_slow_path + shrl LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %ecx // Calculate the rosalloc bracket index // from object size. // Load thread local rosalloc run (ebx) // Subtract __SIZEOF_POINTER__ to subtract // one from edi as there is no 0 byte run // and the size is already aligned. - movl (THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)(%ebx, %edi, __SIZEOF_POINTER__), %ebx + movl (THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)(%ebx, %ecx, __SIZEOF_POINTER__), %ebx // Load free_list head (edi), // this will be the return value. - movl (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%ebx), %edi - test %edi, %edi - jz .Lart_quick_alloc_object_rosalloc_slow_path + movl (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%ebx), %ecx + jecxz .Lart_quick_alloc_object_resolved_rosalloc_slow_path // Point of no slow path. Won't go to - // the slow path from here on. Ok to - // clobber eax and ecx. - movl %edi, %eax + // the slow path from here on. // Load the next pointer of the head // and update head of free list with // next pointer - movl ROSALLOC_SLOT_NEXT_OFFSET(%eax), %edi - movl %edi, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%ebx) + movl ROSALLOC_SLOT_NEXT_OFFSET(%ecx), %edx + movl %edx, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%ebx) // Decrement size of free list by 1 decl (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(%ebx) // Store the class pointer in the @@ -1011,141 +1001,104 @@ DEFINE_FUNCTION art_quick_alloc_object_rosalloc #if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET #error "Class pointer needs to overwrite next pointer." #endif - POISON_HEAP_REF edx - movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%eax) + POISON_HEAP_REF eax + movl %eax, MIRROR_OBJECT_CLASS_OFFSET(%ecx) movl %fs:THREAD_SELF_OFFSET, %ebx // ebx = thread // Push the new object onto the thread // local allocation stack and // increment the thread local // allocation stack top. - movl THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%ebx), %edi - movl %eax, (%edi) - addl LITERAL(COMPRESSED_REFERENCE_SIZE), %edi - movl %edi, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%ebx) + movl THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%ebx), %eax + movl %ecx, (%eax) + addl LITERAL(COMPRESSED_REFERENCE_SIZE), %eax + movl %eax, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%ebx) // No fence needed for x86. - POP edi + movl %ecx, %eax // Move object to return register ret -.Lart_quick_alloc_object_rosalloc_slow_path: - POP edi +.Lart_quick_alloc_object_resolved_rosalloc_slow_path: SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx // save ref containing registers for GC // Outgoing argument set up - PUSH eax // alignment padding + subl LITERAL(8), %esp // alignment padding pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() CFI_ADJUST_CFA_OFFSET(4) - PUSH ecx PUSH eax - call SYMBOL(artAllocObjectFromCodeRosAlloc) // cxx_name(arg0, arg1, Thread*) + call SYMBOL(artAllocObjectFromCodeResolvedRosAlloc) // cxx_name(arg0, Thread*) addl LITERAL(16), %esp // pop arguments CFI_ADJUST_CFA_OFFSET(-16) RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // return or deliver exception -END_FUNCTION art_quick_alloc_object_rosalloc +END_FUNCTION art_quick_alloc_object_resolved_rosalloc -// The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab. +// The common fast path code for art_quick_alloc_object_resolved_tlab +// and art_quick_alloc_object_resolved_region_tlab. // -// EAX: type_idx/return_value, ECX: ArtMethod*, EDX: the class. -MACRO1(ALLOC_OBJECT_TLAB_FAST_PATH, slowPathLabel) - testl %edx, %edx // Check null class - jz VAR(slowPathLabel) +// EAX: type/return_value +MACRO1(ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH, slowPathLabel) movl %fs:THREAD_SELF_OFFSET, %ebx // ebx = thread movl THREAD_LOCAL_END_OFFSET(%ebx), %edi // Load thread_local_end. subl THREAD_LOCAL_POS_OFFSET(%ebx), %edi // Compute the remaining buffer size. - movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%edx), %esi // Load the object size. - cmpl %edi, %esi // Check if it fits. + movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%eax), %ecx // Load the object size. + cmpl %edi, %ecx // Check if it fits. ja VAR(slowPathLabel) - movl THREAD_LOCAL_POS_OFFSET(%ebx), %eax // Load thread_local_pos + movl THREAD_LOCAL_POS_OFFSET(%ebx), %edx // Load thread_local_pos // as allocated object. - addl %eax, %esi // Add the object size. - movl %esi, THREAD_LOCAL_POS_OFFSET(%ebx) // Update thread_local_pos. + addl %edx, %ecx // Add the object size. + movl %ecx, THREAD_LOCAL_POS_OFFSET(%ebx) // Update thread_local_pos. incl THREAD_LOCAL_OBJECTS_OFFSET(%ebx) // Increase thread_local_objects. // Store the class pointer in the header. // No fence needed for x86. - POISON_HEAP_REF edx - movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%eax) + POISON_HEAP_REF eax + movl %eax, MIRROR_OBJECT_CLASS_OFFSET(%edx) + movl %edx, %eax POP edi - POP esi ret // Fast path succeeded. END_MACRO -// The common slow path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab. -MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name) +// The common slow path code for art_quick_alloc_object_resolved_tlab +// and art_quick_alloc_object_resolved_region_tlab. +MACRO1(ALLOC_OBJECT_RESOLVED_TLAB_SLOW_PATH, cxx_name) POP edi - POP esi SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx // save ref containing registers for GC // Outgoing argument set up - PUSH eax // alignment padding + subl LITERAL(8), %esp // alignment padding + CFI_ADJUST_CFA_OFFSET(8) pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() CFI_ADJUST_CFA_OFFSET(4) - PUSH ecx PUSH eax - call CALLVAR(cxx_name) // cxx_name(arg0, arg1, Thread*) + call CALLVAR(cxx_name) // cxx_name(arg0, Thread*) addl LITERAL(16), %esp CFI_ADJUST_CFA_OFFSET(-16) RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // return or deliver exception END_MACRO -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB). May be called +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB). May be called // for CC if the GC is not marking. -DEFINE_FUNCTION art_quick_alloc_object_tlab +DEFINE_FUNCTION art_quick_alloc_object_resolved_tlab // Fast path tlab allocation. - // EAX: uint32_t type_idx/return value, ECX: ArtMethod*. - // EBX, EDX: free. - PUSH esi + // EAX: type + // EBX, ECX, EDX: free. PUSH edi - movl ART_METHOD_DEX_CACHE_TYPES_OFFSET_32(%ecx), %edx // Load dex cache resolved types array - // Might need to break down into multiple instructions to get the base address in a register. - // Load the class - movl 0(%edx, %eax, COMPRESSED_REFERENCE_SIZE), %edx - ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path -.Lart_quick_alloc_object_tlab_slow_path: - ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeTLAB -END_FUNCTION art_quick_alloc_object_tlab - -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB). -DEFINE_FUNCTION art_quick_alloc_object_region_tlab + ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_tlab_slow_path +.Lart_quick_alloc_object_resolved_tlab_slow_path: + ALLOC_OBJECT_RESOLVED_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedTLAB +END_FUNCTION art_quick_alloc_object_resolved_tlab + +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB). +DEFINE_FUNCTION art_quick_alloc_object_resolved_region_tlab // Fast path region tlab allocation. - // EAX: uint32_t type_idx/return value, ECX: ArtMethod*. - // EBX, EDX: free. + // EAX: type/return value + // EBX, ECX, EDX: free. #if !defined(USE_READ_BARRIER) int3 int3 #endif - PUSH esi PUSH edi - movl ART_METHOD_DEX_CACHE_TYPES_OFFSET_32(%ecx), %edx // Load dex cache resolved types array - // Might need to break down into multiple instructions to get the base address in a register. - // Load the class - movl 0(%edx, %eax, COMPRESSED_REFERENCE_SIZE), %edx - // Read barrier for class load. - cmpl LITERAL(0), %fs:THREAD_IS_GC_MARKING_OFFSET - jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit - // Null check so that we can load the lock word. - testl %edx, %edx - jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit - // Check the mark bit, if it is 1 return. - testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) - jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path -.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit: - ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path -.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path: - // The read barrier slow path. Mark the class. - PUSH eax - PUSH ecx - // Outgoing argument set up - subl MACRO_LITERAL(8), %esp // Alignment padding - CFI_ADJUST_CFA_OFFSET(8) - PUSH edx // Pass the class as the first param. - call SYMBOL(artReadBarrierMark) // cxx_name(mirror::Object* obj) - movl %eax, %edx - addl MACRO_LITERAL(12), %esp - CFI_ADJUST_CFA_OFFSET(-12) - POP ecx - POP eax - jmp .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit -.Lart_quick_alloc_object_region_tlab_slow_path: - ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeRegionTLAB -END_FUNCTION art_quick_alloc_object_region_tlab + ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path +.Lart_quick_alloc_object_resolved_region_tlab_slow_path: + ALLOC_OBJECT_RESOLVED_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedRegionTLAB +END_FUNCTION art_quick_alloc_object_resolved_region_tlab + DEFINE_FUNCTION art_quick_resolve_string SETUP_SAVE_EVERYTHING_FRAME ebx, ebx diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 4c46b08a9e..facd563428 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -983,7 +983,6 @@ GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS // Comment out allocators that have x86_64 specific asm. // Region TLAB: -// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB) // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB) // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB) @@ -996,11 +995,9 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB) // Normal TLAB: -// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB) // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB) // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB) -// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB) // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB) @@ -1009,29 +1006,25 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB) -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc). -DEFINE_FUNCTION art_quick_alloc_object_rosalloc + +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc). +DEFINE_FUNCTION art_quick_alloc_object_resolved_rosalloc // Fast path rosalloc allocation. - // RDI: type_idx, RSI: ArtMethod*, RAX: return value - // RDX, RCX, R8, R9: free. - movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx // Load dex cache resolved types array - // Load the class (edx) - movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx - testl %edx, %edx // Check null class - jz .Lart_quick_alloc_object_rosalloc_slow_path + // RDI: mirror::Class*, RAX: return value + // RSI, RDX, RCX, R8, R9: free. // Check if the thread local // allocation stack has room. movq %gs:THREAD_SELF_OFFSET, %r8 // r8 = thread movq THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8), %rcx // rcx = alloc stack top. cmpq THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%r8), %rcx - jae .Lart_quick_alloc_object_rosalloc_slow_path + jae .Lart_quick_alloc_object_resolved_rosalloc_slow_path // Load the object size - movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdx), %eax + movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %eax // Check if the size is for a thread // local allocation. Also does the // initialized and finalizable checks. cmpl LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %eax - ja .Lart_quick_alloc_object_rosalloc_slow_path + ja .Lart_quick_alloc_object_resolved_rosalloc_slow_path // Compute the rosalloc bracket index // from the size. shrq LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %rax @@ -1045,7 +1038,7 @@ DEFINE_FUNCTION art_quick_alloc_object_rosalloc // will be the return val. movq (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9), %rax testq %rax, %rax - jz .Lart_quick_alloc_object_rosalloc_slow_path + jz .Lart_quick_alloc_object_resolved_rosalloc_slow_path // "Point of no slow path". Won't go to the slow path from here on. OK to clobber rdi and rsi. // Push the new object onto the thread // local allocation stack and @@ -1066,17 +1059,17 @@ DEFINE_FUNCTION art_quick_alloc_object_rosalloc #if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET #error "Class pointer needs to overwrite next pointer." #endif - POISON_HEAP_REF edx - movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%rax) + POISON_HEAP_REF edi + movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax) // Decrement the size of the free list decl (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(%r9) // No fence necessary for x86. ret -.Lart_quick_alloc_object_rosalloc_slow_path: +.Lart_quick_alloc_object_resolved_rosalloc_slow_path: SETUP_SAVE_REFS_ONLY_FRAME // save ref containing registers for GC // Outgoing argument set up - movq %gs:THREAD_SELF_OFFSET, %rdx // pass Thread::Current() - call SYMBOL(artAllocObjectFromCodeRosAlloc) // cxx_name(arg0, arg1, Thread*) + movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current() + call SYMBOL(artAllocObjectFromCodeResolvedRosAlloc) // cxx_name(arg0, Thread*) RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // return or deliver exception END_FUNCTION art_quick_alloc_object_rosalloc @@ -1095,19 +1088,19 @@ END_MACRO // TODO: delete ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH since it is the same as // ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH. // -// RDI: type_idx, RSI: ArtMethod*, RDX/EDX: the class, RAX: return value. -// RCX: scratch, r8: Thread::Current(). +// RDI: the class, RAX: return value. +// RCX, RSI, RDX: scratch, r8: Thread::Current(). MACRO1(ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH, slowPathLabel) ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH(RAW_VAR(slowPathLabel)) END_MACRO // The fast path code for art_quick_alloc_object_initialized_region_tlab. // -// RDI: type_idx, RSI: ArtMethod*, RDX/EDX: the class, RAX: return value. -// RCX: scratch, r8: Thread::Current(). +// RDI: the class, RSI: ArtMethod*, RAX: return value. +// RCX, RSI, RDX: scratch, r8: Thread::Current(). MACRO1(ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH, slowPathLabel) movq %gs:THREAD_SELF_OFFSET, %r8 // r8 = thread - movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdx), %ecx // Load the object size. + movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %ecx // Load the object size. movq THREAD_LOCAL_POS_OFFSET(%r8), %rax addq %rax, %rcx // Add size to pos, note that these // are both 32 bit ints, overflow @@ -1120,8 +1113,8 @@ MACRO1(ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH, slowPathLabel) // Store the class pointer in the // header. // No fence needed for x86. - POISON_HEAP_REF edx - movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%rax) + POISON_HEAP_REF edi + movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax) ret // Fast path succeeded. END_MACRO @@ -1164,12 +1157,14 @@ MACRO1(ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED, slowPathLabel) ret // Fast path succeeded. END_MACRO -// The common slow path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab. + +// The common slow path code for art_quick_alloc_object_{resolved, initialized}_tlab +// and art_quick_alloc_object_{resolved, initialized}_region_tlab. MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name) SETUP_SAVE_REFS_ONLY_FRAME // save ref containing registers for GC // Outgoing argument set up - movq %gs:THREAD_SELF_OFFSET, %rdx // pass Thread::Current() - call CALLVAR(cxx_name) // cxx_name(arg0, arg1, Thread*) + movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current() + call CALLVAR(cxx_name) // cxx_name(arg0, Thread*) RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // return or deliver exception END_MACRO @@ -1184,26 +1179,11 @@ MACRO1(ALLOC_ARRAY_TLAB_SLOW_PATH, cxx_name) RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // return or deliver exception END_MACRO -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB). May be -// called with CC if the GC is not active. -DEFINE_FUNCTION art_quick_alloc_object_tlab - // RDI: uint32_t type_idx, RSI: ArtMethod* - // RDX, RCX, R8, R9: free. RAX: return val. - movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx // Load dex cache resolved types array - // Might need to break down into multiple instructions to get the base address in a register. - // Load the class - movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx - ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path -.Lart_quick_alloc_object_tlab_slow_path: - ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeTLAB -END_FUNCTION art_quick_alloc_object_tlab - // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB). May be // called with CC if the GC is not active. DEFINE_FUNCTION art_quick_alloc_object_resolved_tlab - // RDI: mirror::Class* klass, RSI: ArtMethod* - // RDX, RCX, R8, R9: free. RAX: return val. - movq %rdi, %rdx + // RDI: mirror::Class* klass + // RDX, RSI, RCX, R8, R9: free. RAX: return val. ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_tlab_slow_path .Lart_quick_alloc_object_resolved_tlab_slow_path: ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedTLAB @@ -1212,9 +1192,8 @@ END_FUNCTION art_quick_alloc_object_resolved_tlab // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB). // May be called with CC if the GC is not active. DEFINE_FUNCTION art_quick_alloc_object_initialized_tlab - // RDI: mirror::Class* klass, RSI: ArtMethod* - // RDX, RCX, R8, R9: free. RAX: return val. - movq %rdi, %rdx + // RDI: mirror::Class* klass + // RDX, RSI, RCX, R8, R9: free. RAX: return val. ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_tlab_slow_path .Lart_quick_alloc_object_initialized_tlab_slow_path: ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedTLAB @@ -1292,49 +1271,12 @@ DEFINE_FUNCTION art_quick_alloc_array_resolved_region_tlab ALLOC_ARRAY_TLAB_SLOW_PATH artAllocArrayFromCodeResolvedRegionTLAB END_FUNCTION art_quick_alloc_array_resolved_region_tlab -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB). -DEFINE_FUNCTION art_quick_alloc_object_region_tlab - // Fast path region tlab allocation. - // RDI: uint32_t type_idx, RSI: ArtMethod* - // RDX, RCX, R8, R9: free. RAX: return val. - ASSERT_USE_READ_BARRIER - movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx // Load dex cache resolved types array - movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx // Load the class - // Null check so that we can load the lock word. - testl %edx, %edx - jz .Lart_quick_alloc_object_region_tlab_slow_path - // Since we have allocation entrypoint switching, we know the GC is marking. - // Check the mark bit, if it is 0, do the read barrier mark. - testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) - jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path -.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit: - // Use resolved one since we already did the null check. - ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path -.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path: - // The read barrier slow path. Mark the class. - PUSH rdi - PUSH rsi - subq LITERAL(8), %rsp // 16 byte alignment - // Outgoing argument set up - movq %rdx, %rdi // Pass the class as the first param. - call SYMBOL(artReadBarrierMark) // cxx_name(mirror::Object* obj) - movq %rax, %rdx - addq LITERAL(8), %rsp - POP rsi - POP rdi - jmp .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit -.Lart_quick_alloc_object_region_tlab_slow_path: - ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeRegionTLAB -END_FUNCTION art_quick_alloc_object_region_tlab - // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB). DEFINE_FUNCTION art_quick_alloc_object_resolved_region_tlab // Fast path region tlab allocation. - // RDI: mirror::Class* klass, RSI: ArtMethod* - // RDX, RCX, R8, R9: free. RAX: return val. + // RDI: mirror::Class* klass + // RDX, RSI, RCX, R8, R9: free. RAX: return val. ASSERT_USE_READ_BARRIER - // No read barrier since the caller is responsible for that. - movq %rdi, %rdx ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path .Lart_quick_alloc_object_resolved_region_tlab_slow_path: ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedRegionTLAB @@ -1343,10 +1285,9 @@ END_FUNCTION art_quick_alloc_object_resolved_region_tlab // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB). DEFINE_FUNCTION art_quick_alloc_object_initialized_region_tlab // Fast path region tlab allocation. - // RDI: mirror::Class* klass, RSI: ArtMethod* - // RDX, RCX, R8, R9: free. RAX: return val. + // RDI: mirror::Class* klass + // RDX, RSI, RCX, R8, R9: free. RAX: return val. ASSERT_USE_READ_BARRIER - movq %rdi, %rdx // No read barrier since the caller is responsible for that. ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_region_tlab_slow_path .Lart_quick_alloc_object_initialized_region_tlab_slow_path: diff --git a/runtime/asm_support.h b/runtime/asm_support.h index e4972da13d..bfdddf7b03 100644 --- a/runtime/asm_support.h +++ b/runtime/asm_support.h @@ -98,7 +98,7 @@ ADD_TEST_EQ(THREAD_LOCAL_POS_OFFSET, ADD_TEST_EQ(THREAD_LOCAL_END_OFFSET, art::Thread::ThreadLocalEndOffset<POINTER_SIZE>().Int32Value()) // Offset of field Thread::tlsPtr_.thread_local_objects. -#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_END_OFFSET + 2 * __SIZEOF_POINTER__) +#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_END_OFFSET + __SIZEOF_POINTER__) ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET, art::Thread::ThreadLocalObjectsOffset<POINTER_SIZE>().Int32Value()) // Offset of field Thread::tlsPtr_.mterp_current_ibase. diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h index 14c9c21356..469c45c10c 100644 --- a/runtime/entrypoints/entrypoint_utils-inl.h +++ b/runtime/entrypoints/entrypoint_utils-inl.h @@ -127,43 +127,21 @@ inline ArtMethod* GetCalleeSaveMethodCaller(Thread* self, Runtime::CalleeSaveTyp self->GetManagedStack()->GetTopQuickFrame(), type, true /* do_caller_check */); } -template <const bool kAccessCheck> -ALWAYS_INLINE -inline mirror::Class* CheckObjectAlloc(dex::TypeIndex type_idx, - ArtMethod* method, - Thread* self, - bool* slow_path) { - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - PointerSize pointer_size = class_linker->GetImagePointerSize(); - mirror::Class* klass = method->GetDexCacheResolvedType<false>(type_idx, pointer_size); - if (UNLIKELY(klass == nullptr)) { - klass = class_linker->ResolveType(type_idx, method); +ALWAYS_INLINE inline mirror::Class* CheckObjectAlloc(mirror::Class* klass, + Thread* self, + bool* slow_path) + REQUIRES_SHARED(Locks::mutator_lock_) + REQUIRES(!Roles::uninterruptible_) { + if (UNLIKELY(!klass->IsInstantiable())) { + self->ThrowNewException("Ljava/lang/InstantiationError;", klass->PrettyDescriptor().c_str()); *slow_path = true; - if (klass == nullptr) { - DCHECK(self->IsExceptionPending()); - return nullptr; // Failure - } else { - DCHECK(!self->IsExceptionPending()); - } + return nullptr; // Failure } - if (kAccessCheck) { - if (UNLIKELY(!klass->IsInstantiable())) { - self->ThrowNewException("Ljava/lang/InstantiationError;", klass->PrettyDescriptor().c_str()); - *slow_path = true; - return nullptr; // Failure - } - if (UNLIKELY(klass->IsClassClass())) { - ThrowIllegalAccessError(nullptr, "Class %s is inaccessible", - klass->PrettyDescriptor().c_str()); - *slow_path = true; - return nullptr; // Failure - } - mirror::Class* referrer = method->GetDeclaringClass(); - if (UNLIKELY(!referrer->CanAccess(klass))) { - ThrowIllegalAccessErrorClass(referrer, klass); - *slow_path = true; - return nullptr; // Failure - } + if (UNLIKELY(klass->IsClassClass())) { + ThrowIllegalAccessError(nullptr, "Class %s is inaccessible", + klass->PrettyDescriptor().c_str()); + *slow_path = true; + return nullptr; // Failure } if (UNLIKELY(!klass->IsInitialized())) { StackHandleScope<1> hs(self); @@ -191,7 +169,9 @@ inline mirror::Class* CheckObjectAlloc(dex::TypeIndex type_idx, ALWAYS_INLINE inline mirror::Class* CheckClassInitializedForObjectAlloc(mirror::Class* klass, Thread* self, - bool* slow_path) { + bool* slow_path) + REQUIRES_SHARED(Locks::mutator_lock_) + REQUIRES(!Roles::uninterruptible_) { if (UNLIKELY(!klass->IsInitialized())) { StackHandleScope<1> hs(self); Handle<mirror::Class> h_class(hs.NewHandle(klass)); @@ -213,18 +193,15 @@ inline mirror::Class* CheckClassInitializedForObjectAlloc(mirror::Class* klass, return klass; } -// Given the context of a calling Method, use its DexCache to resolve a type to a Class. If it -// cannot be resolved, throw an error. If it can, use it to create an instance. -// When verification/compiler hasn't been able to verify access, optionally perform an access -// check. -template <bool kAccessCheck, bool kInstrumented> +// Allocate an instance of klass. Throws InstantationError if klass is not instantiable, +// or IllegalAccessError if klass is j.l.Class. Performs a clinit check too. +template <bool kInstrumented> ALWAYS_INLINE -inline mirror::Object* AllocObjectFromCode(dex::TypeIndex type_idx, - ArtMethod* method, +inline mirror::Object* AllocObjectFromCode(mirror::Class* klass, Thread* self, gc::AllocatorType allocator_type) { bool slow_path = false; - mirror::Class* klass = CheckObjectAlloc<kAccessCheck>(type_idx, method, self, &slow_path); + klass = CheckObjectAlloc(klass, self, &slow_path); if (UNLIKELY(slow_path)) { if (klass == nullptr) { return nullptr; diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h index 7cc136e227..4794610ca8 100644 --- a/runtime/entrypoints/entrypoint_utils.h +++ b/runtime/entrypoints/entrypoint_utils.h @@ -45,27 +45,10 @@ class OatQuickMethodHeader; class ScopedObjectAccessAlreadyRunnable; class Thread; -template <const bool kAccessCheck> -ALWAYS_INLINE inline mirror::Class* CheckObjectAlloc(dex::TypeIndex type_idx, - ArtMethod* method, - Thread* self, - bool* slow_path) - REQUIRES_SHARED(Locks::mutator_lock_) - REQUIRES(!Roles::uninterruptible_); - -ALWAYS_INLINE inline mirror::Class* CheckClassInitializedForObjectAlloc(mirror::Class* klass, - Thread* self, - bool* slow_path) - REQUIRES_SHARED(Locks::mutator_lock_) - REQUIRES(!Roles::uninterruptible_); - // Given the context of a calling Method, use its DexCache to resolve a type to a Class. If it // cannot be resolved, throw an error. If it can, use it to create an instance. -// When verification/compiler hasn't been able to verify access, optionally perform an access -// check. -template <bool kAccessCheck, bool kInstrumented> -ALWAYS_INLINE inline mirror::Object* AllocObjectFromCode(dex::TypeIndex type_idx, - ArtMethod* method, +template <bool kInstrumented> +ALWAYS_INLINE inline mirror::Object* AllocObjectFromCode(mirror::Class* klass, Thread* self, gc::AllocatorType allocator_type) REQUIRES_SHARED(Locks::mutator_lock_) diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc index 82bb8e53c6..2d06508069 100644 --- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc @@ -29,87 +29,58 @@ namespace art { static constexpr bool kUseTlabFastPath = true; +template <bool kInitialized, + bool kFinalize, + bool kInstrumented, + gc::AllocatorType allocator_type> +static ALWAYS_INLINE inline mirror::Object* artAllocObjectFromCode( + mirror::Class* klass, + Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) { + ScopedQuickEntrypointChecks sqec(self); + DCHECK(klass != nullptr); + if (kUseTlabFastPath && !kInstrumented && allocator_type == gc::kAllocatorTypeTLAB) { + if (kInitialized || klass->IsInitialized()) { + if (!kFinalize || !klass->IsFinalizable()) { + size_t byte_count = klass->GetObjectSize(); + byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); + mirror::Object* obj; + if (LIKELY(byte_count < self->TlabSize())) { + obj = self->AllocTlab(byte_count); + DCHECK(obj != nullptr) << "AllocTlab can't fail"; + obj->SetClass(klass); + if (kUseBakerReadBarrier) { + obj->AssertReadBarrierState(); + } + QuasiAtomic::ThreadFenceForConstructor(); + return obj; + } + } + } + } + if (kInitialized) { + return AllocObjectFromCodeInitialized<kInstrumented>(klass, self, allocator_type); + } else if (!kFinalize) { + return AllocObjectFromCodeResolved<kInstrumented>(klass, self, allocator_type); + } else { + return AllocObjectFromCode<kInstrumented>(klass, self, allocator_type); + } +} + #define GENERATE_ENTRYPOINTS_FOR_ALLOCATOR_INST(suffix, suffix2, instrumented_bool, allocator_type) \ -extern "C" mirror::Object* artAllocObjectFromCode ##suffix##suffix2( \ - uint32_t type_idx, ArtMethod* method, Thread* self) \ +extern "C" mirror::Object* artAllocObjectFromCodeWithChecks##suffix##suffix2( \ + mirror::Class* klass, Thread* self) \ REQUIRES_SHARED(Locks::mutator_lock_) { \ - ScopedQuickEntrypointChecks sqec(self); \ - if (kUseTlabFastPath && !(instrumented_bool) && (allocator_type) == gc::kAllocatorTypeTLAB) { \ - mirror::Class* klass = method->GetDexCacheResolvedType<false>(dex::TypeIndex(type_idx), \ - kRuntimePointerSize); \ - if (LIKELY(klass != nullptr && klass->IsInitialized() && !klass->IsFinalizable())) { \ - size_t byte_count = klass->GetObjectSize(); \ - byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \ - mirror::Object* obj; \ - if (LIKELY(byte_count < self->TlabSize())) { \ - obj = self->AllocTlab(byte_count); \ - DCHECK(obj != nullptr) << "AllocTlab can't fail"; \ - obj->SetClass(klass); \ - if (kUseBakerReadBarrier) { \ - obj->AssertReadBarrierState(); \ - } \ - QuasiAtomic::ThreadFenceForConstructor(); \ - return obj; \ - } \ - } \ - } \ - return AllocObjectFromCode<false, instrumented_bool>(dex::TypeIndex(type_idx), \ - method, \ - self, \ - allocator_type); \ + return artAllocObjectFromCode<false, true, instrumented_bool, allocator_type>(klass, self); \ } \ extern "C" mirror::Object* artAllocObjectFromCodeResolved##suffix##suffix2( \ - mirror::Class* klass, ArtMethod* method ATTRIBUTE_UNUSED, Thread* self) \ + mirror::Class* klass, Thread* self) \ REQUIRES_SHARED(Locks::mutator_lock_) { \ - ScopedQuickEntrypointChecks sqec(self); \ - if (kUseTlabFastPath && !(instrumented_bool) && (allocator_type) == gc::kAllocatorTypeTLAB) { \ - if (LIKELY(klass->IsInitialized())) { \ - size_t byte_count = klass->GetObjectSize(); \ - byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \ - mirror::Object* obj; \ - if (LIKELY(byte_count < self->TlabSize())) { \ - obj = self->AllocTlab(byte_count); \ - DCHECK(obj != nullptr) << "AllocTlab can't fail"; \ - obj->SetClass(klass); \ - if (kUseBakerReadBarrier) { \ - obj->AssertReadBarrierState(); \ - } \ - QuasiAtomic::ThreadFenceForConstructor(); \ - return obj; \ - } \ - } \ - } \ - return AllocObjectFromCodeResolved<instrumented_bool>(klass, self, allocator_type); \ + return artAllocObjectFromCode<false, false, instrumented_bool, allocator_type>(klass, self); \ } \ extern "C" mirror::Object* artAllocObjectFromCodeInitialized##suffix##suffix2( \ - mirror::Class* klass, ArtMethod* method ATTRIBUTE_UNUSED, Thread* self) \ - REQUIRES_SHARED(Locks::mutator_lock_) { \ - ScopedQuickEntrypointChecks sqec(self); \ - if (kUseTlabFastPath && !(instrumented_bool) && (allocator_type) == gc::kAllocatorTypeTLAB) { \ - size_t byte_count = klass->GetObjectSize(); \ - byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \ - mirror::Object* obj; \ - if (LIKELY(byte_count < self->TlabSize())) { \ - obj = self->AllocTlab(byte_count); \ - DCHECK(obj != nullptr) << "AllocTlab can't fail"; \ - obj->SetClass(klass); \ - if (kUseBakerReadBarrier) { \ - obj->AssertReadBarrierState(); \ - } \ - QuasiAtomic::ThreadFenceForConstructor(); \ - return obj; \ - } \ - } \ - return AllocObjectFromCodeInitialized<instrumented_bool>(klass, self, allocator_type); \ -} \ -extern "C" mirror::Object* artAllocObjectFromCodeWithAccessCheck##suffix##suffix2( \ - uint32_t type_idx, ArtMethod* method, Thread* self) \ + mirror::Class* klass, Thread* self) \ REQUIRES_SHARED(Locks::mutator_lock_) { \ - ScopedQuickEntrypointChecks sqec(self); \ - return AllocObjectFromCode<true, instrumented_bool>(dex::TypeIndex(type_idx), \ - method, \ - self, \ - allocator_type); \ + return artAllocObjectFromCode<true, false, instrumented_bool, allocator_type>(klass, self); \ } \ extern "C" mirror::Array* artAllocArrayFromCode##suffix##suffix2( \ uint32_t type_idx, int32_t component_count, ArtMethod* method, Thread* self) \ @@ -220,10 +191,9 @@ GENERATE_ENTRYPOINTS_FOR_ALLOCATOR(RegionTLAB, gc::kAllocatorTypeRegionTLAB) extern "C" void* art_quick_alloc_array##suffix(uint32_t, int32_t, ArtMethod* ref); \ extern "C" void* art_quick_alloc_array_resolved##suffix(mirror::Class* klass, int32_t, ArtMethod* ref); \ extern "C" void* art_quick_alloc_array_with_access_check##suffix(uint32_t, int32_t, ArtMethod* ref); \ -extern "C" void* art_quick_alloc_object##suffix(uint32_t type_idx, ArtMethod* ref); \ -extern "C" void* art_quick_alloc_object_resolved##suffix(mirror::Class* klass, ArtMethod* ref); \ -extern "C" void* art_quick_alloc_object_initialized##suffix(mirror::Class* klass, ArtMethod* ref); \ -extern "C" void* art_quick_alloc_object_with_access_check##suffix(uint32_t type_idx, ArtMethod* ref); \ +extern "C" void* art_quick_alloc_object_resolved##suffix(mirror::Class* klass); \ +extern "C" void* art_quick_alloc_object_initialized##suffix(mirror::Class* klass); \ +extern "C" void* art_quick_alloc_object_with_checks##suffix(mirror::Class* klass); \ extern "C" void* art_quick_check_and_alloc_array##suffix(uint32_t, int32_t, ArtMethod* ref); \ extern "C" void* art_quick_check_and_alloc_array_with_access_check##suffix(uint32_t, int32_t, ArtMethod* ref); \ extern "C" void* art_quick_alloc_string_from_bytes##suffix(void*, int32_t, int32_t, int32_t); \ @@ -233,9 +203,9 @@ extern "C" void* art_quick_alloc_array##suffix##_instrumented(uint32_t, int32_t, extern "C" void* art_quick_alloc_array_resolved##suffix##_instrumented(mirror::Class* klass, int32_t, ArtMethod* ref); \ extern "C" void* art_quick_alloc_array_with_access_check##suffix##_instrumented(uint32_t, int32_t, ArtMethod* ref); \ extern "C" void* art_quick_alloc_object##suffix##_instrumented(uint32_t type_idx, ArtMethod* ref); \ -extern "C" void* art_quick_alloc_object_resolved##suffix##_instrumented(mirror::Class* klass, ArtMethod* ref); \ -extern "C" void* art_quick_alloc_object_initialized##suffix##_instrumented(mirror::Class* klass, ArtMethod* ref); \ -extern "C" void* art_quick_alloc_object_with_access_check##suffix##_instrumented(uint32_t type_idx, ArtMethod* ref); \ +extern "C" void* art_quick_alloc_object_resolved##suffix##_instrumented(mirror::Class* klass); \ +extern "C" void* art_quick_alloc_object_initialized##suffix##_instrumented(mirror::Class* klass); \ +extern "C" void* art_quick_alloc_object_with_checks##suffix##_instrumented(mirror::Class* klass); \ extern "C" void* art_quick_check_and_alloc_array##suffix##_instrumented(uint32_t, int32_t, ArtMethod* ref); \ extern "C" void* art_quick_check_and_alloc_array_with_access_check##suffix##_instrumented(uint32_t, int32_t, ArtMethod* ref); \ extern "C" void* art_quick_alloc_string_from_bytes##suffix##_instrumented(void*, int32_t, int32_t, int32_t); \ @@ -246,10 +216,9 @@ void SetQuickAllocEntryPoints##suffix(QuickEntryPoints* qpoints, bool instrument qpoints->pAllocArray = art_quick_alloc_array##suffix##_instrumented; \ qpoints->pAllocArrayResolved = art_quick_alloc_array_resolved##suffix##_instrumented; \ qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check##suffix##_instrumented; \ - qpoints->pAllocObject = art_quick_alloc_object##suffix##_instrumented; \ qpoints->pAllocObjectResolved = art_quick_alloc_object_resolved##suffix##_instrumented; \ qpoints->pAllocObjectInitialized = art_quick_alloc_object_initialized##suffix##_instrumented; \ - qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check##suffix##_instrumented; \ + qpoints->pAllocObjectWithChecks = art_quick_alloc_object_with_checks##suffix##_instrumented; \ qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array##suffix##_instrumented; \ qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check##suffix##_instrumented; \ qpoints->pAllocStringFromBytes = art_quick_alloc_string_from_bytes##suffix##_instrumented; \ @@ -259,10 +228,9 @@ void SetQuickAllocEntryPoints##suffix(QuickEntryPoints* qpoints, bool instrument qpoints->pAllocArray = art_quick_alloc_array##suffix; \ qpoints->pAllocArrayResolved = art_quick_alloc_array_resolved##suffix; \ qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check##suffix; \ - qpoints->pAllocObject = art_quick_alloc_object##suffix; \ qpoints->pAllocObjectResolved = art_quick_alloc_object_resolved##suffix; \ qpoints->pAllocObjectInitialized = art_quick_alloc_object_initialized##suffix; \ - qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check##suffix; \ + qpoints->pAllocObjectWithChecks = art_quick_alloc_object_with_checks##suffix; \ qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array##suffix; \ qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check##suffix; \ qpoints->pAllocStringFromBytes = art_quick_alloc_string_from_bytes##suffix; \ diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h index a1c5082c93..0911aeb0f4 100644 --- a/runtime/entrypoints/quick/quick_entrypoints_list.h +++ b/runtime/entrypoints/quick/quick_entrypoints_list.h @@ -23,10 +23,9 @@ V(AllocArray, void*, uint32_t, int32_t, ArtMethod*) \ V(AllocArrayResolved, void*, mirror::Class*, int32_t, ArtMethod*) \ V(AllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*) \ - V(AllocObject, void*, uint32_t, ArtMethod*) \ - V(AllocObjectResolved, void*, mirror::Class*, ArtMethod*) \ - V(AllocObjectInitialized, void*, mirror::Class*, ArtMethod*) \ - V(AllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*) \ + V(AllocObjectResolved, void*, mirror::Class*) \ + V(AllocObjectInitialized, void*, mirror::Class*) \ + V(AllocObjectWithChecks, void*, mirror::Class*) \ V(CheckAndAllocArray, void*, uint32_t, int32_t, ArtMethod*) \ V(CheckAndAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*) \ V(AllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t) \ diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc index 12836602d5..6866abb6ae 100644 --- a/runtime/entrypoints_order_test.cc +++ b/runtime/entrypoints_order_test.cc @@ -122,9 +122,9 @@ class EntrypointsOrderTest : public CommonRuntimeTest { // Skip across the entrypoints structures. + EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_pos, sizeof(void*)); EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_pos, thread_local_end, sizeof(void*)); - EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_start, sizeof(void*)); - EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_objects, sizeof(void*)); + EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_objects, sizeof(void*)); EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_objects, mterp_current_ibase, sizeof(size_t)); EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_current_ibase, mterp_default_ibase, sizeof(void*)); EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_default_ibase, mterp_alt_ibase, sizeof(void*)); @@ -156,13 +156,13 @@ class EntrypointsOrderTest : public CommonRuntimeTest { EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocArray, pAllocArrayResolved, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocArrayResolved, pAllocArrayWithAccessCheck, sizeof(void*)); - EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocArrayWithAccessCheck, pAllocObject, sizeof(void*)); - EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocObject, pAllocObjectResolved, sizeof(void*)); + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocArrayWithAccessCheck, pAllocObjectResolved, + sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocObjectResolved, pAllocObjectInitialized, sizeof(void*)); - EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocObjectInitialized, pAllocObjectWithAccessCheck, + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocObjectInitialized, pAllocObjectWithChecks, sizeof(void*)); - EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocObjectWithAccessCheck, pCheckAndAllocArray, + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocObjectWithChecks, pCheckAndAllocArray, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCheckAndAllocArray, pCheckAndAllocArrayWithAccessCheck, sizeof(void*)); diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc index b0d7fb247a..d7dfcd4408 100644 --- a/runtime/interpreter/interpreter_switch_impl.cc +++ b/runtime/interpreter/interpreter_switch_impl.cc @@ -508,9 +508,8 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); obj = mirror::String::AllocEmptyString<true>(self, allocator_type); } else { - obj = AllocObjectFromCode<do_access_check, true>( - dex::TypeIndex(inst->VRegB_21c()), - shadow_frame.GetMethod(), + obj = AllocObjectFromCode<true>( + c.Ptr(), self, Runtime::Current()->GetHeap()->GetCurrentAllocator()); } diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc index c8c1563ff6..369c2614a7 100644 --- a/runtime/interpreter/mterp/mterp.cc +++ b/runtime/interpreter/mterp/mterp.cc @@ -375,10 +375,9 @@ extern "C" size_t MterpNewInstance(ShadowFrame* shadow_frame, Thread* self, uint gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); obj = mirror::String::AllocEmptyString<true>(self, allocator_type); } else { - obj = AllocObjectFromCode<false, true>(dex::TypeIndex(inst->VRegB_21c()), - shadow_frame->GetMethod(), - self, - Runtime::Current()->GetHeap()->GetCurrentAllocator()); + obj = AllocObjectFromCode<true>(c, + self, + Runtime::Current()->GetHeap()->GetCurrentAllocator()); } } if (UNLIKELY(obj == nullptr)) { diff --git a/runtime/oat.h b/runtime/oat.h index 1fd906dc1b..dc103e2b52 100644 --- a/runtime/oat.h +++ b/runtime/oat.h @@ -32,7 +32,7 @@ class InstructionSetFeatures; class PACKED(4) OatHeader { public: static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' }; - static constexpr uint8_t kOatVersion[] = { '0', '9', '4', '\0' }; + static constexpr uint8_t kOatVersion[] = { '0', '9', '5', '\0' }; // alloc entrypoints change static constexpr const char* kImageLocationKey = "image-location"; static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline"; diff --git a/runtime/thread.cc b/runtime/thread.cc index aff12ff4c1..33c6a40320 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -2627,10 +2627,9 @@ void Thread::DumpThreadOffset(std::ostream& os, uint32_t offset) { QUICK_ENTRY_POINT_INFO(pAllocArray) QUICK_ENTRY_POINT_INFO(pAllocArrayResolved) QUICK_ENTRY_POINT_INFO(pAllocArrayWithAccessCheck) - QUICK_ENTRY_POINT_INFO(pAllocObject) QUICK_ENTRY_POINT_INFO(pAllocObjectResolved) QUICK_ENTRY_POINT_INFO(pAllocObjectInitialized) - QUICK_ENTRY_POINT_INFO(pAllocObjectWithAccessCheck) + QUICK_ENTRY_POINT_INFO(pAllocObjectWithChecks) QUICK_ENTRY_POINT_INFO(pCheckAndAllocArray) QUICK_ENTRY_POINT_INFO(pCheckAndAllocArrayWithAccessCheck) QUICK_ENTRY_POINT_INFO(pAllocStringFromBytes) diff --git a/runtime/thread.h b/runtime/thread.h index 411d85f015..6308851096 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -1416,7 +1416,7 @@ class Thread { stacked_shadow_frame_record(nullptr), deoptimization_context_stack(nullptr), frame_id_to_shadow_frame(nullptr), name(nullptr), pthread_self(0), last_no_thread_suspension_cause(nullptr), checkpoint_function(nullptr), - thread_local_pos(nullptr), thread_local_end(nullptr), thread_local_start(nullptr), + thread_local_start(nullptr), thread_local_pos(nullptr), thread_local_end(nullptr), thread_local_objects(0), mterp_current_ibase(nullptr), mterp_default_ibase(nullptr), mterp_alt_ibase(nullptr), thread_local_alloc_stack_top(nullptr), thread_local_alloc_stack_end(nullptr), nested_signal_state(nullptr), @@ -1540,12 +1540,12 @@ class Thread { JniEntryPoints jni_entrypoints; QuickEntryPoints quick_entrypoints; + // Thread-local allocation pointer. Moved here to force alignment for thread_local_pos on ARM. + uint8_t* thread_local_start; // thread_local_pos and thread_local_end must be consecutive for ldrd and are 8 byte aligned for // potentially better performance. uint8_t* thread_local_pos; uint8_t* thread_local_end; - // Thread-local allocation pointer. - uint8_t* thread_local_start; size_t thread_local_objects; diff --git a/test/529-checker-unresolved/src/Main.java b/test/529-checker-unresolved/src/Main.java index 5fd51e1dca..89b9cb45c3 100644 --- a/test/529-checker-unresolved/src/Main.java +++ b/test/529-checker-unresolved/src/Main.java @@ -192,13 +192,13 @@ public class Main extends UnresolvedSuperClass { /// CHECK-START: void Main.testLicm(int) licm (before) /// CHECK: <<Class:l\d+>> LoadClass loop:B2 /// CHECK-NEXT: <<Clinit:l\d+>> ClinitCheck [<<Class>>] loop:B2 - /// CHECK-NEXT: <<New:l\d+>> NewInstance [<<Clinit>>,<<Method:[i|j]\d+>>] loop:B2 + /// CHECK-NEXT: <<New:l\d+>> NewInstance [<<Clinit>>] loop:B2 /// CHECK-NEXT: InvokeUnresolved [<<New>>] loop:B2 /// CHECK-START: void Main.testLicm(int) licm (after) /// CHECK: <<Class:l\d+>> LoadClass loop:none /// CHECK-NEXT: <<Clinit:l\d+>> ClinitCheck [<<Class>>] loop:none - /// CHECK: <<New:l\d+>> NewInstance [<<Clinit>>,<<Method:[i|j]\d+>>] loop:B2 + /// CHECK: <<New:l\d+>> NewInstance [<<Clinit>>] loop:B2 /// CHECK-NEXT: InvokeUnresolved [<<New>>] loop:B2 static public void testLicm(int count) { // Test to make sure we keep the initialization check after loading an unresolved class. diff --git a/test/621-checker-new-instance/expected.txt b/test/621-checker-new-instance/expected.txt deleted file mode 100644 index e69de29bb2..0000000000 --- a/test/621-checker-new-instance/expected.txt +++ /dev/null diff --git a/test/621-checker-new-instance/info.txt b/test/621-checker-new-instance/info.txt deleted file mode 100644 index c27c45ca7f..0000000000 --- a/test/621-checker-new-instance/info.txt +++ /dev/null @@ -1 +0,0 @@ -Tests for removing useless load class. diff --git a/test/621-checker-new-instance/src/Main.java b/test/621-checker-new-instance/src/Main.java deleted file mode 100644 index 68a46449f0..0000000000 --- a/test/621-checker-new-instance/src/Main.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -public class Main { - /// CHECK-START: java.lang.Object Main.newObject() prepare_for_register_allocation (before) - /// CHECK: LoadClass - /// CHECK: NewInstance - - /// CHECK-START: java.lang.Object Main.newObject() prepare_for_register_allocation (after) - /// CHECK-NOT: LoadClass - /// CHECK: NewInstance - public static Object newObject() { - return new Object(); - } - - /// CHECK-START: java.lang.Object Main.newFinalizableMayThrow() prepare_for_register_allocation (after) - /// CHECK: LoadClass - /// CHECK: NewInstance - public static Object newFinalizableMayThrow() { - return $inline$newFinalizableMayThrow(); - } - - public static Object $inline$newFinalizableMayThrow() { - return new FinalizableMayThrow(); - } - - public static void main(String[] args) { - newFinalizableMayThrow(); - newObject(); - } -} - -class FinalizableMayThrow { - // clinit may throw OOME. - static Object o = new Object(); - static String s; - public void finalize() { - s = "Test"; - } -} |