37 files changed, 301 insertions, 904 deletions
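In short: this commit deletes the kQuickAllocObject and kQuickAllocObjectWithAccessCheck object-allocation entrypoints, which took a dex type index plus the referring ArtMethod*, and replaces them with kQuickAllocObjectWithChecks, which, like the existing resolved/initialized entrypoints, takes the mirror::Class* directly. HNewInstance consequently drops its HCurrentMethod input (HExpression<2> becomes HExpression<1>), the hand-written assembly fast paths stop walking the dex cache to load the class, the per-allocator C stubs go from TWO_ARG_DOWNCALL to ONE_ARG_DOWNCALL, and the QuickEntryPoints count shrinks from 164 to 163. A minimal sketch, with illustrative names and not ART's actual code, of the entrypoint selection that the new instruction_builder.cc ternary performs:

#include <cstdio>

// Models the selection in BuildNewInstance() after this change: only the
// with-checks entrypoint handles finalizable classes and access checks,
// and it takes the class itself rather than a type index.
enum QuickEntrypointEnum {
  kQuickAllocObjectWithChecks,   // slow path: access + finalizable checks
  kQuickAllocObjectResolved,     // exists in ART; not chosen by this ternary
  kQuickAllocObjectInitialized,  // class resolved and initialized
};

QuickEntrypointEnum SelectNewInstanceEntrypoint(bool finalizable,
                                                bool needs_access_check) {
  // Mirrors the diff's ternary in instruction_builder.cc.
  return (finalizable || needs_access_check)
             ? kQuickAllocObjectWithChecks
             : kQuickAllocObjectInitialized;
}

int main() {
  std::printf("finalizable class -> %d\n", SelectNewInstanceEntrypoint(true, false));
  std::printf("plain class       -> %d\n", SelectNewInstanceEntrypoint(false, false));
}

Because the class is now always an explicit input, the inliner no longer needs its special case rejecting kQuickAllocObjectWithAccessCheck allocations in inlined frames, and PrepareForRegisterAllocation::VisitNewInstance disappears entirely, as the hunks below show.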
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index 86d92ff0b5..4180e0e6c9 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -487,7 +487,7 @@ TEST_F(OatTest, OatHeaderSizeCheck) { EXPECT_EQ(72U, sizeof(OatHeader)); EXPECT_EQ(4U, sizeof(OatMethodOffsets)); EXPECT_EQ(20U, sizeof(OatQuickMethodHeader)); - EXPECT_EQ(164 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)), + EXPECT_EQ(163 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)), sizeof(QuickEntryPoints)); } diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 8a7f6d3a33..1dd526f404 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -3936,7 +3936,6 @@ void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) { } else { InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } locations->SetOut(Location::RegisterLocation(R0)); } @@ -3954,7 +3953,7 @@ void InstructionCodeGeneratorARM::VisitNewInstance(HNewInstance* instruction) { codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); } else { codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); } } diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 5c33fe1a7d..240e39df4b 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -4738,7 +4738,6 @@ void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) { locations->AddTemp(LocationFrom(kArtMethodRegister)); } else { locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); } locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); } @@ -4756,7 +4755,7 @@ void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); } else { codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); } } diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 00ad3e34b7..cf4d94deea 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -3948,7 +3948,6 @@ void LocationsBuilderARMVIXL::VisitNewInstance(HNewInstance* instruction) { } else { InvokeRuntimeCallingConventionARMVIXL calling_convention; locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); } locations->SetOut(LocationFrom(r0)); } @@ -3970,7 +3969,7 @@ void InstructionCodeGeneratorARMVIXL::VisitNewInstance(HNewInstance* instruction codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); } else { codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - 
CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); } } diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 01e0dac33e..29f8b2aa3c 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -5900,7 +5900,6 @@ void LocationsBuilderMIPS::VisitNewInstance(HNewInstance* instruction) { locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument)); } else { locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); } @@ -5917,7 +5916,7 @@ void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) { codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); } else { codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); } } diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 36690c0569..dd3f0fee5a 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -3841,7 +3841,6 @@ void LocationsBuilderMIPS64::VisitNewInstance(HNewInstance* instruction) { locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument)); } else { locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); } @@ -3859,7 +3858,7 @@ void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); } else { codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); } } diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 0abe85540c..786bc50345 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -4150,7 +4150,6 @@ void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) { } else { InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } } @@ -4166,7 +4165,7 @@ void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) { codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); } else { codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); DCHECK(!codegen_->IsLeafMethod()); } } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 
903844fcdb..06b48c489c 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -4038,7 +4038,6 @@ void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) { locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument)); } else { locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } locations->SetOut(Location::RegisterLocation(RAX)); } @@ -4055,7 +4054,7 @@ void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); } else { codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); DCHECK(!codegen_->IsLeafMethod()); } } diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index e5d05e9e6d..c970e5cbba 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -1428,15 +1428,6 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, return false; } - if (current->IsNewInstance() && - (current->AsNewInstance()->GetEntrypoint() == kQuickAllocObjectWithAccessCheck)) { - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be inlined because it is using an entrypoint" - << " with access checks"; - // Allocation entrypoint does not handle inlined frames. - return false; - } - if (current->IsNewArray() && (current->AsNewArray()->GetEntrypoint() == kQuickAllocArrayWithAccessCheck)) { VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index 768b1d80a1..009d549547 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -917,11 +917,11 @@ bool HInstructionBuilder::BuildNewInstance(dex::TypeIndex type_index, uint32_t d bool finalizable; bool needs_access_check = NeedsAccessCheck(type_index, dex_cache, &finalizable); - // Only the non-resolved entrypoint handles the finalizable class case. If we + // Only the access check entrypoint handles the finalizable class case. If we // need access checks, then we haven't resolved the method and the class may // again be finalizable. QuickEntrypointEnum entrypoint = (finalizable || needs_access_check) - ? kQuickAllocObject + ? 
kQuickAllocObjectWithChecks : kQuickAllocObjectInitialized; if (outer_dex_cache.Get() != dex_cache.Get()) { @@ -946,7 +946,6 @@ bool HInstructionBuilder::BuildNewInstance(dex::TypeIndex type_index, uint32_t d AppendInstruction(new (arena_) HNewInstance( cls, - graph_->GetCurrentMethod(), dex_pc, type_index, *dex_compilation_unit_->GetDexFile(), diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index e1e0b7da2d..db1b277990 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -3774,10 +3774,9 @@ class HCompare FINAL : public HBinaryOperation { DISALLOW_COPY_AND_ASSIGN(HCompare); }; -class HNewInstance FINAL : public HExpression<2> { +class HNewInstance FINAL : public HExpression<1> { public: HNewInstance(HInstruction* cls, - HCurrentMethod* current_method, uint32_t dex_pc, dex::TypeIndex type_index, const DexFile& dex_file, @@ -3791,7 +3790,6 @@ class HNewInstance FINAL : public HExpression<2> { SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check); SetPackedFlag<kFlagFinalizable>(finalizable); SetRawInputAt(0, cls); - SetRawInputAt(1, current_method); } dex::TypeIndex GetTypeIndex() const { return type_index_; } diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index f9ac3a0f72..db7c1fbb06 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -134,39 +134,6 @@ void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { } } -void PrepareForRegisterAllocation::VisitNewInstance(HNewInstance* instruction) { - HLoadClass* load_class = instruction->InputAt(0)->AsLoadClass(); - const bool has_only_one_use = load_class->HasOnlyOneNonEnvironmentUse(); - // Change the entrypoint to kQuickAllocObject if either: - // - the class is finalizable (only kQuickAllocObject handles finalizable classes), - // - the class needs access checks (we do not know if it's finalizable), - // - or the load class has only one use. - if (instruction->IsFinalizable() || has_only_one_use || load_class->NeedsAccessCheck()) { - instruction->SetEntrypoint(kQuickAllocObject); - instruction->ReplaceInput(GetGraph()->GetIntConstant(load_class->GetTypeIndex().index_), 0); - if (has_only_one_use) { - // We've just removed the only use of the HLoadClass. Since we don't run DCE after this pass, - // do it manually if possible. - if (!load_class->CanThrow()) { - // If the load class can not throw, it has no side effects and can be removed if there is - // only one use. - load_class->GetBlock()->RemoveInstruction(load_class); - } else if (!instruction->GetEnvironment()->IsFromInlinedInvoke() && - CanMoveClinitCheck(load_class, instruction)) { - // The allocation entry point that deals with access checks does not work with inlined - // methods, so we need to check whether this allocation comes from an inlined method. - // We also need to make the same check as for moving clinit check, whether the HLoadClass - // has the clinit check responsibility or not (HLoadClass can throw anyway). - // If it needed access checks, we delegate the access check to the allocation. 
- if (load_class->NeedsAccessCheck()) { - instruction->SetEntrypoint(kQuickAllocObjectWithAccessCheck); - } - load_class->GetBlock()->RemoveInstruction(load_class); - } - } - } -} - bool PrepareForRegisterAllocation::CanEmitConditionAt(HCondition* condition, HInstruction* user) const { if (condition->GetNext() != user) { diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index a6791482a7..c128227654 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -44,7 +44,6 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { void VisitClinitCheck(HClinitCheck* check) OVERRIDE; void VisitCondition(HCondition* condition) OVERRIDE; void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE; - void VisitNewInstance(HNewInstance* instruction) OVERRIDE; bool CanMoveClinitCheck(HInstruction* input, HInstruction* user) const; bool CanEmitConditionAt(HCondition* condition, HInstruction* user) const; diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index ab4f9e944c..a3fce02970 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -5610,7 +5610,7 @@ const char* const VixlJniHelpersResults[] = { " 214: ecbd 8a10 vpop {s16-s31}\n", " 218: e8bd 8de0 ldmia.w sp!, {r5, r6, r7, r8, sl, fp, pc}\n", " 21c: 4660 mov r0, ip\n", - " 21e: f8d9 c2b0 ldr.w ip, [r9, #688] ; 0x2b0\n", + " 21e: f8d9 c2ac ldr.w ip, [r9, #684] ; 0x2ac\n", " 222: 47e0 blx ip\n", nullptr }; diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index a71ab4b53c..4d4ebdcad8 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -1124,28 +1124,23 @@ END art_quick_resolve_string // Generate the allocation entrypoints for each allocator. GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc). -ENTRY art_quick_alloc_object_rosalloc +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_RESOLVED_OBJECT(_rosalloc, RosAlloc). +ENTRY art_quick_alloc_object_resolved_rosalloc // Fast path rosalloc allocation. - // r0: type_idx/return value, r1: ArtMethod*, r9: Thread::Current - // r2, r3, r12: free. - ldr r2, [r1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_32] // Load dex cache resolved types array - // Load the class (r2) - ldr r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT] - cbz r2, .Lart_quick_alloc_object_rosalloc_slow_path // Check null class - + // r0: type/return value, r9: Thread::Current + // r1, r2, r3, r12: free. ldr r3, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET] // Check if the thread local // allocation stack has room. // TODO: consider using ldrd. ldr r12, [r9, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET] cmp r3, r12 - bhs .Lart_quick_alloc_object_rosalloc_slow_path + bhs .Lart_quick_alloc_object_resolved_rosalloc_slow_path - ldr r3, [r2, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET] // Load the object size (r3) + ldr r3, [r0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET] // Load the object size (r3) cmp r3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE // Check if the size is for a thread // local allocation. Also does the // initialized and finalizable checks. 
- bhs .Lart_quick_alloc_object_rosalloc_slow_path + bhs .Lart_quick_alloc_object_resolved_rosalloc_slow_path // Compute the rosalloc bracket index // from the size. Since the size is // already aligned we can combine the @@ -1159,7 +1154,7 @@ ENTRY art_quick_alloc_object_rosalloc // Load the free list head (r3). This // will be the return val. ldr r3, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)] - cbz r3, .Lart_quick_alloc_object_rosalloc_slow_path + cbz r3, .Lart_quick_alloc_object_resolved_rosalloc_slow_path // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1. ldr r1, [r3, #ROSALLOC_SLOT_NEXT_OFFSET] // Load the next pointer of the head // and update the list head with the @@ -1172,8 +1167,8 @@ ENTRY art_quick_alloc_object_rosalloc #if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET #error "Class pointer needs to overwrite next pointer." #endif - POISON_HEAP_REF r2 - str r2, [r3, #MIRROR_OBJECT_CLASS_OFFSET] + POISON_HEAP_REF r0 + str r0, [r3, #MIRROR_OBJECT_CLASS_OFFSET] // Fence. This is "ish" not "ishst" so // that it also ensures ordering of // the class status load with respect @@ -1204,20 +1199,20 @@ ENTRY art_quick_alloc_object_rosalloc mov r0, r3 // Set the return value and return. bx lr -.Lart_quick_alloc_object_rosalloc_slow_path: +.Lart_quick_alloc_object_resolved_rosalloc_slow_path: SETUP_SAVE_REFS_ONLY_FRAME r2 @ save callee saves in case of GC - mov r2, r9 @ pass Thread::Current - bl artAllocObjectFromCodeRosAlloc @ (uint32_t type_idx, Method* method, Thread*) + mov r1, r9 @ pass Thread::Current + bl artAllocObjectFromCodeResolvedRosAlloc @ (mirror::Class* cls, Thread*) RESTORE_SAVE_REFS_ONLY_FRAME RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER -END art_quick_alloc_object_rosalloc +END art_quick_alloc_object_resolved_rosalloc -// The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab. +// The common fast path code for art_quick_alloc_object_resolved_tlab +// and art_quick_alloc_object_resolved_region_tlab. // -// r0: type_idx/return value, r1: ArtMethod*, r2: class, r9: Thread::Current, r3, r12: free. -// Need to preserve r0 and r1 to the slow path. -.macro ALLOC_OBJECT_TLAB_FAST_PATH slowPathLabel - cbz r2, \slowPathLabel // Check null class +// r0: type r9: Thread::Current, r1, r2, r3, r12: free. +// Need to preserve r0 to the slow path. +.macro ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH slowPathLabel // Load thread_local_pos (r12) and // thread_local_end (r3) with ldrd. // Check constraints for ldrd. @@ -1232,14 +1227,14 @@ END art_quick_alloc_object_rosalloc // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1. // Reload old thread_local_pos (r0) // for the return value. - ldr r0, [r9, #THREAD_LOCAL_POS_OFFSET] - add r1, r0, r3 + ldr r2, [r9, #THREAD_LOCAL_POS_OFFSET] + add r1, r2, r3 str r1, [r9, #THREAD_LOCAL_POS_OFFSET] // Store new thread_local_pos. ldr r1, [r9, #THREAD_LOCAL_OBJECTS_OFFSET] // Increment thread_local_objects. add r1, r1, #1 str r1, [r9, #THREAD_LOCAL_OBJECTS_OFFSET] - POISON_HEAP_REF r2 - str r2, [r0, #MIRROR_OBJECT_CLASS_OFFSET] // Store the class pointer. + POISON_HEAP_REF r0 + str r0, [r2, #MIRROR_OBJECT_CLASS_OFFSET] // Store the class pointer. // Fence. 
This is "ish" not "ishst" so // that the code after this allocation // site will see the right values in @@ -1247,71 +1242,46 @@ END art_quick_alloc_object_rosalloc // Alternatively we could use "ishst" // if we use load-acquire for the // object size load.) + mov r0, r2 dmb ish bx lr .endm -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB). -ENTRY art_quick_alloc_object_tlab +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_RESOLVED_OBJECT(_tlab, TLAB). +ENTRY art_quick_alloc_object_resolved_tlab // Fast path tlab allocation. - // r0: type_idx/return value, r1: ArtMethod*, r9: Thread::Current - // r2, r3, r12: free. + // r0: type, r9: Thread::Current + // r1, r2, r3, r12: free. #if defined(USE_READ_BARRIER) mvn r0, #0 // Read barrier not supported here. bx lr // Return -1. #endif - ldr r2, [r1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_32] // Load dex cache resolved types array - // Load the class (r2) - ldr r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT] - ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path -.Lart_quick_alloc_object_tlab_slow_path: + ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_tlab_slow_path +.Lart_quick_alloc_object_resolved_tlab_slow_path: SETUP_SAVE_REFS_ONLY_FRAME r2 // Save callee saves in case of GC. - mov r2, r9 // Pass Thread::Current. - bl artAllocObjectFromCodeTLAB // (uint32_t type_idx, Method* method, Thread*) + mov r1, r9 // Pass Thread::Current. + bl artAllocObjectFromCodeResolvedTLAB // (mirror::Class* klass, Thread*) RESTORE_SAVE_REFS_ONLY_FRAME RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER -END art_quick_alloc_object_tlab +END art_quick_alloc_object_resolved_tlab -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB) -ENTRY art_quick_alloc_object_region_tlab +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB) +ENTRY art_quick_alloc_object_resolved_region_tlab // Fast path tlab allocation. - // r0: type_idx/return value, r1: ArtMethod*, r9: Thread::Current, r2, r3, r12: free. + // r0: type, r9: Thread::Current, r1, r2, r3, r12: free. #if !defined(USE_READ_BARRIER) eor r0, r0, r0 // Read barrier must be enabled here. sub r0, r0, #1 // Return -1. bx lr #endif - ldr r2, [r1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_32] // Load dex cache resolved types array - // Load the class (r2) - ldr r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT] - // Read barrier for class load. - ldr r3, [r9, #THREAD_IS_GC_MARKING_OFFSET] - cbnz r3, .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_marking -.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit: - ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path -.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_marking: - cbz r2, .Lart_quick_alloc_object_region_tlab_slow_path // Null check for loading lock word. - // Check lock word for mark bit, if marked do the allocation. - ldr r3, [r2, MIRROR_OBJECT_LOCK_WORD_OFFSET] - ands r3, #LOCK_WORD_MARK_BIT_MASK_SHIFTED - bne .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit -.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path: - // The read barrier slow path. Mark - // the class. - push {r0, r1, r3, lr} // Save registers. r3 is pushed only - // to align sp by 16 bytes. - mov r0, r2 // Pass the class as the first param. - bl artReadBarrierMark - mov r2, r0 // Get the (marked) class back. 
- pop {r0, r1, r3, lr} - b .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit -.Lart_quick_alloc_object_region_tlab_slow_path: + ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path +.Lart_quick_alloc_object_resolved_region_tlab_slow_path: SETUP_SAVE_REFS_ONLY_FRAME r2 // Save callee saves in case of GC. - mov r2, r9 // Pass Thread::Current. - bl artAllocObjectFromCodeRegionTLAB // (uint32_t type_idx, Method* method, Thread*) + mov r1, r9 // Pass Thread::Current. + bl artAllocObjectFromCodeResolvedRegionTLAB // (mirror::Class* klass, Thread*) RESTORE_SAVE_REFS_ONLY_FRAME RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER -END art_quick_alloc_object_region_tlab +END art_quick_alloc_object_resolved_region_tlab /* * Called by managed code when the value in rSUSPEND has been decremented to 0. diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index b88515f21f..8b1e0388c6 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -1669,7 +1669,6 @@ END art_quick_resolve_string // Generate the allocation entrypoints for each allocator. GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS // Comment out allocators that have arm64 specific asm. -// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB) implemented in asm // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB) // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB) @@ -1682,27 +1681,23 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB) -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc). -ENTRY art_quick_alloc_object_rosalloc +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc). +ENTRY art_quick_alloc_object_resolved_rosalloc // Fast path rosalloc allocation. - // x0: type_idx/return value, x1: ArtMethod*, xSELF(x19): Thread::Current - // x2-x7: free. - ldr x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64] // Load dex cache resolved types array - // Load the class (x2) - ldr w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT] - cbz x2, .Lart_quick_alloc_object_rosalloc_slow_path // Check null class + // x0: type, xSELF(x19): Thread::Current + // x1-x7: free. ldr x3, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET] // Check if the thread local // allocation stack has room. // ldp won't work due to large offset. ldr x4, [xSELF, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET] cmp x3, x4 - bhs .Lart_quick_alloc_object_rosalloc_slow_path - ldr w3, [x2, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET] // Load the object size (x3) + bhs .Lart_quick_alloc_object_resolved_rosalloc_slow_path + ldr w3, [x0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET] // Load the object size (x3) cmp x3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE // Check if the size is for a thread // local allocation. Also does the // finalizable and initialization // checks. - bhs .Lart_quick_alloc_object_rosalloc_slow_path + bhs .Lart_quick_alloc_object_resolved_rosalloc_slow_path // Compute the rosalloc bracket index // from the size. 
Since the size is // already aligned we can combine the @@ -1715,7 +1710,7 @@ ENTRY art_quick_alloc_object_rosalloc // Load the free list head (x3). This // will be the return val. ldr x3, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)] - cbz x3, .Lart_quick_alloc_object_rosalloc_slow_path + cbz x3, .Lart_quick_alloc_object_resolved_rosalloc_slow_path // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1. ldr x1, [x3, #ROSALLOC_SLOT_NEXT_OFFSET] // Load the next pointer of the head // and update the list head with the @@ -1728,8 +1723,8 @@ ENTRY art_quick_alloc_object_rosalloc #if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET #error "Class pointer needs to overwrite next pointer." #endif - POISON_HEAP_REF w2 - str w2, [x3, #MIRROR_OBJECT_CLASS_OFFSET] + POISON_HEAP_REF w0 + str w0, [x3, #MIRROR_OBJECT_CLASS_OFFSET] // Fence. This is "ish" not "ishst" so // that it also ensures ordering of // the object size load with respect @@ -1759,13 +1754,13 @@ ENTRY art_quick_alloc_object_rosalloc mov x0, x3 // Set the return value and return. ret -.Lart_quick_alloc_object_rosalloc_slow_path: - SETUP_SAVE_REFS_ONLY_FRAME // save callee saves in case of GC - mov x2, xSELF // pass Thread::Current - bl artAllocObjectFromCodeRosAlloc // (uint32_t type_idx, Method* method, Thread*) +.Lart_quick_alloc_object_resolved_rosalloc_slow_path: + SETUP_SAVE_REFS_ONLY_FRAME // save callee saves in case of GC + mov x1, xSELF // pass Thread::Current + bl artAllocObjectFromCodeResolvedRosAlloc // (mirror::Class* klass, Thread*) RESTORE_SAVE_REFS_ONLY_FRAME RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER -END art_quick_alloc_object_rosalloc +END art_quick_alloc_object_resolved_rosalloc // The common fast path code for art_quick_alloc_array_region_tlab. @@ -1834,16 +1829,6 @@ END art_quick_alloc_object_rosalloc ret .endm -// The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab. -// -// x0: type_idx/return value, x1: ArtMethod*, x2: Class*, xSELF(x19): Thread::Current -// x3-x7: free. -// Need to preserve x0 and x1 to the slow path. -.macro ALLOC_OBJECT_TLAB_FAST_PATH slowPathLabel - cbz x2, \slowPathLabel // Check null class - ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED \slowPathLabel -.endm - // TODO: delete ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED since it is the same as // ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED. .macro ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED slowPathLabel @@ -1853,20 +1838,18 @@ END art_quick_alloc_object_rosalloc .macro ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED slowPathLabel ldr x4, [xSELF, #THREAD_LOCAL_POS_OFFSET] ldr x5, [xSELF, #THREAD_LOCAL_END_OFFSET] - ldr w7, [x2, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET] // Load the object size (x7). + ldr w7, [x0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET] // Load the object size (x7). add x6, x4, x7 // Add object size to tlab pos. cmp x6, x5 // Check if it fits, overflow works // since the tlab pos and end are 32 // bit values. bhi \slowPathLabel - // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1. - mov x0, x4 str x6, [xSELF, #THREAD_LOCAL_POS_OFFSET] // Store new thread_local_pos. ldr x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET] // Increment thread_local_objects. add x5, x5, #1 str x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET] - POISON_HEAP_REF w2 - str w2, [x0, #MIRROR_OBJECT_CLASS_OFFSET] // Store the class pointer. + POISON_HEAP_REF w0 + str w0, [x4, #MIRROR_OBJECT_CLASS_OFFSET] // Store the class pointer. 
// Fence. This is "ish" not "ishst" so // that the code after this allocation // site will see the right values in @@ -1874,91 +1857,52 @@ END art_quick_alloc_object_rosalloc // Alternatively we could use "ishst" // if we use load-acquire for the // object size load.) + mov x0, x4 dmb ish ret .endm -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB). -ENTRY art_quick_alloc_object_tlab +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB). +ENTRY art_quick_alloc_object_resolved_tlab // Fast path tlab allocation. - // x0: type_idx/return value, x1: ArtMethod*, xSELF(x19): Thread::Current - // x2-x7: free. + // x0: type, xSELF(x19): Thread::Current + // x1-x7: free. #if defined(USE_READ_BARRIER) mvn x0, xzr // Read barrier not supported here. ret // Return -1. #endif - ldr x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64] // Load dex cache resolved types array - // Load the class (x2) - ldr w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT] - ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path -.Lart_quick_alloc_object_tlab_slow_path: - SETUP_SAVE_REFS_ONLY_FRAME // Save callee saves in case of GC. - mov x2, xSELF // Pass Thread::Current. - bl artAllocObjectFromCodeTLAB // (uint32_t type_idx, Method* method, Thread*) + ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_object_resolved_tlab_slow_path +.Lart_quick_alloc_object_resolved_tlab_slow_path: + SETUP_SAVE_REFS_ONLY_FRAME // Save callee saves in case of GC. + mov x1, xSELF // Pass Thread::Current. + bl artAllocObjectFromCodeResolvedTLAB // (mirror::Class*, Thread*) RESTORE_SAVE_REFS_ONLY_FRAME RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER -END art_quick_alloc_object_tlab +END art_quick_alloc_object_resolved_tlab // The common code for art_quick_alloc_object_*region_tlab -.macro GENERATE_ALLOC_OBJECT_REGION_TLAB name, entrypoint, fast_path, is_resolved, read_barrier +.macro GENERATE_ALLOC_OBJECT_RESOLVED_REGION_TLAB name, entrypoint, fast_path ENTRY \name // Fast path region tlab allocation. - // x0: type_idx/resolved class/return value, x1: ArtMethod*, xSELF(x19): Thread::Current - // If is_resolved is 1 then x0 is the resolved type, otherwise it is the index. - // x2-x7: free. + // x0: type, xSELF(x19): Thread::Current + // x1-x7: free. #if !defined(USE_READ_BARRIER) mvn x0, xzr // Read barrier must be enabled here. ret // Return -1. #endif -.if \is_resolved - mov x2, x0 // class is actually stored in x0 already -.else - ldr x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64] // Load dex cache resolved types array - // Load the class (x2) - ldr w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT] - // If the class is null, go slow path. The check is required to read the lock word. - cbz w2, .Lslow_path\name -.endif -.if \read_barrier - // Most common case: GC is not marking. - ldr w3, [xSELF, #THREAD_IS_GC_MARKING_OFFSET] - cbnz x3, .Lmarking\name -.endif .Ldo_allocation\name: \fast_path .Lslow_path\name -.Lmarking\name: -.if \read_barrier - // GC is marking, check the lock word of the class for the mark bit. - // Class is not null, check mark bit in lock word. - ldr w3, [x2, #MIRROR_OBJECT_LOCK_WORD_OFFSET] - // If the bit is not zero, do the allocation. - tbnz w3, #LOCK_WORD_MARK_BIT_SHIFT, .Ldo_allocation\name - // The read barrier slow path. Mark - // the class. - SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 32 // Save registers (x0, x1, lr). - SAVE_REG xLR, 24 // Align sp by 16 bytes. - mov x0, x2 // Pass the class as the first param. 
- bl artReadBarrierMark - mov x2, x0 // Get the (marked) class back. - RESTORE_REG xLR, 24 - RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32 // Restore registers. - b .Ldo_allocation\name -.endif .Lslow_path\name: SETUP_SAVE_REFS_ONLY_FRAME // Save callee saves in case of GC. - mov x2, xSELF // Pass Thread::Current. - bl \entrypoint // (uint32_t type_idx, Method* method, Thread*) + mov x1, xSELF // Pass Thread::Current. + bl \entrypoint // (mirror::Class*, Thread*) RESTORE_SAVE_REFS_ONLY_FRAME RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER END \name .endm -// Use ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED since the null check is already done in GENERATE_ALLOC_OBJECT_TLAB. -GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_region_tlab, artAllocObjectFromCodeRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED, 0, 1 -// No read barrier for the resolved or initialized cases since the caller is responsible for the -// read barrier due to the to-space invariant. -GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED, 1, 0 -GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED, 1, 0 +GENERATE_ALLOC_OBJECT_RESOLVED_REGION_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED +GENERATE_ALLOC_OBJECT_RESOLVED_REGION_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED // TODO: We could use this macro for the normal tlab allocator too. diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S index 3e8cdc9374..964ea563b0 100644 --- a/runtime/arch/mips/quick_entrypoints_mips.S +++ b/runtime/arch/mips/quick_entrypoints_mips.S @@ -1831,116 +1831,10 @@ END \name // Generate the allocation entrypoints for each allocator. GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc). -ENTRY art_quick_alloc_object_rosalloc - - # Fast path rosalloc allocation - # a0: type_idx - # a1: ArtMethod* - # s1: Thread::Current - # ----------------------------- - # t0: class - # t1: object size - # t2: rosalloc run - # t3: thread stack top offset - # t4: thread stack bottom offset - # v0: free list head - # - # t5, t6 : temps - - lw $t0, ART_METHOD_DEX_CACHE_TYPES_OFFSET_32($a1) # Load dex cache resolved types - # array. - - sll $t5, $a0, COMPRESSED_REFERENCE_SIZE_SHIFT # Shift the value. - addu $t5, $t0, $t5 # Compute the index. - lw $t0, 0($t5) # Load class (t0). - beqz $t0, .Lart_quick_alloc_object_rosalloc_slow_path - - li $t6, MIRROR_CLASS_STATUS_INITIALIZED - lw $t5, MIRROR_CLASS_STATUS_OFFSET($t0) # Check class status. - bne $t5, $t6, .Lart_quick_alloc_object_rosalloc_slow_path - - # Add a fake dependence from the following access flag and size loads to the status load. This - # is to prevent those loads from being reordered above the status load and reading wrong values. - xor $t5, $t5, $t5 - addu $t0, $t0, $t5 - - lw $t5, MIRROR_CLASS_ACCESS_FLAGS_OFFSET($t0) # Check if access flags has - li $t6, ACCESS_FLAGS_CLASS_IS_FINALIZABLE # kAccClassIsFinalizable. 
- and $t6, $t5, $t6 - bnez $t6, .Lart_quick_alloc_object_rosalloc_slow_path - - lw $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1) # Check if thread local allocation - lw $t4, THREAD_LOCAL_ALLOC_STACK_END_OFFSET($s1) # stack has any room left. - bgeu $t3, $t4, .Lart_quick_alloc_object_rosalloc_slow_path - - lw $t1, MIRROR_CLASS_OBJECT_SIZE_OFFSET($t0) # Load object size (t1). - li $t5, ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE # Check if size is for a thread local - # allocation. - bgtu $t1, $t5, .Lart_quick_alloc_object_rosalloc_slow_path - - # Compute the rosalloc bracket index from the size. Allign up the size by the rosalloc bracket - # quantum size and divide by the quantum size and subtract by 1. - - addiu $t1, $t1, -1 # Decrease obj size and shift right - srl $t1, $t1, ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT # by quantum. - - sll $t2, $t1, POINTER_SIZE_SHIFT - addu $t2, $t2, $s1 - lw $t2, THREAD_ROSALLOC_RUNS_OFFSET($t2) # Load rosalloc run (t2). - - # Load the free list head (v0). - # NOTE: this will be the return val. - - lw $v0, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2) - beqz $v0, .Lart_quick_alloc_object_rosalloc_slow_path - nop - - # Load the next pointer of the head and update the list head with the next pointer. - - lw $t5, ROSALLOC_SLOT_NEXT_OFFSET($v0) - sw $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2) - - # Store the class pointer in the header. This also overwrites the first pointer. The offsets are - # asserted to match. - -#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET -#error "Class pointer needs to overwrite next pointer." -#endif - - POISON_HEAP_REF $t0 - sw $t0, MIRROR_OBJECT_CLASS_OFFSET($v0) - - # Push the new object onto the thread local allocation stack and increment the thread local - # allocation stack top. - - sw $v0, 0($t3) - addiu $t3, $t3, COMPRESSED_REFERENCE_SIZE - sw $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1) - - # Decrement the size of the free list. - - lw $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2) - addiu $t5, $t5, -1 - sw $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2) - - sync # Fence. - - jalr $zero, $ra - nop - - .Lart_quick_alloc_object_rosalloc_slow_path: - - SETUP_SAVE_REFS_ONLY_FRAME - la $t9, artAllocObjectFromCodeRosAlloc - jalr $t9 - move $a2, $s1 # Pass self as argument. - RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER - -END art_quick_alloc_object_rosalloc -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB) /* * Entry from managed code to resolve a string, this stub will allocate a String and deliver an diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S index 0861d2d73e..2a18d53aea 100644 --- a/runtime/arch/mips64/quick_entrypoints_mips64.S +++ b/runtime/arch/mips64/quick_entrypoints_mips64.S @@ -1775,107 +1775,9 @@ END \name // Generate the allocation entrypoints for each allocator. GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc). 
-ENTRY art_quick_alloc_object_rosalloc - - # Fast path rosalloc allocation - # a0: type_idx - # a1: ArtMethod* - # s1: Thread::Current - # ----------------------------- - # t0: class - # t1: object size - # t2: rosalloc run - # t3: thread stack top offset - # a4: thread stack bottom offset - # v0: free list head - # - # a5, a6 : temps - - ld $t0, ART_METHOD_DEX_CACHE_TYPES_OFFSET_64($a1) # Load dex cache resolved types array. - - dsll $a5, $a0, COMPRESSED_REFERENCE_SIZE_SHIFT # Shift the value. - daddu $a5, $t0, $a5 # Compute the index. - lwu $t0, 0($a5) # Load class (t0). - beqzc $t0, .Lart_quick_alloc_object_rosalloc_slow_path - - li $a6, MIRROR_CLASS_STATUS_INITIALIZED - lwu $a5, MIRROR_CLASS_STATUS_OFFSET($t0) # Check class status. - bnec $a5, $a6, .Lart_quick_alloc_object_rosalloc_slow_path - - # Add a fake dependence from the following access flag and size loads to the status load. This - # is to prevent those loads from being reordered above the status load and reading wrong values. - xor $a5, $a5, $a5 - daddu $t0, $t0, $a5 - - lwu $a5, MIRROR_CLASS_ACCESS_FLAGS_OFFSET($t0) # Check if access flags has - li $a6, ACCESS_FLAGS_CLASS_IS_FINALIZABLE # kAccClassIsFinalizable. - and $a6, $a5, $a6 - bnezc $a6, .Lart_quick_alloc_object_rosalloc_slow_path - - ld $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1) # Check if thread local allocation stack - ld $a4, THREAD_LOCAL_ALLOC_STACK_END_OFFSET($s1) # has any room left. - bgeuc $t3, $a4, .Lart_quick_alloc_object_rosalloc_slow_path - - lwu $t1, MIRROR_CLASS_OBJECT_SIZE_OFFSET($t0) # Load object size (t1). - li $a5, ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE # Check if size is for a thread local - # allocation. - bltuc $a5, $t1, .Lart_quick_alloc_object_rosalloc_slow_path - - # Compute the rosalloc bracket index from the size. Allign up the size by the rosalloc bracket - # quantum size and divide by the quantum size and subtract by 1. - daddiu $t1, $t1, -1 # Decrease obj size and shift right by - dsrl $t1, $t1, ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT # quantum. - - dsll $t2, $t1, POINTER_SIZE_SHIFT - daddu $t2, $t2, $s1 - ld $t2, THREAD_ROSALLOC_RUNS_OFFSET($t2) # Load rosalloc run (t2). - - # Load the free list head (v0). - # NOTE: this will be the return val. - ld $v0, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2) - beqzc $v0, .Lart_quick_alloc_object_rosalloc_slow_path - - # Load the next pointer of the head and update the list head with the next pointer. - ld $a5, ROSALLOC_SLOT_NEXT_OFFSET($v0) - sd $a5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2) - - # Store the class pointer in the header. This also overwrites the first pointer. The offsets are - # asserted to match. - -#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET -#error "Class pointer needs to overwrite next pointer." -#endif - - POISON_HEAP_REF $t0 - sw $t0, MIRROR_OBJECT_CLASS_OFFSET($v0) - - # Push the new object onto the thread local allocation stack and increment the thread local - # allocation stack top. - sd $v0, 0($t3) - daddiu $t3, $t3, COMPRESSED_REFERENCE_SIZE - sd $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1) - - # Decrement the size of the free list. - lw $a5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2) - addiu $a5, $a5, -1 - sw $a5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2) - - sync # Fence. - - jalr $zero, $ra - .cpreturn # Restore gp from t8 in branch delay slot. 
- -.Lart_quick_alloc_object_rosalloc_slow_path: - SETUP_SAVE_REFS_ONLY_FRAME - jal artAllocObjectFromCodeRosAlloc - move $a2 ,$s1 # Pass self as argument. - RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER - -END art_quick_alloc_object_rosalloc - -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB) /* * Entry from managed code to resolve a string, this stub will allocate a String and deliver an diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S index db2fdcabea..abd9046174 100644 --- a/runtime/arch/quick_alloc_entrypoints.S +++ b/runtime/arch/quick_alloc_entrypoints.S @@ -15,15 +15,13 @@ */ .macro GENERATE_ALLOC_ENTRYPOINTS c_suffix, cxx_suffix -// Called by managed code to allocate an object. -TWO_ARG_DOWNCALL art_quick_alloc_object\c_suffix, artAllocObjectFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // Called by managed code to allocate an object of a resolved class. -TWO_ARG_DOWNCALL art_quick_alloc_object_resolved\c_suffix, artAllocObjectFromCodeResolved\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +ONE_ARG_DOWNCALL art_quick_alloc_object_resolved\c_suffix, artAllocObjectFromCodeResolved\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // Called by managed code to allocate an object of an initialized class. -TWO_ARG_DOWNCALL art_quick_alloc_object_initialized\c_suffix, artAllocObjectFromCodeInitialized\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +ONE_ARG_DOWNCALL art_quick_alloc_object_initialized\c_suffix, artAllocObjectFromCodeInitialized\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // Called by managed code to allocate an object when the caller doesn't know whether it has access // to the created type. -TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check\c_suffix, artAllocObjectFromCodeWithAccessCheck\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +ONE_ARG_DOWNCALL art_quick_alloc_object_with_checks\c_suffix, artAllocObjectFromCodeWithChecks\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // Called by managed code to allocate an array. THREE_ARG_DOWNCALL art_quick_alloc_array\c_suffix, artAllocArrayFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // Called by managed code to allocate an array of a resolve class. @@ -61,14 +59,12 @@ GENERATE_ALLOC_ENTRYPOINTS _region_tlab_instrumented, RegionTLABInstrumented // Generate the allocation entrypoints for each allocator. This is used as an alternative to // GNERATE_ALL_ALLOC_ENTRYPOINTS for selectively implementing allocation fast paths in // hand-written assembly. 
-#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(c_suffix, cxx_suffix) \ - TWO_ARG_DOWNCALL art_quick_alloc_object ## c_suffix, artAllocObjectFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER #define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(c_suffix, cxx_suffix) \ - TWO_ARG_DOWNCALL art_quick_alloc_object_resolved ## c_suffix, artAllocObjectFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER + ONE_ARG_DOWNCALL art_quick_alloc_object_resolved ## c_suffix, artAllocObjectFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER #define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(c_suffix, cxx_suffix) \ - TWO_ARG_DOWNCALL art_quick_alloc_object_initialized ## c_suffix, artAllocObjectFromCodeInitialized ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER + ONE_ARG_DOWNCALL art_quick_alloc_object_initialized ## c_suffix, artAllocObjectFromCodeInitialized ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER #define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \ - TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check ## c_suffix, artAllocObjectFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER + ONE_ARG_DOWNCALL art_quick_alloc_object_with_checks ## c_suffix, artAllocObjectFromCodeWithChecks ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER #define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(c_suffix, cxx_suffix) \ THREE_ARG_DOWNCALL art_quick_alloc_array ## c_suffix, artAllocArrayFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER #define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(c_suffix, cxx_suffix) \ @@ -93,8 +89,7 @@ GENERATE_ALLOC_ENTRYPOINTS_FOR_REGION_TLAB_ALLOCATOR .macro GENERATE_ALLOC_ENTRYPOINTS_FOR_REGION_TLAB_ALLOCATOR // This is to be separately defined for each architecture to allow a hand-written assembly fast path. -// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB) +// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB) @@ -109,8 +104,7 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB) .macro GENERATE_ALLOC_ENTRYPOINTS_FOR_TLAB_ALLOCATOR // This is to be separately defined for each architecture to allow a hand-written assembly fast path. 
-// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB) +// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB) @@ -129,7 +123,6 @@ GENERATE_ALLOC_ENTRYPOINTS_FOR_TLAB_ALLOCATOR .endm .macro GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc, DlMalloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc, DlMalloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc) @@ -142,7 +135,6 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_dlmalloc, DlMalloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc, DlMalloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc, DlMalloc) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc_instrumented, DlMallocInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc_instrumented, DlMallocInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented) @@ -156,8 +148,7 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc_instrumented, DlMal GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc_instrumented, DlMallocInstrumented) // This is to be separately defined for each architecture to allow a hand-written assembly fast path. -// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc) +// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc, RosAlloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc, RosAlloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_rosalloc, RosAlloc) @@ -169,7 +160,6 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_rosalloc, RosAlloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_rosalloc, RosAlloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_rosalloc, RosAlloc) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc_instrumented, RosAllocInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc_instrumented, RosAllocInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented) @@ -182,7 +172,6 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_rosalloc_instrumented, RosAl GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_rosalloc_instrumented, RosAllocInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_rosalloc_instrumented, RosAllocInstrumented) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer, BumpPointer) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer, BumpPointer) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer, BumpPointer) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer) @@ -195,7 +184,6 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_bump_pointer, BumpPointer) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer, BumpPointer) 
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer, BumpPointer) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer_instrumented, BumpPointerInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer_instrumented, BumpPointerInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented) @@ -208,7 +196,6 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_bump_pointer_instrumented, B GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer_instrumented, BumpPointerInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer_instrumented, BumpPointerInstrumented) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab_instrumented, TLABInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab_instrumented, TLABInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab_instrumented, TLABInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented) @@ -221,7 +208,6 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab_instrumented, TLABInstr GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab_instrumented, TLABInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab_instrumented, TLABInstrumented) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region, Region) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region, Region) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region, Region) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region, Region) @@ -234,7 +220,6 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region, Region) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region, Region) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region, Region) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_instrumented, RegionInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_instrumented, RegionInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_instrumented, RegionInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented) @@ -247,7 +232,6 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_instrumented, RegionI GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_instrumented, RegionInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_instrumented, RegionInstrumented) -GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab_instrumented, RegionTLABInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab_instrumented, RegionTLABInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab_instrumented, RegionTLABInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented) diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc index 9e385f839f..ee65fa8ab0 100644 --- a/runtime/arch/stub_test.cc +++ b/runtime/arch/stub_test.cc @@ -1062,12 +1062,8 @@ TEST_F(StubTest, AllocObject) { EXPECT_FALSE(self->IsExceptionPending()); { - // Use an arbitrary method from c to use as referrer - size_t result = Invoke3(static_cast<size_t>(c->GetDexTypeIndex().index_), // type_idx - // arbitrary - reinterpret_cast<size_t>(c->GetVirtualMethod(0, kRuntimePointerSize)), - 0U, - StubTest::GetEntrypoint(self, kQuickAllocObject), + size_t 
result = Invoke3(reinterpret_cast<size_t>(c.Get()), 0u, 0U, + StubTest::GetEntrypoint(self, kQuickAllocObjectWithChecks), self); EXPECT_FALSE(self->IsExceptionPending()); @@ -1078,8 +1074,6 @@ TEST_F(StubTest, AllocObject) { } { - // We can use null in the second argument as we do not need a method here (not used in - // resolved/initialized cases) size_t result = Invoke3(reinterpret_cast<size_t>(c.Get()), 0u, 0U, StubTest::GetEntrypoint(self, kQuickAllocObjectResolved), self); @@ -1092,8 +1086,6 @@ TEST_F(StubTest, AllocObject) { } { - // We can use null in the second argument as we do not need a method here (not used in - // resolved/initialized cases) size_t result = Invoke3(reinterpret_cast<size_t>(c.Get()), 0u, 0U, StubTest::GetEntrypoint(self, kQuickAllocObjectInitialized), self); diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index c6f4c0346f..62c29cf268 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -956,52 +956,42 @@ END_MACRO // Generate the allocation entrypoints for each allocator. GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc). -DEFINE_FUNCTION art_quick_alloc_object_rosalloc +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc). +DEFINE_FUNCTION art_quick_alloc_object_resolved_rosalloc // Fast path rosalloc allocation. - // eax: uint32_t type_idx/return value, ecx: ArtMethod* - // ebx, edx: free - PUSH edi - movl ART_METHOD_DEX_CACHE_TYPES_OFFSET_32(%ecx), %edx // Load dex cache resolved types array - // Load the class (edx) - movl 0(%edx, %eax, COMPRESSED_REFERENCE_SIZE), %edx - testl %edx, %edx // Check null class - jz .Lart_quick_alloc_object_rosalloc_slow_path - + // eax: type/return value + // ecx, ebx, edx: free movl %fs:THREAD_SELF_OFFSET, %ebx // ebx = thread // Check if the thread local allocation // stack has room - movl THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%ebx), %edi - cmpl THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%ebx), %edi - jae .Lart_quick_alloc_object_rosalloc_slow_path + movl THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%ebx), %ecx + cmpl THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%ebx), %ecx + jae .Lart_quick_alloc_object_resolved_rosalloc_slow_path - movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%edx), %edi // Load the object size (edi) + movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%eax), %ecx // Load the object size (ecx) // Check if the size is for a thread // local allocation. Also does the // finalizable and initialization check. - cmpl LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %edi - ja .Lart_quick_alloc_object_rosalloc_slow_path - shrl LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %edi // Calculate the rosalloc bracket index + cmpl LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %ecx + ja .Lart_quick_alloc_object_resolved_rosalloc_slow_path + shrl LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %ecx // Calculate the rosalloc bracket index // from object size. // Load thread local rosalloc run (ebx) // Subtract __SIZEOF_POINTER__ to subtract // one from edi as there is no 0 byte run // and the size is already aligned. - movl (THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)(%ebx, %edi, __SIZEOF_POINTER__), %ebx + movl (THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)(%ebx, %ecx, __SIZEOF_POINTER__), %ebx // Load free_list head (edi), // this will be the return value. 
- movl (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%ebx), %edi - test %edi, %edi - jz .Lart_quick_alloc_object_rosalloc_slow_path + movl (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%ebx), %ecx + jecxz .Lart_quick_alloc_object_resolved_rosalloc_slow_path // Point of no slow path. Won't go to - // the slow path from here on. Ok to - // clobber eax and ecx. - movl %edi, %eax + // the slow path from here on. // Load the next pointer of the head // and update head of free list with // next pointer - movl ROSALLOC_SLOT_NEXT_OFFSET(%eax), %edi - movl %edi, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%ebx) + movl ROSALLOC_SLOT_NEXT_OFFSET(%ecx), %edx + movl %edx, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%ebx) // Decrement size of free list by 1 decl (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(%ebx) // Store the class pointer in the @@ -1011,141 +1001,104 @@ DEFINE_FUNCTION art_quick_alloc_object_rosalloc #if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET #error "Class pointer needs to overwrite next pointer." #endif - POISON_HEAP_REF edx - movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%eax) + POISON_HEAP_REF eax + movl %eax, MIRROR_OBJECT_CLASS_OFFSET(%ecx) movl %fs:THREAD_SELF_OFFSET, %ebx // ebx = thread // Push the new object onto the thread // local allocation stack and // increment the thread local // allocation stack top. - movl THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%ebx), %edi - movl %eax, (%edi) - addl LITERAL(COMPRESSED_REFERENCE_SIZE), %edi - movl %edi, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%ebx) + movl THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%ebx), %eax + movl %ecx, (%eax) + addl LITERAL(COMPRESSED_REFERENCE_SIZE), %eax + movl %eax, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%ebx) // No fence needed for x86. - POP edi + movl %ecx, %eax // Move object to return register ret -.Lart_quick_alloc_object_rosalloc_slow_path: - POP edi +.Lart_quick_alloc_object_resolved_rosalloc_slow_path: SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx // save ref containing registers for GC // Outgoing argument set up - PUSH eax // alignment padding + subl LITERAL(8), %esp // alignment padding pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() CFI_ADJUST_CFA_OFFSET(4) - PUSH ecx PUSH eax - call SYMBOL(artAllocObjectFromCodeRosAlloc) // cxx_name(arg0, arg1, Thread*) + call SYMBOL(artAllocObjectFromCodeResolvedRosAlloc) // cxx_name(arg0, Thread*) addl LITERAL(16), %esp // pop arguments CFI_ADJUST_CFA_OFFSET(-16) RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // return or deliver exception -END_FUNCTION art_quick_alloc_object_rosalloc +END_FUNCTION art_quick_alloc_object_resolved_rosalloc -// The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab. +// The common fast path code for art_quick_alloc_object_resolved_tlab +// and art_quick_alloc_object_resolved_region_tlab. // -// EAX: type_idx/return_value, ECX: ArtMethod*, EDX: the class. -MACRO1(ALLOC_OBJECT_TLAB_FAST_PATH, slowPathLabel) - testl %edx, %edx // Check null class - jz VAR(slowPathLabel) +// EAX: type/return_value +MACRO1(ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH, slowPathLabel) movl %fs:THREAD_SELF_OFFSET, %ebx // ebx = thread movl THREAD_LOCAL_END_OFFSET(%ebx), %edi // Load thread_local_end. subl THREAD_LOCAL_POS_OFFSET(%ebx), %edi // Compute the remaining buffer size. 
- movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%edx), %esi // Load the object size. - cmpl %edi, %esi // Check if it fits. + movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%eax), %ecx // Load the object size. + cmpl %edi, %ecx // Check if it fits. ja VAR(slowPathLabel) - movl THREAD_LOCAL_POS_OFFSET(%ebx), %eax // Load thread_local_pos + movl THREAD_LOCAL_POS_OFFSET(%ebx), %edx // Load thread_local_pos // as allocated object. - addl %eax, %esi // Add the object size. - movl %esi, THREAD_LOCAL_POS_OFFSET(%ebx) // Update thread_local_pos. + addl %edx, %ecx // Add the object size. + movl %ecx, THREAD_LOCAL_POS_OFFSET(%ebx) // Update thread_local_pos. incl THREAD_LOCAL_OBJECTS_OFFSET(%ebx) // Increase thread_local_objects. // Store the class pointer in the header. // No fence needed for x86. - POISON_HEAP_REF edx - movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%eax) + POISON_HEAP_REF eax + movl %eax, MIRROR_OBJECT_CLASS_OFFSET(%edx) + movl %edx, %eax POP edi - POP esi ret // Fast path succeeded. END_MACRO -// The common slow path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab. -MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name) +// The common slow path code for art_quick_alloc_object_resolved_tlab +// and art_quick_alloc_object_resolved_region_tlab. +MACRO1(ALLOC_OBJECT_RESOLVED_TLAB_SLOW_PATH, cxx_name) POP edi - POP esi SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx // save ref containing registers for GC // Outgoing argument set up - PUSH eax // alignment padding + subl LITERAL(8), %esp // alignment padding + CFI_ADJUST_CFA_OFFSET(8) pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() CFI_ADJUST_CFA_OFFSET(4) - PUSH ecx PUSH eax - call CALLVAR(cxx_name) // cxx_name(arg0, arg1, Thread*) + call CALLVAR(cxx_name) // cxx_name(arg0, Thread*) addl LITERAL(16), %esp CFI_ADJUST_CFA_OFFSET(-16) RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // return or deliver exception END_MACRO -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB). May be called +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB). May be called // for CC if the GC is not marking. -DEFINE_FUNCTION art_quick_alloc_object_tlab +DEFINE_FUNCTION art_quick_alloc_object_resolved_tlab // Fast path tlab allocation. - // EAX: uint32_t type_idx/return value, ECX: ArtMethod*. - // EBX, EDX: free. - PUSH esi + // EAX: type + // EBX, ECX, EDX: free. PUSH edi - movl ART_METHOD_DEX_CACHE_TYPES_OFFSET_32(%ecx), %edx // Load dex cache resolved types array - // Might need to break down into multiple instructions to get the base address in a register. - // Load the class - movl 0(%edx, %eax, COMPRESSED_REFERENCE_SIZE), %edx - ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path -.Lart_quick_alloc_object_tlab_slow_path: - ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeTLAB -END_FUNCTION art_quick_alloc_object_tlab - -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB). -DEFINE_FUNCTION art_quick_alloc_object_region_tlab + ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_tlab_slow_path +.Lart_quick_alloc_object_resolved_tlab_slow_path: + ALLOC_OBJECT_RESOLVED_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedTLAB +END_FUNCTION art_quick_alloc_object_resolved_tlab + +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB). 
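With the dex-cache walk gone, the shared fast path is just a TLAB bump allocation. A rough C++ sketch of the macro above, with field names mirroring the THREAD_LOCAL_* offsets; as the asm comments note, the single size comparison also rejects finalizable and not-yet-initialized classes, because their fast-path object size is encoded as an impossibly large value:

#include <cstddef>
#include <cstdint>

struct TlabState {  // Stand-in for tlsPtr_.thread_local_{pos,end,objects}.
  uint8_t* thread_local_pos;
  uint8_t* thread_local_end;
  size_t thread_local_objects;
};

// Returns nullptr when the request does not fit in the current TLAB.
void* TlabFastPath(TlabState* self, void* klass, uint32_t object_size) {
  size_t remaining = static_cast<size_t>(self->thread_local_end - self->thread_local_pos);
  if (object_size > remaining) {
    return nullptr;  // Slow path; also catches the finalizable/uninitialized encoding.
  }
  uint8_t* obj = self->thread_local_pos;
  self->thread_local_pos += object_size;  // Bump the pointer.
  self->thread_local_objects++;
  *reinterpret_cast<void**>(obj) = klass;  // Class goes in the header; no fence on x86.
  return obj;
}

The override below reuses this fast path unchanged; the read barrier on the class is now the caller's responsibility, so the per-allocation mark check that used to live here disappears: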
+DEFINE_FUNCTION art_quick_alloc_object_resolved_region_tlab // Fast path region tlab allocation. - // EAX: uint32_t type_idx/return value, ECX: ArtMethod*. - // EBX, EDX: free. + // EAX: type/return value + // EBX, ECX, EDX: free. #if !defined(USE_READ_BARRIER) int3 int3 #endif - PUSH esi PUSH edi - movl ART_METHOD_DEX_CACHE_TYPES_OFFSET_32(%ecx), %edx // Load dex cache resolved types array - // Might need to break down into multiple instructions to get the base address in a register. - // Load the class - movl 0(%edx, %eax, COMPRESSED_REFERENCE_SIZE), %edx - // Read barrier for class load. - cmpl LITERAL(0), %fs:THREAD_IS_GC_MARKING_OFFSET - jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit - // Null check so that we can load the lock word. - testl %edx, %edx - jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit - // Check the mark bit, if it is 1 return. - testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) - jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path -.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit: - ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path -.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path: - // The read barrier slow path. Mark the class. - PUSH eax - PUSH ecx - // Outgoing argument set up - subl MACRO_LITERAL(8), %esp // Alignment padding - CFI_ADJUST_CFA_OFFSET(8) - PUSH edx // Pass the class as the first param. - call SYMBOL(artReadBarrierMark) // cxx_name(mirror::Object* obj) - movl %eax, %edx - addl MACRO_LITERAL(12), %esp - CFI_ADJUST_CFA_OFFSET(-12) - POP ecx - POP eax - jmp .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit -.Lart_quick_alloc_object_region_tlab_slow_path: - ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeRegionTLAB -END_FUNCTION art_quick_alloc_object_region_tlab + ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path +.Lart_quick_alloc_object_resolved_region_tlab_slow_path: + ALLOC_OBJECT_RESOLVED_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedRegionTLAB +END_FUNCTION art_quick_alloc_object_resolved_region_tlab + DEFINE_FUNCTION art_quick_resolve_string SETUP_SAVE_EVERYTHING_FRAME ebx, ebx diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 4c46b08a9e..facd563428 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -983,7 +983,6 @@ GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS // Comment out allocators that have x86_64 specific asm. 
// Region TLAB: -// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB) // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB) // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB) @@ -996,11 +995,9 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB) // Normal TLAB: -// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB) // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB) // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB) -// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB) // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB) @@ -1009,29 +1006,25 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB) -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc). -DEFINE_FUNCTION art_quick_alloc_object_rosalloc + +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc). +DEFINE_FUNCTION art_quick_alloc_object_resolved_rosalloc // Fast path rosalloc allocation. - // RDI: type_idx, RSI: ArtMethod*, RAX: return value - // RDX, RCX, R8, R9: free. - movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx // Load dex cache resolved types array - // Load the class (edx) - movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx - testl %edx, %edx // Check null class - jz .Lart_quick_alloc_object_rosalloc_slow_path + // RDI: mirror::Class*, RAX: return value + // RSI, RDX, RCX, R8, R9: free. // Check if the thread local // allocation stack has room. movq %gs:THREAD_SELF_OFFSET, %r8 // r8 = thread movq THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8), %rcx // rcx = alloc stack top. cmpq THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%r8), %rcx - jae .Lart_quick_alloc_object_rosalloc_slow_path + jae .Lart_quick_alloc_object_resolved_rosalloc_slow_path // Load the object size - movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdx), %eax + movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %eax // Check if the size is for a thread // local allocation. Also does the // initialized and finalizable checks. cmpl LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %eax - ja .Lart_quick_alloc_object_rosalloc_slow_path + ja .Lart_quick_alloc_object_resolved_rosalloc_slow_path // Compute the rosalloc bracket index // from the size. shrq LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %rax @@ -1045,7 +1038,7 @@ DEFINE_FUNCTION art_quick_alloc_object_rosalloc // will be the return val. movq (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9), %rax testq %rax, %rax - jz .Lart_quick_alloc_object_rosalloc_slow_path + jz .Lart_quick_alloc_object_resolved_rosalloc_slow_path // "Point of no slow path". Won't go to the slow path from here on. OK to clobber rdi and rsi. 
// Push the new object onto the thread // local allocation stack and @@ -1066,17 +1059,17 @@ DEFINE_FUNCTION art_quick_alloc_object_rosalloc #if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET #error "Class pointer needs to overwrite next pointer." #endif - POISON_HEAP_REF edx - movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%rax) + POISON_HEAP_REF edi + movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax) // Decrement the size of the free list decl (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(%r9) // No fence necessary for x86. ret -.Lart_quick_alloc_object_rosalloc_slow_path: +.Lart_quick_alloc_object_resolved_rosalloc_slow_path: SETUP_SAVE_REFS_ONLY_FRAME // save ref containing registers for GC // Outgoing argument set up - movq %gs:THREAD_SELF_OFFSET, %rdx // pass Thread::Current() - call SYMBOL(artAllocObjectFromCodeRosAlloc) // cxx_name(arg0, arg1, Thread*) + movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current() + call SYMBOL(artAllocObjectFromCodeResolvedRosAlloc) // cxx_name(arg0, Thread*) RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // return or deliver exception -END_FUNCTION art_quick_alloc_object_rosalloc +END_FUNCTION art_quick_alloc_object_resolved_rosalloc @@ -1095,19 +1088,19 @@ END_MACRO // TODO: delete ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH since it is the same as // ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH. // -// RDI: type_idx, RSI: ArtMethod*, RDX/EDX: the class, RAX: return value. -// RCX: scratch, r8: Thread::Current(). +// RDI: the class, RAX: return value. +// RCX, RSI, RDX: scratch, r8: Thread::Current(). MACRO1(ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH, slowPathLabel) ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH(RAW_VAR(slowPathLabel)) END_MACRO // The fast path code for art_quick_alloc_object_initialized_region_tlab. // -// RDI: type_idx, RSI: ArtMethod*, RDX/EDX: the class, RAX: return value. -// RCX: scratch, r8: Thread::Current(). +// RDI: the class, RAX: return value. +// RCX, RSI, RDX: scratch, r8: Thread::Current(). MACRO1(ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH, slowPathLabel) movq %gs:THREAD_SELF_OFFSET, %r8 // r8 = thread - movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdx), %ecx // Load the object size. + movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %ecx // Load the object size. movq THREAD_LOCAL_POS_OFFSET(%r8), %rax addq %rax, %rcx // Add size to pos, note that these // are both 32 bit ints, overflow @@ -1120,8 +1113,8 @@ MACRO1(ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH, slowPathLabel) // Store the class pointer in the // header. // No fence needed for x86. - POISON_HEAP_REF edx - movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%rax) + POISON_HEAP_REF edi + movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax) ret // Fast path succeeded. END_MACRO @@ -1164,12 +1157,14 @@ MACRO1(ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED, slowPathLabel) ret // Fast path succeeded. END_MACRO -// The common slow path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab. + +// The common slow path code for art_quick_alloc_object_{resolved, initialized}_tlab +// and art_quick_alloc_object_{resolved, initialized}_region_tlab.
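With the ArtMethod* parameter gone, every remaining argument shifts down one register in the calling convention. A sketch of the signature change these slow paths now compile against (the _old/_new suffixes are only for illustration, and void* stands in for the mirror types):

// Before: class (or type_idx) in RDI, ArtMethod* in RSI, Thread* in RDX.
extern "C" void* artAllocObjectFromCodeResolvedTLAB_old(void* klass, void* referrer, void* self);
// After: class in RDI, Thread* in RSI.
extern "C" void* artAllocObjectFromCodeResolvedTLAB_new(void* klass, void* self);

That is why the shared macro below loads Thread::Current() into RSI rather than RDX before the call: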
MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name) SETUP_SAVE_REFS_ONLY_FRAME // save ref containing registers for GC // Outgoing argument set up - movq %gs:THREAD_SELF_OFFSET, %rdx // pass Thread::Current() - call CALLVAR(cxx_name) // cxx_name(arg0, arg1, Thread*) + movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current() + call CALLVAR(cxx_name) // cxx_name(arg0, Thread*) RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // return or deliver exception END_MACRO @@ -1184,26 +1179,11 @@ MACRO1(ALLOC_ARRAY_TLAB_SLOW_PATH, cxx_name) RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // return or deliver exception END_MACRO -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB). May be -// called with CC if the GC is not active. -DEFINE_FUNCTION art_quick_alloc_object_tlab - // RDI: uint32_t type_idx, RSI: ArtMethod* - // RDX, RCX, R8, R9: free. RAX: return val. - movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx // Load dex cache resolved types array - // Might need to break down into multiple instructions to get the base address in a register. - // Load the class - movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx - ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path -.Lart_quick_alloc_object_tlab_slow_path: - ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeTLAB -END_FUNCTION art_quick_alloc_object_tlab - // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB). May be // called with CC if the GC is not active. DEFINE_FUNCTION art_quick_alloc_object_resolved_tlab - // RDI: mirror::Class* klass, RSI: ArtMethod* - // RDX, RCX, R8, R9: free. RAX: return val. - movq %rdi, %rdx + // RDI: mirror::Class* klass + // RDX, RSI, RCX, R8, R9: free. RAX: return val. ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_tlab_slow_path .Lart_quick_alloc_object_resolved_tlab_slow_path: ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedTLAB @@ -1212,9 +1192,8 @@ END_FUNCTION art_quick_alloc_object_resolved_tlab // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB). // May be called with CC if the GC is not active. DEFINE_FUNCTION art_quick_alloc_object_initialized_tlab - // RDI: mirror::Class* klass, RSI: ArtMethod* - // RDX, RCX, R8, R9: free. RAX: return val. - movq %rdi, %rdx + // RDI: mirror::Class* klass + // RDX, RSI, RCX, R8, R9: free. RAX: return val. ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_tlab_slow_path .Lart_quick_alloc_object_initialized_tlab_slow_path: ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedTLAB @@ -1292,49 +1271,12 @@ DEFINE_FUNCTION art_quick_alloc_array_resolved_region_tlab ALLOC_ARRAY_TLAB_SLOW_PATH artAllocArrayFromCodeResolvedRegionTLAB END_FUNCTION art_quick_alloc_array_resolved_region_tlab -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB). -DEFINE_FUNCTION art_quick_alloc_object_region_tlab - // Fast path region tlab allocation. - // RDI: uint32_t type_idx, RSI: ArtMethod* - // RDX, RCX, R8, R9: free. RAX: return val. - ASSERT_USE_READ_BARRIER - movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx // Load dex cache resolved types array - movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx // Load the class - // Null check so that we can load the lock word. 
- testl %edx, %edx - jz .Lart_quick_alloc_object_region_tlab_slow_path - // Since we have allocation entrypoint switching, we know the GC is marking. - // Check the mark bit, if it is 0, do the read barrier mark. - testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) - jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path -.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit: - // Use resolved one since we already did the null check. - ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path -.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path: - // The read barrier slow path. Mark the class. - PUSH rdi - PUSH rsi - subq LITERAL(8), %rsp // 16 byte alignment - // Outgoing argument set up - movq %rdx, %rdi // Pass the class as the first param. - call SYMBOL(artReadBarrierMark) // cxx_name(mirror::Object* obj) - movq %rax, %rdx - addq LITERAL(8), %rsp - POP rsi - POP rdi - jmp .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit -.Lart_quick_alloc_object_region_tlab_slow_path: - ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeRegionTLAB -END_FUNCTION art_quick_alloc_object_region_tlab - // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB). DEFINE_FUNCTION art_quick_alloc_object_resolved_region_tlab // Fast path region tlab allocation. - // RDI: mirror::Class* klass, RSI: ArtMethod* - // RDX, RCX, R8, R9: free. RAX: return val. + // RDI: mirror::Class* klass + // RDX, RSI, RCX, R8, R9: free. RAX: return val. ASSERT_USE_READ_BARRIER - // No read barrier since the caller is responsible for that. - movq %rdi, %rdx ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path .Lart_quick_alloc_object_resolved_region_tlab_slow_path: ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedRegionTLAB @@ -1343,10 +1285,9 @@ END_FUNCTION art_quick_alloc_object_resolved_region_tlab // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB). DEFINE_FUNCTION art_quick_alloc_object_initialized_region_tlab // Fast path region tlab allocation. - // RDI: mirror::Class* klass, RSI: ArtMethod* - // RDX, RCX, R8, R9: free. RAX: return val. + // RDI: mirror::Class* klass + // RDX, RSI, RCX, R8, R9: free. RAX: return val. ASSERT_USE_READ_BARRIER - movq %rdi, %rdx // No read barrier since the caller is responsible for that. ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_region_tlab_slow_path .Lart_quick_alloc_object_initialized_region_tlab_slow_path: diff --git a/runtime/asm_support.h b/runtime/asm_support.h index e4972da13d..bfdddf7b03 100644 --- a/runtime/asm_support.h +++ b/runtime/asm_support.h @@ -98,7 +98,7 @@ ADD_TEST_EQ(THREAD_LOCAL_POS_OFFSET, ADD_TEST_EQ(THREAD_LOCAL_END_OFFSET, art::Thread::ThreadLocalEndOffset<POINTER_SIZE>().Int32Value()) // Offset of field Thread::tlsPtr_.thread_local_objects. -#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_END_OFFSET + 2 * __SIZEOF_POINTER__) +#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_END_OFFSET + __SIZEOF_POINTER__) ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET, art::Thread::ThreadLocalObjectsOffset<POINTER_SIZE>().Int32Value()) // Offset of field Thread::tlsPtr_.mterp_current_ibase. 
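The new value follows from the field reordering in thread.h later in this change: thread_local_start moves in front of thread_local_pos, so thread_local_objects now sits one pointer after thread_local_end instead of two. A sketch of the invariant, using a stand-in struct for the tail of Thread::tlsPtr_:

#include <cstddef>
#include <cstdint>

struct TlsPtrTail {  // Stand-in for the relevant slice of Thread::tlsPtr_.
  uint8_t* thread_local_start;  // Now first, keeping pos/end 8-byte aligned for ARM ldrd.
  uint8_t* thread_local_pos;    // pos and end stay adjacent so ldrd can load both at once.
  uint8_t* thread_local_end;
  size_t thread_local_objects;
};

static_assert(offsetof(TlsPtrTail, thread_local_objects) ==
                  offsetof(TlsPtrTail, thread_local_end) + sizeof(void*),
              "THREAD_LOCAL_OBJECTS_OFFSET == THREAD_LOCAL_END_OFFSET + __SIZEOF_POINTER__");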
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h index 14c9c21356..469c45c10c 100644 --- a/runtime/entrypoints/entrypoint_utils-inl.h +++ b/runtime/entrypoints/entrypoint_utils-inl.h @@ -127,43 +127,21 @@ inline ArtMethod* GetCalleeSaveMethodCaller(Thread* self, Runtime::CalleeSaveTyp self->GetManagedStack()->GetTopQuickFrame(), type, true /* do_caller_check */); } -template <const bool kAccessCheck> -ALWAYS_INLINE -inline mirror::Class* CheckObjectAlloc(dex::TypeIndex type_idx, - ArtMethod* method, - Thread* self, - bool* slow_path) { - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - PointerSize pointer_size = class_linker->GetImagePointerSize(); - mirror::Class* klass = method->GetDexCacheResolvedType<false>(type_idx, pointer_size); - if (UNLIKELY(klass == nullptr)) { - klass = class_linker->ResolveType(type_idx, method); +ALWAYS_INLINE inline mirror::Class* CheckObjectAlloc(mirror::Class* klass, + Thread* self, + bool* slow_path) + REQUIRES_SHARED(Locks::mutator_lock_) + REQUIRES(!Roles::uninterruptible_) { + if (UNLIKELY(!klass->IsInstantiable())) { + self->ThrowNewException("Ljava/lang/InstantiationError;", klass->PrettyDescriptor().c_str()); *slow_path = true; - if (klass == nullptr) { - DCHECK(self->IsExceptionPending()); - return nullptr; // Failure - } else { - DCHECK(!self->IsExceptionPending()); - } + return nullptr; // Failure } - if (kAccessCheck) { - if (UNLIKELY(!klass->IsInstantiable())) { - self->ThrowNewException("Ljava/lang/InstantiationError;", klass->PrettyDescriptor().c_str()); - *slow_path = true; - return nullptr; // Failure - } - if (UNLIKELY(klass->IsClassClass())) { - ThrowIllegalAccessError(nullptr, "Class %s is inaccessible", - klass->PrettyDescriptor().c_str()); - *slow_path = true; - return nullptr; // Failure - } - mirror::Class* referrer = method->GetDeclaringClass(); - if (UNLIKELY(!referrer->CanAccess(klass))) { - ThrowIllegalAccessErrorClass(referrer, klass); - *slow_path = true; - return nullptr; // Failure - } + if (UNLIKELY(klass->IsClassClass())) { + ThrowIllegalAccessError(nullptr, "Class %s is inaccessible", + klass->PrettyDescriptor().c_str()); + *slow_path = true; + return nullptr; // Failure } if (UNLIKELY(!klass->IsInitialized())) { StackHandleScope<1> hs(self); @@ -191,7 +169,9 @@ inline mirror::Class* CheckObjectAlloc(dex::TypeIndex type_idx, ALWAYS_INLINE inline mirror::Class* CheckClassInitializedForObjectAlloc(mirror::Class* klass, Thread* self, - bool* slow_path) { + bool* slow_path) + REQUIRES_SHARED(Locks::mutator_lock_) + REQUIRES(!Roles::uninterruptible_) { if (UNLIKELY(!klass->IsInitialized())) { StackHandleScope<1> hs(self); Handle<mirror::Class> h_class(hs.NewHandle(klass)); @@ -213,18 +193,15 @@ inline mirror::Class* CheckClassInitializedForObjectAlloc(mirror::Class* klass, return klass; } -// Given the context of a calling Method, use its DexCache to resolve a type to a Class. If it -// cannot be resolved, throw an error. If it can, use it to create an instance. -// When verification/compiler hasn't been able to verify access, optionally perform an access -// check. +// Allocate an instance of klass. Throws InstantiationError if klass is not instantiable, +// or IllegalAccessError if klass is j.l.Class. Performs a clinit check too.
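Compared to the old kAccessCheck template, only three runtime checks survive; the referrer-based CanAccess check is gone because access is now verified when the class itself is loaded and resolved. Condensed into a sketch (not the real helper, just the decision order of CheckObjectAlloc above):

enum class AllocCheck { kOk, kInstantiationError, kIllegalAccessError, kNeedsClinit };

// The first failing check wins.
AllocCheck ClassifyObjectAlloc(bool instantiable, bool is_java_lang_Class, bool initialized) {
  if (!instantiable) return AllocCheck::kInstantiationError;       // abstract class or interface
  if (is_java_lang_Class) return AllocCheck::kIllegalAccessError;  // may not allocate a j.l.Class
  if (!initialized) return AllocCheck::kNeedsClinit;               // run <clinit>, then retry
  return AllocCheck::kOk;
}

AllocObjectFromCode, documented next, performs these checks and then the allocation itself: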
+template <bool kInstrumented> ALWAYS_INLINE -inline mirror::Object* AllocObjectFromCode(dex::TypeIndex type_idx, - ArtMethod* method, +inline mirror::Object* AllocObjectFromCode(mirror::Class* klass, Thread* self, gc::AllocatorType allocator_type) { bool slow_path = false; - mirror::Class* klass = CheckObjectAlloc<kAccessCheck>(type_idx, method, self, &slow_path); + klass = CheckObjectAlloc(klass, self, &slow_path); if (UNLIKELY(slow_path)) { if (klass == nullptr) { return nullptr; diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h index 7cc136e227..4794610ca8 100644 --- a/runtime/entrypoints/entrypoint_utils.h +++ b/runtime/entrypoints/entrypoint_utils.h @@ -45,27 +45,10 @@ class OatQuickMethodHeader; class ScopedObjectAccessAlreadyRunnable; class Thread; -template <const bool kAccessCheck> -ALWAYS_INLINE inline mirror::Class* CheckObjectAlloc(dex::TypeIndex type_idx, - ArtMethod* method, - Thread* self, - bool* slow_path) - REQUIRES_SHARED(Locks::mutator_lock_) - REQUIRES(!Roles::uninterruptible_); - -ALWAYS_INLINE inline mirror::Class* CheckClassInitializedForObjectAlloc(mirror::Class* klass, - Thread* self, - bool* slow_path) - REQUIRES_SHARED(Locks::mutator_lock_) - REQUIRES(!Roles::uninterruptible_); - // Given the context of a calling Method, use its DexCache to resolve a type to a Class. If it // cannot be resolved, throw an error. If it can, use it to create an instance. -// When verification/compiler hasn't been able to verify access, optionally perform an access -// check. -template <bool kAccessCheck, bool kInstrumented> -ALWAYS_INLINE inline mirror::Object* AllocObjectFromCode(dex::TypeIndex type_idx, - ArtMethod* method, +template <bool kInstrumented> +ALWAYS_INLINE inline mirror::Object* AllocObjectFromCode(mirror::Class* klass, Thread* self, gc::AllocatorType allocator_type) REQUIRES_SHARED(Locks::mutator_lock_) diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc index 82bb8e53c6..2d06508069 100644 --- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc @@ -29,87 +29,58 @@ namespace art { static constexpr bool kUseTlabFastPath = true; +template <bool kInitialized, + bool kFinalize, + bool kInstrumented, + gc::AllocatorType allocator_type> +static ALWAYS_INLINE inline mirror::Object* artAllocObjectFromCode( + mirror::Class* klass, + Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) { + ScopedQuickEntrypointChecks sqec(self); + DCHECK(klass != nullptr); + if (kUseTlabFastPath && !kInstrumented && allocator_type == gc::kAllocatorTypeTLAB) { + if (kInitialized || klass->IsInitialized()) { + if (!kFinalize || !klass->IsFinalizable()) { + size_t byte_count = klass->GetObjectSize(); + byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); + mirror::Object* obj; + if (LIKELY(byte_count < self->TlabSize())) { + obj = self->AllocTlab(byte_count); + DCHECK(obj != nullptr) << "AllocTlab can't fail"; + obj->SetClass(klass); + if (kUseBakerReadBarrier) { + obj->AssertReadBarrierState(); + } + QuasiAtomic::ThreadFenceForConstructor(); + return obj; + } + } + } + } + if (kInitialized) { + return AllocObjectFromCodeInitialized<kInstrumented>(klass, self, allocator_type); + } else if (!kFinalize) { + return AllocObjectFromCodeResolved<kInstrumented>(klass, self, allocator_type); + } else { + return AllocObjectFromCode<kInstrumented>(klass, self, allocator_type); + } +} + #define 
GENERATE_ENTRYPOINTS_FOR_ALLOCATOR_INST(suffix, suffix2, instrumented_bool, allocator_type) \ -extern "C" mirror::Object* artAllocObjectFromCode ##suffix##suffix2( \ - uint32_t type_idx, ArtMethod* method, Thread* self) \ +extern "C" mirror::Object* artAllocObjectFromCodeWithChecks##suffix##suffix2( \ + mirror::Class* klass, Thread* self) \ REQUIRES_SHARED(Locks::mutator_lock_) { \ - ScopedQuickEntrypointChecks sqec(self); \ - if (kUseTlabFastPath && !(instrumented_bool) && (allocator_type) == gc::kAllocatorTypeTLAB) { \ - mirror::Class* klass = method->GetDexCacheResolvedType<false>(dex::TypeIndex(type_idx), \ - kRuntimePointerSize); \ - if (LIKELY(klass != nullptr && klass->IsInitialized() && !klass->IsFinalizable())) { \ - size_t byte_count = klass->GetObjectSize(); \ - byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \ - mirror::Object* obj; \ - if (LIKELY(byte_count < self->TlabSize())) { \ - obj = self->AllocTlab(byte_count); \ - DCHECK(obj != nullptr) << "AllocTlab can't fail"; \ - obj->SetClass(klass); \ - if (kUseBakerReadBarrier) { \ - obj->AssertReadBarrierState(); \ - } \ - QuasiAtomic::ThreadFenceForConstructor(); \ - return obj; \ - } \ - } \ - } \ - return AllocObjectFromCode<false, instrumented_bool>(dex::TypeIndex(type_idx), \ - method, \ - self, \ - allocator_type); \ + return artAllocObjectFromCode<false, true, instrumented_bool, allocator_type>(klass, self); \ } \ extern "C" mirror::Object* artAllocObjectFromCodeResolved##suffix##suffix2( \ - mirror::Class* klass, ArtMethod* method ATTRIBUTE_UNUSED, Thread* self) \ + mirror::Class* klass, Thread* self) \ REQUIRES_SHARED(Locks::mutator_lock_) { \ - ScopedQuickEntrypointChecks sqec(self); \ - if (kUseTlabFastPath && !(instrumented_bool) && (allocator_type) == gc::kAllocatorTypeTLAB) { \ - if (LIKELY(klass->IsInitialized())) { \ - size_t byte_count = klass->GetObjectSize(); \ - byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \ - mirror::Object* obj; \ - if (LIKELY(byte_count < self->TlabSize())) { \ - obj = self->AllocTlab(byte_count); \ - DCHECK(obj != nullptr) << "AllocTlab can't fail"; \ - obj->SetClass(klass); \ - if (kUseBakerReadBarrier) { \ - obj->AssertReadBarrierState(); \ - } \ - QuasiAtomic::ThreadFenceForConstructor(); \ - return obj; \ - } \ - } \ - } \ - return AllocObjectFromCodeResolved<instrumented_bool>(klass, self, allocator_type); \ + return artAllocObjectFromCode<false, false, instrumented_bool, allocator_type>(klass, self); \ } \ extern "C" mirror::Object* artAllocObjectFromCodeInitialized##suffix##suffix2( \ - mirror::Class* klass, ArtMethod* method ATTRIBUTE_UNUSED, Thread* self) \ - REQUIRES_SHARED(Locks::mutator_lock_) { \ - ScopedQuickEntrypointChecks sqec(self); \ - if (kUseTlabFastPath && !(instrumented_bool) && (allocator_type) == gc::kAllocatorTypeTLAB) { \ - size_t byte_count = klass->GetObjectSize(); \ - byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \ - mirror::Object* obj; \ - if (LIKELY(byte_count < self->TlabSize())) { \ - obj = self->AllocTlab(byte_count); \ - DCHECK(obj != nullptr) << "AllocTlab can't fail"; \ - obj->SetClass(klass); \ - if (kUseBakerReadBarrier) { \ - obj->AssertReadBarrierState(); \ - } \ - QuasiAtomic::ThreadFenceForConstructor(); \ - return obj; \ - } \ - } \ - return AllocObjectFromCodeInitialized<instrumented_bool>(klass, self, allocator_type); \ -} \ -extern "C" mirror::Object* artAllocObjectFromCodeWithAccessCheck##suffix##suffix2( \ - uint32_t type_idx, ArtMethod* method, Thread* 
self) \ + mirror::Class* klass, Thread* self) \ REQUIRES_SHARED(Locks::mutator_lock_) { \ - ScopedQuickEntrypointChecks sqec(self); \ - return AllocObjectFromCode<true, instrumented_bool>(dex::TypeIndex(type_idx), \ - method, \ - self, \ - allocator_type); \ + return artAllocObjectFromCode<true, false, instrumented_bool, allocator_type>(klass, self); \ } \ extern "C" mirror::Array* artAllocArrayFromCode##suffix##suffix2( \ uint32_t type_idx, int32_t component_count, ArtMethod* method, Thread* self) \ @@ -220,10 +191,9 @@ GENERATE_ENTRYPOINTS_FOR_ALLOCATOR(RegionTLAB, gc::kAllocatorTypeRegionTLAB) extern "C" void* art_quick_alloc_array##suffix(uint32_t, int32_t, ArtMethod* ref); \ extern "C" void* art_quick_alloc_array_resolved##suffix(mirror::Class* klass, int32_t, ArtMethod* ref); \ extern "C" void* art_quick_alloc_array_with_access_check##suffix(uint32_t, int32_t, ArtMethod* ref); \ -extern "C" void* art_quick_alloc_object##suffix(uint32_t type_idx, ArtMethod* ref); \ -extern "C" void* art_quick_alloc_object_resolved##suffix(mirror::Class* klass, ArtMethod* ref); \ -extern "C" void* art_quick_alloc_object_initialized##suffix(mirror::Class* klass, ArtMethod* ref); \ -extern "C" void* art_quick_alloc_object_with_access_check##suffix(uint32_t type_idx, ArtMethod* ref); \ +extern "C" void* art_quick_alloc_object_resolved##suffix(mirror::Class* klass); \ +extern "C" void* art_quick_alloc_object_initialized##suffix(mirror::Class* klass); \ +extern "C" void* art_quick_alloc_object_with_checks##suffix(mirror::Class* klass); \ extern "C" void* art_quick_check_and_alloc_array##suffix(uint32_t, int32_t, ArtMethod* ref); \ extern "C" void* art_quick_check_and_alloc_array_with_access_check##suffix(uint32_t, int32_t, ArtMethod* ref); \ extern "C" void* art_quick_alloc_string_from_bytes##suffix(void*, int32_t, int32_t, int32_t); \ @@ -233,9 +203,9 @@ extern "C" void* art_quick_alloc_array##suffix##_instrumented(uint32_t, int32_t, extern "C" void* art_quick_alloc_array_resolved##suffix##_instrumented(mirror::Class* klass, int32_t, ArtMethod* ref); \ extern "C" void* art_quick_alloc_array_with_access_check##suffix##_instrumented(uint32_t, int32_t, ArtMethod* ref); \ extern "C" void* art_quick_alloc_object##suffix##_instrumented(uint32_t type_idx, ArtMethod* ref); \ -extern "C" void* art_quick_alloc_object_resolved##suffix##_instrumented(mirror::Class* klass, ArtMethod* ref); \ -extern "C" void* art_quick_alloc_object_initialized##suffix##_instrumented(mirror::Class* klass, ArtMethod* ref); \ -extern "C" void* art_quick_alloc_object_with_access_check##suffix##_instrumented(uint32_t type_idx, ArtMethod* ref); \ +extern "C" void* art_quick_alloc_object_resolved##suffix##_instrumented(mirror::Class* klass); \ +extern "C" void* art_quick_alloc_object_initialized##suffix##_instrumented(mirror::Class* klass); \ +extern "C" void* art_quick_alloc_object_with_checks##suffix##_instrumented(mirror::Class* klass); \ extern "C" void* art_quick_check_and_alloc_array##suffix##_instrumented(uint32_t, int32_t, ArtMethod* ref); \ extern "C" void* art_quick_check_and_alloc_array_with_access_check##suffix##_instrumented(uint32_t, int32_t, ArtMethod* ref); \ extern "C" void* art_quick_alloc_string_from_bytes##suffix##_instrumented(void*, int32_t, int32_t, int32_t); \ @@ -246,10 +216,9 @@ void SetQuickAllocEntryPoints##suffix(QuickEntryPoints* qpoints, bool instrument qpoints->pAllocArray = art_quick_alloc_array##suffix##_instrumented; \ qpoints->pAllocArrayResolved = art_quick_alloc_array_resolved##suffix##_instrumented; \ 
qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check##suffix##_instrumented; \ - qpoints->pAllocObject = art_quick_alloc_object##suffix##_instrumented; \ qpoints->pAllocObjectResolved = art_quick_alloc_object_resolved##suffix##_instrumented; \ qpoints->pAllocObjectInitialized = art_quick_alloc_object_initialized##suffix##_instrumented; \ - qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check##suffix##_instrumented; \ + qpoints->pAllocObjectWithChecks = art_quick_alloc_object_with_checks##suffix##_instrumented; \ qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array##suffix##_instrumented; \ qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check##suffix##_instrumented; \ qpoints->pAllocStringFromBytes = art_quick_alloc_string_from_bytes##suffix##_instrumented; \ @@ -259,10 +228,9 @@ void SetQuickAllocEntryPoints##suffix(QuickEntryPoints* qpoints, bool instrument qpoints->pAllocArray = art_quick_alloc_array##suffix; \ qpoints->pAllocArrayResolved = art_quick_alloc_array_resolved##suffix; \ qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check##suffix; \ - qpoints->pAllocObject = art_quick_alloc_object##suffix; \ qpoints->pAllocObjectResolved = art_quick_alloc_object_resolved##suffix; \ qpoints->pAllocObjectInitialized = art_quick_alloc_object_initialized##suffix; \ - qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check##suffix; \ + qpoints->pAllocObjectWithChecks = art_quick_alloc_object_with_checks##suffix; \ qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array##suffix; \ qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check##suffix; \ qpoints->pAllocStringFromBytes = art_quick_alloc_string_from_bytes##suffix; \ diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h index a1c5082c93..0911aeb0f4 100644 --- a/runtime/entrypoints/quick/quick_entrypoints_list.h +++ b/runtime/entrypoints/quick/quick_entrypoints_list.h @@ -23,10 +23,9 @@ V(AllocArray, void*, uint32_t, int32_t, ArtMethod*) \ V(AllocArrayResolved, void*, mirror::Class*, int32_t, ArtMethod*) \ V(AllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*) \ - V(AllocObject, void*, uint32_t, ArtMethod*) \ - V(AllocObjectResolved, void*, mirror::Class*, ArtMethod*) \ - V(AllocObjectInitialized, void*, mirror::Class*, ArtMethod*) \ - V(AllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*) \ + V(AllocObjectResolved, void*, mirror::Class*) \ + V(AllocObjectInitialized, void*, mirror::Class*) \ + V(AllocObjectWithChecks, void*, mirror::Class*) \ V(CheckAndAllocArray, void*, uint32_t, int32_t, ArtMethod*) \ V(CheckAndAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*) \ V(AllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t) \ diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc index 12836602d5..6866abb6ae 100644 --- a/runtime/entrypoints_order_test.cc +++ b/runtime/entrypoints_order_test.cc @@ -122,9 +122,9 @@ class EntrypointsOrderTest : public CommonRuntimeTest { // Skip across the entrypoints structures. 
+ EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_pos, sizeof(void*)); EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_pos, thread_local_end, sizeof(void*)); - EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_start, sizeof(void*)); - EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_objects, sizeof(void*)); + EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_objects, sizeof(void*)); EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_objects, mterp_current_ibase, sizeof(size_t)); EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_current_ibase, mterp_default_ibase, sizeof(void*)); EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_default_ibase, mterp_alt_ibase, sizeof(void*)); @@ -156,13 +156,13 @@ class EntrypointsOrderTest : public CommonRuntimeTest { EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocArray, pAllocArrayResolved, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocArrayResolved, pAllocArrayWithAccessCheck, sizeof(void*)); - EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocArrayWithAccessCheck, pAllocObject, sizeof(void*)); - EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocObject, pAllocObjectResolved, sizeof(void*)); + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocArrayWithAccessCheck, pAllocObjectResolved, + sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocObjectResolved, pAllocObjectInitialized, sizeof(void*)); - EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocObjectInitialized, pAllocObjectWithAccessCheck, + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocObjectInitialized, pAllocObjectWithChecks, sizeof(void*)); - EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocObjectWithAccessCheck, pCheckAndAllocArray, + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocObjectWithChecks, pCheckAndAllocArray, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCheckAndAllocArray, pCheckAndAllocArrayWithAccessCheck, sizeof(void*)); diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc index b0d7fb247a..d7dfcd4408 100644 --- a/runtime/interpreter/interpreter_switch_impl.cc +++ b/runtime/interpreter/interpreter_switch_impl.cc @@ -508,9 +508,8 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); obj = mirror::String::AllocEmptyString<true>(self, allocator_type); } else { - obj = AllocObjectFromCode<do_access_check, true>( - dex::TypeIndex(inst->VRegB_21c()), - shadow_frame.GetMethod(), + obj = AllocObjectFromCode<true>( + c.Ptr(), self, Runtime::Current()->GetHeap()->GetCurrentAllocator()); } diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc index c8c1563ff6..369c2614a7 100644 --- a/runtime/interpreter/mterp/mterp.cc +++ b/runtime/interpreter/mterp/mterp.cc @@ -375,10 +375,9 @@ extern "C" size_t MterpNewInstance(ShadowFrame* shadow_frame, Thread* self, uint gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); obj = mirror::String::AllocEmptyString<true>(self, allocator_type); } else { - obj = AllocObjectFromCode<false, true>(dex::TypeIndex(inst->VRegB_21c()), - shadow_frame->GetMethod(), - self, - Runtime::Current()->GetHeap()->GetCurrentAllocator()); + obj = AllocObjectFromCode<true>(c, + self, + Runtime::Current()->GetHeap()->GetCurrentAllocator()); } } if (UNLIKELY(obj == nullptr)) { diff --git a/runtime/oat.h b/runtime/oat.h index 1fd906dc1b..dc103e2b52 100644 --- a/runtime/oat.h +++ 
b/runtime/oat.h @@ -32,7 +32,7 @@ class InstructionSetFeatures; class PACKED(4) OatHeader { public: static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' }; - static constexpr uint8_t kOatVersion[] = { '0', '9', '4', '\0' }; + static constexpr uint8_t kOatVersion[] = { '0', '9', '5', '\0' }; // alloc entrypoints change static constexpr const char* kImageLocationKey = "image-location"; static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline"; diff --git a/runtime/thread.cc b/runtime/thread.cc index aff12ff4c1..33c6a40320 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -2627,10 +2627,9 @@ void Thread::DumpThreadOffset(std::ostream& os, uint32_t offset) { QUICK_ENTRY_POINT_INFO(pAllocArray) QUICK_ENTRY_POINT_INFO(pAllocArrayResolved) QUICK_ENTRY_POINT_INFO(pAllocArrayWithAccessCheck) - QUICK_ENTRY_POINT_INFO(pAllocObject) QUICK_ENTRY_POINT_INFO(pAllocObjectResolved) QUICK_ENTRY_POINT_INFO(pAllocObjectInitialized) - QUICK_ENTRY_POINT_INFO(pAllocObjectWithAccessCheck) + QUICK_ENTRY_POINT_INFO(pAllocObjectWithChecks) QUICK_ENTRY_POINT_INFO(pCheckAndAllocArray) QUICK_ENTRY_POINT_INFO(pCheckAndAllocArrayWithAccessCheck) QUICK_ENTRY_POINT_INFO(pAllocStringFromBytes) diff --git a/runtime/thread.h b/runtime/thread.h index 411d85f015..6308851096 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -1416,7 +1416,7 @@ class Thread { stacked_shadow_frame_record(nullptr), deoptimization_context_stack(nullptr), frame_id_to_shadow_frame(nullptr), name(nullptr), pthread_self(0), last_no_thread_suspension_cause(nullptr), checkpoint_function(nullptr), - thread_local_pos(nullptr), thread_local_end(nullptr), thread_local_start(nullptr), + thread_local_start(nullptr), thread_local_pos(nullptr), thread_local_end(nullptr), thread_local_objects(0), mterp_current_ibase(nullptr), mterp_default_ibase(nullptr), mterp_alt_ibase(nullptr), thread_local_alloc_stack_top(nullptr), thread_local_alloc_stack_end(nullptr), nested_signal_state(nullptr), @@ -1540,12 +1540,12 @@ class Thread { JniEntryPoints jni_entrypoints; QuickEntryPoints quick_entrypoints; + // Thread-local allocation pointer. Moved here to force alignment for thread_local_pos on ARM. + uint8_t* thread_local_start; // thread_local_pos and thread_local_end must be consecutive for ldrd and are 8 byte aligned for // potentially better performance. uint8_t* thread_local_pos; uint8_t* thread_local_end; - // Thread-local allocation pointer. 
- uint8_t* thread_local_start; size_t thread_local_objects; diff --git a/test/529-checker-unresolved/src/Main.java b/test/529-checker-unresolved/src/Main.java index 5fd51e1dca..89b9cb45c3 100644 --- a/test/529-checker-unresolved/src/Main.java +++ b/test/529-checker-unresolved/src/Main.java @@ -192,13 +192,13 @@ public class Main extends UnresolvedSuperClass { /// CHECK-START: void Main.testLicm(int) licm (before) /// CHECK: <<Class:l\d+>> LoadClass loop:B2 /// CHECK-NEXT: <<Clinit:l\d+>> ClinitCheck [<<Class>>] loop:B2 - /// CHECK-NEXT: <<New:l\d+>> NewInstance [<<Clinit>>,<<Method:[i|j]\d+>>] loop:B2 + /// CHECK-NEXT: <<New:l\d+>> NewInstance [<<Clinit>>] loop:B2 /// CHECK-NEXT: InvokeUnresolved [<<New>>] loop:B2 /// CHECK-START: void Main.testLicm(int) licm (after) /// CHECK: <<Class:l\d+>> LoadClass loop:none /// CHECK-NEXT: <<Clinit:l\d+>> ClinitCheck [<<Class>>] loop:none - /// CHECK: <<New:l\d+>> NewInstance [<<Clinit>>,<<Method:[i|j]\d+>>] loop:B2 + /// CHECK: <<New:l\d+>> NewInstance [<<Clinit>>] loop:B2 /// CHECK-NEXT: InvokeUnresolved [<<New>>] loop:B2 static public void testLicm(int count) { // Test to make sure we keep the initialization check after loading an unresolved class. diff --git a/test/621-checker-new-instance/expected.txt b/test/621-checker-new-instance/expected.txt deleted file mode 100644 index e69de29bb2..0000000000 --- a/test/621-checker-new-instance/expected.txt +++ /dev/null diff --git a/test/621-checker-new-instance/info.txt b/test/621-checker-new-instance/info.txt deleted file mode 100644 index c27c45ca7f..0000000000 --- a/test/621-checker-new-instance/info.txt +++ /dev/null @@ -1 +0,0 @@ -Tests for removing useless load class. diff --git a/test/621-checker-new-instance/src/Main.java b/test/621-checker-new-instance/src/Main.java deleted file mode 100644 index 68a46449f0..0000000000 --- a/test/621-checker-new-instance/src/Main.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -public class Main { - /// CHECK-START: java.lang.Object Main.newObject() prepare_for_register_allocation (before) - /// CHECK: LoadClass - /// CHECK: NewInstance - - /// CHECK-START: java.lang.Object Main.newObject() prepare_for_register_allocation (after) - /// CHECK-NOT: LoadClass - /// CHECK: NewInstance - public static Object newObject() { - return new Object(); - } - - /// CHECK-START: java.lang.Object Main.newFinalizableMayThrow() prepare_for_register_allocation (after) - /// CHECK: LoadClass - /// CHECK: NewInstance - public static Object newFinalizableMayThrow() { - return $inline$newFinalizableMayThrow(); - } - - public static Object $inline$newFinalizableMayThrow() { - return new FinalizableMayThrow(); - } - - public static void main(String[] args) { - newFinalizableMayThrow(); - newObject(); - } -} - -class FinalizableMayThrow { - // clinit may throw OOME. - static Object o = new Object(); - static String s; - public void finalize() { - s = "Test"; - } -}
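Taken together, the three surviving object-allocation entrypoints map onto the shared artAllocObjectFromCode template in quick_alloc_entrypoints.cc as follows (a summary sketch; the two booleans are the kInitialized and kFinalize template arguments):

struct EntrypointFlavor {
  const char* name;
  bool initialized;     // kInitialized: the clinit check may be skipped.
  bool finalize_check;  // kFinalize: the fast path must still reject finalizable classes.
};

constexpr EntrypointFlavor kObjectAllocFlavors[] = {
    {"art_quick_alloc_object_with_checks", false, true},   // instantiable + j.l.Class + clinit checks
    {"art_quick_alloc_object_resolved",    false, false},  // clinit check only
    {"art_quick_alloc_object_initialized", true,  false},  // no checks; the compiler proved them all
};

Because the entrypoint table layout and the slow-path signatures both change, compiled code cannot be mixed across this revision, hence the oat version bump to 095.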