Faster @CriticalNative for boot image.
The @CriticalNative call does not need the target method, so
for direct calls to boot image methods we can avoid one
instruction on x86, x86-64 and arm64 by loading the JNI
entrypoint directly instead of materializing the method
pointer first. The current approach for arm does not allow
such an optimization.
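On arm64, for example, the emitted sequence for such a call
changes roughly as follows (illustrative sketch only; <temp>
stands for the temporary register picked by the register
allocator, <...> for the link-time patched targets):

  Before:
    adrp <temp>, <ArtMethod>
    add  <temp>, <temp>, #:lo12:<ArtMethod>
    ldr  lr, [<temp>, #<entry_point_from_jni_offset>]
    blr  lr

  After (the entrypoint is reached through the new
  RelativeJniEntrypointPatch):
    adrp lr, <jni_entrypoint>
    ldr  lr, [lr, #:lo12:<jni_entrypoint>]
    blr  lr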
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: aosp_blueline-userdebug boots.
Test: run-gtests.sh
Test: testrunner.py --target --64 --optimizing
Bug: 112189621
Change-Id: I11b7e415be2697757cbb11c9cccf4058d1d72d7d
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 007aa43..5920a48 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -954,6 +954,7 @@
package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
@@ -4576,24 +4577,35 @@
GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
return; // No code pointer retrieval; the runtime performs the call directly.
}
+ case MethodLoadKind::kBootImageLinkTimePcRelative:
+ DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
+ if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
+ // Do not materialize the method pointer; load the entrypoint directly.
+ // Add ADRP with its PC-relative JNI entrypoint patch.
+ vixl::aarch64::Label* adrp_label =
+ NewBootImageJniEntrypointPatch(invoke->GetResolvedMethodReference());
+ EmitAdrpPlaceholder(adrp_label, lr);
+ // Add the LDR with its PC-relative JNI entrypoint patch.
+ vixl::aarch64::Label* add_label =
+ NewBootImageJniEntrypointPatch(invoke->GetResolvedMethodReference(), adrp_label);
+ EmitLdrOffsetPlaceholder(add_label, lr, lr);
+ break;
+ }
+ FALLTHROUGH_INTENDED;
default: {
LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
break;
}
}
- auto call_code_pointer_member = [&](MemberOffset offset) {
- // LR = callee_method->member;
- __ Ldr(lr, MemOperand(XRegisterFrom(callee_method), offset.Int32Value()));
- {
- // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
- ExactAssemblyScope eas(GetVIXLAssembler(),
- kInstructionSize,
- CodeBufferCheckScope::kExactSize);
- // lr()
- __ blr(lr);
- RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
- }
+ auto call_lr = [&]() {
+ // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
+ ExactAssemblyScope eas(GetVIXLAssembler(),
+ kInstructionSize,
+ CodeBufferCheckScope::kExactSize);
+ // lr()
+ __ blr(lr);
+ RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
};
switch (invoke->GetCodePtrLocation()) {
case CodePtrLocation::kCallSelf:
@@ -4611,7 +4623,15 @@
PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorARM64,
kAapcs64StackAlignment,
GetCriticalNativeDirectCallFrameSize>(invoke);
- call_code_pointer_member(ArtMethod::EntryPointFromJniOffset(kArm64PointerSize));
+ if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
+ call_lr();
+ } else {
+ // LR = callee_method->ptr_sized_fields_.data_; // EntryPointFromJni
+ MemberOffset offset = ArtMethod::EntryPointFromJniOffset(kArm64PointerSize);
+ __ Ldr(lr, MemOperand(XRegisterFrom(callee_method), offset.Int32Value()));
+ // lr()
+ call_lr();
+ }
// Zero-/sign-extend the result when needed due to native and managed ABI mismatch.
switch (invoke->GetType()) {
case DataType::Type::kBool:
@@ -4641,9 +4661,14 @@
}
break;
}
- case CodePtrLocation::kCallArtMethod:
- call_code_pointer_member(ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize));
+ case CodePtrLocation::kCallArtMethod: {
+ // LR = callee_method->ptr_sized_fields_.entry_point_from_quick_compiled_code_;
+ MemberOffset offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
+ __ Ldr(lr, MemOperand(XRegisterFrom(callee_method), offset.Int32Value()));
+ // lr()
+ call_lr();
break;
+ }
}
DCHECK(!IsLeafMethod());
@@ -4814,6 +4839,13 @@
return NewPcRelativePatch(&dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_);
}
+vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageJniEntrypointPatch(
+ MethodReference target_method,
+ vixl::aarch64::Label* adrp_label) {
+ return NewPcRelativePatch(
+ target_method.dex_file, target_method.index, adrp_label, &boot_image_jni_entrypoint_patches_);
+}
+
void CodeGeneratorARM64::EmitEntrypointThunkCall(ThreadOffset64 entrypoint_offset) {
DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope.
DCHECK(!GetCompilerOptions().IsJitCompiler());
@@ -4980,6 +5012,7 @@
package_type_bss_entry_patches_.size() +
boot_image_string_patches_.size() +
string_bss_entry_patches_.size() +
+ boot_image_jni_entrypoint_patches_.size() +
boot_image_other_patches_.size() +
call_entrypoint_patches_.size() +
baker_read_barrier_patches_.size();
@@ -5013,6 +5046,8 @@
package_type_bss_entry_patches_, linker_patches);
EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
string_bss_entry_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
+ boot_image_jni_entrypoint_patches_, linker_patches);
for (const PatchInfo<vixl::aarch64::Label>& info : call_entrypoint_patches_) {
DCHECK(info.target_dex_file == nullptr);
linker_patches->push_back(linker::LinkerPatch::CallEntrypointPatch(