Faster @CriticalNative for boot image.
The @CriticalNative call does not need the target method, so
we can skip loading it and avoid one instruction on x86, x86-64
and arm64. The current approach for arm does not allow such an
optimization.
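As background (not part of this change; the names are made up),
a @CriticalNative method takes no JNIEnv* and no jclass/jobject
arguments, which is why the call site only needs the JNI
entrypoint and not the target method itself:

    // Illustrative sketch only; class and method names are hypothetical.
    // Java side:  @CriticalNative static native int add(int a, int b);
    #include <jni.h>
    extern "C" JNIEXPORT jint JNICALL Java_com_example_Ops_add(jint a, jint b) {
      return a + b;  // no JNIEnv*, no jclass: plain arguments only
    }

For methods resolvable at boot image link time
(kBootImageLinkTimePcRelative), the x86-64 call site below skips
the method load and emits a single call whose PC-relative operand
is fixed up by the boot image linker (RelativeJniEntrypointPatch).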
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: aosp_blueline-userdebug boots.
Test: run-gtests.sh
Test: testrunner.py --target --64 --optimizing
Bug: 112189621
Change-Id: I11b7e415be2697757cbb11c9cccf4058d1d72d7d
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 3a39ee8..dac04a5 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1060,6 +1060,12 @@
GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
return; // No code pointer retrieval; the runtime performs the call directly.
}
+ case MethodLoadKind::kBootImageLinkTimePcRelative:
+ // For kCallCriticalNative we skip loading the method and do the call directly.
+ if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
+ break;
+ }
+ FALLTHROUGH_INTENDED;
default: {
LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
break;
@@ -1076,9 +1082,15 @@
PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86_64,
kNativeStackAlignment,
GetCriticalNativeDirectCallFrameSize>(invoke);
- // (callee_method + offset_of_jni_entry_point)()
- __ call(Address(callee_method.AsRegister<CpuRegister>(),
- ArtMethod::EntryPointFromJniOffset(kX86_64PointerSize).SizeValue()));
+ if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
+ DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
+ __ call(Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
+ RecordBootImageJniEntrypointPatch(invoke);
+ } else {
+ // (callee_method + offset_of_jni_entry_point)()
+ __ call(Address(callee_method.AsRegister<CpuRegister>(),
+ ArtMethod::EntryPointFromJniOffset(kX86_64PointerSize).SizeValue()));
+ }
RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
// Zero-/sign-extend the result when needed due to native and managed ABI mismatch.
switch (invoke->GetType()) {
@@ -1218,6 +1230,12 @@
return &string_bss_entry_patches_.back().label;
}
+void CodeGeneratorX86_64::RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke) {
+ boot_image_jni_entrypoint_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
+ invoke->GetResolvedMethodReference().index);
+ __ Bind(&boot_image_jni_entrypoint_patches_.back().label);
+}
+
void CodeGeneratorX86_64::LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference) {
if (GetCompilerOptions().IsBootImage()) {
__ leal(reg,
@@ -1292,6 +1310,7 @@
package_type_bss_entry_patches_.size() +
boot_image_string_patches_.size() +
string_bss_entry_patches_.size() +
+ boot_image_jni_entrypoint_patches_.size() +
boot_image_other_patches_.size();
linker_patches->reserve(size);
if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
@@ -1323,6 +1342,8 @@
package_type_bss_entry_patches_, linker_patches);
EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
string_bss_entry_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
+ boot_image_jni_entrypoint_patches_, linker_patches);
DCHECK_EQ(size, linker_patches->size());
}
@@ -1394,35 +1415,36 @@
CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats)
- : CodeGenerator(graph,
- kNumberOfCpuRegisters,
- kNumberOfFloatRegisters,
- kNumberOfCpuRegisterPairs,
- ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
- arraysize(kCoreCalleeSaves))
- | (1 << kFakeReturnRegister),
- ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
- arraysize(kFpuCalleeSaves)),
- compiler_options,
- stats),
- block_labels_(nullptr),
- location_builder_(graph, this),
- instruction_visitor_(graph, this),
- move_resolver_(graph->GetAllocator(), this),
- assembler_(graph->GetAllocator()),
- constant_area_start_(0),
- boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
- method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
- boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
- type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
- public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
- package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
- boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
- string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
- boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
- jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
- jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
- fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
+ : CodeGenerator(graph,
+ kNumberOfCpuRegisters,
+ kNumberOfFloatRegisters,
+ kNumberOfCpuRegisterPairs,
+ ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
+ arraysize(kCoreCalleeSaves))
+ | (1 << kFakeReturnRegister),
+ ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
+ arraysize(kFpuCalleeSaves)),
+ compiler_options,
+ stats),
+ block_labels_(nullptr),
+ location_builder_(graph, this),
+ instruction_visitor_(graph, this),
+ move_resolver_(graph->GetAllocator(), this),
+ assembler_(graph->GetAllocator()),
+ constant_area_start_(0),
+ boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
}