Faster @CriticalNative for boot image.

The @CriticalNative call does not need the target method, so
we can avoid one instruction on x86, x86-64 and arm64. The
current approach for arm does not allow such optimization.

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: aosp_blueline-userdebug boots.
Test: run-gtests.sh
Test: testrunner.py --target --64 --optimizing
Bug: 112189621
Change-Id: I11b7e415be2697757cbb11c9cccf4058d1d72d7d
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 3a39ee8..dac04a5 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1060,6 +1060,12 @@
       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
       return;  // No code pointer retrieval; the runtime performs the call directly.
     }
+    case MethodLoadKind::kBootImageLinkTimePcRelative:
+      // For kCallCriticalNative we skip loading the method and do the call directly.
+      if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
+        break;
+      }
+      FALLTHROUGH_INTENDED;
     default: {
       LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
       break;
@@ -1076,9 +1082,15 @@
           PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86_64,
                                     kNativeStackAlignment,
                                     GetCriticalNativeDirectCallFrameSize>(invoke);
-      // (callee_method + offset_of_jni_entry_point)()
-      __ call(Address(callee_method.AsRegister<CpuRegister>(),
-                      ArtMethod::EntryPointFromJniOffset(kX86_64PointerSize).SizeValue()));
+      if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
+        DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
+        __ call(Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
+        RecordBootImageJniEntrypointPatch(invoke);
+      } else {
+        // (callee_method + offset_of_jni_entry_point)()
+        __ call(Address(callee_method.AsRegister<CpuRegister>(),
+                         ArtMethod::EntryPointFromJniOffset(kX86_64PointerSize).SizeValue()));
+      }
       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
       // Zero-/sign-extend the result when needed due to native and managed ABI mismatch.
       switch (invoke->GetType()) {
@@ -1218,6 +1230,12 @@
   return &string_bss_entry_patches_.back().label;
 }
 
+void CodeGeneratorX86_64::RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke) {
+  boot_image_jni_entrypoint_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
+                                                  invoke->GetResolvedMethodReference().index);
+  __ Bind(&boot_image_jni_entrypoint_patches_.back().label);
+}
+
 void CodeGeneratorX86_64::LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference) {
   if (GetCompilerOptions().IsBootImage()) {
     __ leal(reg,
@@ -1292,6 +1310,7 @@
       package_type_bss_entry_patches_.size() +
       boot_image_string_patches_.size() +
       string_bss_entry_patches_.size() +
+      boot_image_jni_entrypoint_patches_.size() +
       boot_image_other_patches_.size();
   linker_patches->reserve(size);
   if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
@@ -1323,6 +1342,8 @@
       package_type_bss_entry_patches_, linker_patches);
   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
       string_bss_entry_patches_, linker_patches);
+  EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
+      boot_image_jni_entrypoint_patches_, linker_patches);
   DCHECK_EQ(size, linker_patches->size());
 }
 
@@ -1394,35 +1415,36 @@
 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
                                          const CompilerOptions& compiler_options,
                                          OptimizingCompilerStats* stats)
-      : CodeGenerator(graph,
-                      kNumberOfCpuRegisters,
-                      kNumberOfFloatRegisters,
-                      kNumberOfCpuRegisterPairs,
-                      ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
-                                          arraysize(kCoreCalleeSaves))
-                          | (1 << kFakeReturnRegister),
-                      ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
-                                          arraysize(kFpuCalleeSaves)),
-                      compiler_options,
-                      stats),
-        block_labels_(nullptr),
-        location_builder_(graph, this),
-        instruction_visitor_(graph, this),
-        move_resolver_(graph->GetAllocator(), this),
-        assembler_(graph->GetAllocator()),
-        constant_area_start_(0),
-        boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
-        method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
-        boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
-        type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
-        public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
-        package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
-        boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
-        string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
-        boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
-        jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
-        jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
-        fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
+    : CodeGenerator(graph,
+                    kNumberOfCpuRegisters,
+                    kNumberOfFloatRegisters,
+                    kNumberOfCpuRegisterPairs,
+                    ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
+                                        arraysize(kCoreCalleeSaves))
+                        | (1 << kFakeReturnRegister),
+                    ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
+                                        arraysize(kFpuCalleeSaves)),
+                    compiler_options,
+                    stats),
+      block_labels_(nullptr),
+      location_builder_(graph, this),
+      instruction_visitor_(graph, this),
+      move_resolver_(graph->GetAllocator(), this),
+      assembler_(graph->GetAllocator()),
+      constant_area_start_(0),
+      boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+      method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+      type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+      public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+      package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+      string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+      jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+      jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+      fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }