Faster @CriticalNative for boot image.

The @CriticalNative call does not need the target method, so
we can avoid one instruction on x86, x86-64 and arm64. The
current approach for arm does not allow such an optimization.

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: aosp_blueline-userdebug boots.
Test: run-gtests.sh
Test: testrunner.py --target --64 --optimizing
Bug: 112189621
Change-Id: I11b7e415be2697757cbb11c9cccf4058d1d72d7d
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 007aa43..5920a48 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -954,6 +954,7 @@
       package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
@@ -4576,24 +4577,35 @@
       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
       return;  // No code pointer retrieval; the runtime performs the call directly.
     }
+    case MethodLoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
+      if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
+        // Do not materialize the method pointer, load the entrypoint directly.
+        // Add ADRP with its PC-relative JNI entrypoint patch.
+        vixl::aarch64::Label* adrp_label =
+            NewBootImageJniEntrypointPatch(invoke->GetResolvedMethodReference());
+        EmitAdrpPlaceholder(adrp_label, lr);
+        // Add the LDR with its PC-relative JNI entrypoint patch.
+        vixl::aarch64::Label* add_label =
+            NewBootImageJniEntrypointPatch(invoke->GetResolvedMethodReference(), adrp_label);
+        EmitLdrOffsetPlaceholder(add_label, lr, lr);
+        break;
+      }
+      FALLTHROUGH_INTENDED;
     default: {
       LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
       break;
     }
   }
 
-  auto call_code_pointer_member = [&](MemberOffset offset) {
-    // LR = callee_method->member;
-    __ Ldr(lr, MemOperand(XRegisterFrom(callee_method), offset.Int32Value()));
-    {
-      // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
-      ExactAssemblyScope eas(GetVIXLAssembler(),
-                             kInstructionSize,
-                             CodeBufferCheckScope::kExactSize);
-      // lr()
-      __ blr(lr);
-      RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
-    }
+  auto call_lr = [&]() {
+    // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
+    ExactAssemblyScope eas(GetVIXLAssembler(),
+                           kInstructionSize,
+                           CodeBufferCheckScope::kExactSize);
+    // lr()
+    __ blr(lr);
+    RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
   };
   switch (invoke->GetCodePtrLocation()) {
     case CodePtrLocation::kCallSelf:
@@ -4611,7 +4623,15 @@
           PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorARM64,
                                     kAapcs64StackAlignment,
                                     GetCriticalNativeDirectCallFrameSize>(invoke);
-      call_code_pointer_member(ArtMethod::EntryPointFromJniOffset(kArm64PointerSize));
+      if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
+        call_lr();
+      } else {
+        // LR = callee_method->ptr_sized_fields_.data_;  // EntryPointFromJni
+        MemberOffset offset = ArtMethod::EntryPointFromJniOffset(kArm64PointerSize);
+        __ Ldr(lr, MemOperand(XRegisterFrom(callee_method), offset.Int32Value()));
+        // lr()
+        call_lr();
+      }
       // Zero-/sign-extend the result when needed due to native and managed ABI mismatch.
       switch (invoke->GetType()) {
         case DataType::Type::kBool:
@@ -4641,9 +4661,14 @@
       }
       break;
     }
-    case CodePtrLocation::kCallArtMethod:
-      call_code_pointer_member(ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize));
+    case CodePtrLocation::kCallArtMethod: {
+      // LR = callee_method->ptr_sized_fields_.entry_point_from_quick_compiled_code_;
+      MemberOffset offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
+      __ Ldr(lr, MemOperand(XRegisterFrom(callee_method), offset.Int32Value()));
+      // lr()
+      call_lr();
       break;
+    }
   }
 
   DCHECK(!IsLeafMethod());
@@ -4814,6 +4839,13 @@
   return NewPcRelativePatch(&dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_);
 }
 
+vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageJniEntrypointPatch(
+    MethodReference target_method,
+    vixl::aarch64::Label* adrp_label) {
+  return NewPcRelativePatch(
+      target_method.dex_file, target_method.index, adrp_label, &boot_image_jni_entrypoint_patches_);
+}
+
 void CodeGeneratorARM64::EmitEntrypointThunkCall(ThreadOffset64 entrypoint_offset) {
   DCHECK(!__ AllowMacroInstructions());  // In ExactAssemblyScope.
   DCHECK(!GetCompilerOptions().IsJitCompiler());
@@ -4980,6 +5012,7 @@
       package_type_bss_entry_patches_.size() +
       boot_image_string_patches_.size() +
       string_bss_entry_patches_.size() +
+      boot_image_jni_entrypoint_patches_.size() +
       boot_image_other_patches_.size() +
       call_entrypoint_patches_.size() +
       baker_read_barrier_patches_.size();
@@ -5013,6 +5046,8 @@
       package_type_bss_entry_patches_, linker_patches);
   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
       string_bss_entry_patches_, linker_patches);
+  EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
+      boot_image_jni_entrypoint_patches_, linker_patches);
   for (const PatchInfo<vixl::aarch64::Label>& info : call_entrypoint_patches_) {
     DCHECK(info.target_dex_file == nullptr);
     linker_patches->push_back(linker::LinkerPatch::CallEntrypointPatch(