ARM/ARM64: Use trampolines for slow-path entrypoint calls.

This reduces the size of the generated code. We do this only
for AOT compilation, where we get the most benefit.

Sizes of aosp_taimen-userdebug prebuilts (in bytes):
 - before:
   arm/boot*.oat: 19624804
   arm64/boot*.oat: 23265752
   oat/arm64/services.odex: 22417968
 - after:
   arm/boot*.oat: 19460500 (-160KiB)
   arm64/boot*.oat: 22957928 (-301KiB)
   oat/arm64/services.odex: 21957864 (-449KiB)

Test: m test-art-host-gtest
Test: aosp_taimen-userdebug boots.
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Bug: 12607709
Change-Id: Ie9dbd1ba256173e4e439e8bbb8832a791965cbe6
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index ada5742..2680bd0 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -629,6 +629,9 @@
                                                dex::StringIndex string_index,
                                                vixl::aarch64::Label* adrp_label = nullptr);
 
+  // Emit the BL instruction for entrypoint thunk call and record the associated patch for AOT.
+  void EmitEntrypointThunkCall(ThreadOffset64 entrypoint_offset);
+
   // Emit the CBNZ instruction for baker read barrier and record
   // the associated patch for AOT or slow path for JIT.
   void EmitBakerReadBarrierCbnz(uint32_t custom_data);
@@ -887,10 +890,6 @@
   ParallelMoveResolverARM64 move_resolver_;
   Arm64Assembler assembler_;
 
-  // Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
-  Uint32ToLiteralMap uint32_literals_;
-  // Deduplication map for 64-bit literals, used for non-patchable method address or method code.
-  Uint64ToLiteralMap uint64_literals_;
   // PC-relative method patch info for kBootImageLinkTimePcRelative/BootImageRelRo.
   // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods).
   ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_;
@@ -906,9 +905,15 @@
   ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
   // PC-relative patch info for IntrinsicObjects.
   ArenaDeque<PcRelativePatchInfo> boot_image_intrinsic_patches_;
+  // Patch info for calls to entrypoint dispatch thunks. Used for slow paths.
+  ArenaDeque<PatchInfo<vixl::aarch64::Label>> call_entrypoint_patches_;
   // Baker read barrier patch info.
   ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
 
+  // Deduplication map for 32-bit literals, used for JIT for boot image addresses.
+  Uint32ToLiteralMap uint32_literals_;
+  // Deduplication map for 64-bit literals, used for JIT for method address or method code.
+  Uint64ToLiteralMap uint64_literals_;
   // Patches for string literals in JIT compiled code.
   StringToLiteralMap jit_string_patches_;
   // Patches for class literals in JIT compiled code.