ARM/ARM64: Use trampolines for slow-path entrypoint calls.

This reduces the size of the generated code. We do this only
for AOT compilation where we get the most benefit.

Sizes of aosp_taimen-userdebug prebuilts:
 - before:
   arm/boot*.oat: 19624804
   arm64/boot*.oat: 23265752
   oat/arm64/services.odex: 22417968
 - after:
   arm/boot*.oat: 19460500 (-160KiB)
   arm64/boot*.oat: 22957928 (-301KiB)
   oat/arm64/services.odex: 21957864 (-449KiB)

Test: m test-art-host-gtest
Test: aosp_taimen-userdebug boots.
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Bug: 12607709
Change-Id: Ie9dbd1ba256173e4e439e8bbb8832a791965cbe6
diff --git a/compiler/linker/linker_patch.h b/compiler/linker/linker_patch.h
index f9e3930..1c523de 100644
--- a/compiler/linker/linker_patch.h
+++ b/compiler/linker/linker_patch.h
@@ -52,6 +52,7 @@
     kTypeBssEntry,
     kStringRelative,
     kStringBssEntry,
+    kCallEntrypoint,
     kBakerReadBarrierBranch,
   };
 
@@ -141,6 +142,15 @@
     return patch;
   }
 
+  // Creates a kCallEntrypoint patch located at `literal_offset`.
+  // `entrypoint_offset` is the offset of the entrypoint in the Thread object.
+  // The patch has no target dex file.
+  static LinkerPatch CallEntrypointPatch(size_t literal_offset, uint32_t entrypoint_offset) {
+    LinkerPatch result(literal_offset, Type::kCallEntrypoint, /* target_dex_file= */ nullptr);
+    result.entrypoint_offset_ = entrypoint_offset;
+    return result;
+  }
+
   static LinkerPatch BakerReadBarrierBranchPatch(size_t literal_offset,
                                                  uint32_t custom_value1 = 0u,
                                                  uint32_t custom_value2 = 0u) {
@@ -216,6 +226,11 @@
     return pc_insn_offset_;
   }
 
+  uint32_t EntrypointOffset() const {  // Returns the offset stored by CallEntrypointPatch().
+    DCHECK(patch_type_ == Type::kCallEntrypoint);  // Only meaningful for kCallEntrypoint patches.
+    return entrypoint_offset_;
+  }
+
   uint32_t GetBakerCustomValue1() const {
     DCHECK(patch_type_ == Type::kBakerReadBarrierBranch);
     return baker_custom_value1_;
@@ -249,6 +264,7 @@
     uint32_t type_idx_;           // Type index for Type patches.
     uint32_t string_idx_;         // String index for String patches.
     uint32_t intrinsic_data_;     // Data for IntrinsicObjects.
+    uint32_t entrypoint_offset_;  // Entrypoint offset in the Thread object.
     uint32_t baker_custom_value1_;
     static_assert(sizeof(method_idx_) == sizeof(cmp1_), "needed by relational operators");
     static_assert(sizeof(type_idx_) == sizeof(cmp1_), "needed by relational operators");