ARM64: Use link-time generated thunks for Baker CC read barrier.

Remaining work for follow-up CLs:
  - array loads,
  - volatile field loads,
  - use implicit null check in field thunk.

Test: Added tests to relative_patcher_arm64
Test: New run-test 160-read-barrier-stress
Test: m test-art-target-gtest on Nexus 6P.
Test: testrunner.py --target on Nexus 6P.
Bug: 29516974
Bug: 30126666
Bug: 36141117
Change-Id: Id68ff171c55a3f1bf1ac1b657f480531aa7b3710
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 423fd3c..77dcb5a 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -2507,9 +2507,11 @@
     // We use a block to end the scratch scope before the write barrier, thus
     // freeing the temporary registers so they can be used in `MarkGCCard`.
     UseScratchRegisterScope temps(masm);
+    Location temp3_loc;  // Used only for Baker read barrier.
     Register temp3;
     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
-      temp3 = WRegisterFrom(locations->GetTemp(2));
+      temp3_loc = locations->GetTemp(2);
+      temp3 = WRegisterFrom(temp3_loc);
     } else {
       temp3 = temps.AcquireW();
     }
@@ -2527,7 +2529,7 @@
                                                           temp1_loc,
                                                           src.W(),
                                                           class_offset,
-                                                          temp2,
+                                                          temp3_loc,
                                                           /* needs_null_check */ false,
                                                           /* use_load_acquire */ false);
           // Bail out if the source is not a non primitive array.
@@ -2536,7 +2538,7 @@
                                                           temp1_loc,
                                                           temp1,
                                                           component_offset,
-                                                          temp2,
+                                                          temp3_loc,
                                                           /* needs_null_check */ false,
                                                           /* use_load_acquire */ false);
           __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
@@ -2553,7 +2555,7 @@
                                                         temp1_loc,
                                                         dest.W(),
                                                         class_offset,
-                                                        temp2,
+                                                        temp3_loc,
                                                         /* needs_null_check */ false,
                                                         /* use_load_acquire */ false);
 
@@ -2570,7 +2572,7 @@
                                                           temp2_loc,
                                                           temp1,
                                                           component_offset,
-                                                          temp3,
+                                                          temp3_loc,
                                                           /* needs_null_check */ false,
                                                           /* use_load_acquire */ false);
           __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
@@ -2589,7 +2591,7 @@
                                                         temp2_loc,
                                                         src.W(),
                                                         class_offset,
-                                                        temp3,
+                                                        temp3_loc,
                                                         /* needs_null_check */ false,
                                                         /* use_load_acquire */ false);
         // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
@@ -2603,7 +2605,7 @@
                                                           temp1_loc,
                                                           temp1,
                                                           component_offset,
-                                                          temp2,
+                                                          temp3_loc,
                                                           /* needs_null_check */ false,
                                                           /* use_load_acquire */ false);
           // /* HeapReference<Class> */ temp1 = temp1->super_class_
@@ -2687,7 +2689,7 @@
                                                         temp1_loc,
                                                         src.W(),
                                                         class_offset,
-                                                        temp2,
+                                                        temp3_loc,
                                                         /* needs_null_check */ false,
                                                         /* use_load_acquire */ false);
         // /* HeapReference<Class> */ temp2 = temp1->component_type_
@@ -2695,7 +2697,7 @@
                                                         temp2_loc,
                                                         temp1,
                                                         component_offset,
-                                                        temp3,
+                                                        temp3_loc,
                                                         /* needs_null_check */ false,
                                                         /* use_load_acquire */ false);
         __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());