ARM64: Heap poisoning for link-time Baker CC read barrier thunks.

Also fix running out of scratch registers for HArraySet
with a large constant index and a reference to poison.

Test: Nexus 6P boots with heap poisoning enabled.
Test: testrunner.py --target with heap poisoning enabled on Nexus 6P.
Bug: 29516974
Bug: 30126666
Bug: 36141117
Change-Id: Ifb38f4a0e23a2963468772f34f294febfc340b8c
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index 53797d2..551c73b 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -383,9 +383,14 @@
   static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
   static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
   __ Tbnz(ip0.W(), LockWord::kReadBarrierStateShift, slow_path);
-  static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == -4, "Check field LDR offset");
-  static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == -4, "Check array LDR offset");
-  __ Sub(lr, lr, 4);  // Adjust the return address one instruction back to the LDR.
+  static_assert(
+      BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET,
+      "Field and array LDR offsets must be the same to reuse the same code.");
+  // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning).
+  static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+                "Field LDR must be 1 instruction (4B) before the return address label; "
+                " 2 instructions (8B) for heap poisoning.");
+  __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
   // Introduce a dependency on the lock_word including rb_state,
   // to prevent load-load reordering, and without using
   // a memory barrier (which would be more expensive).
@@ -431,8 +436,9 @@
       __ Bind(&slow_path);
       MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
       __ Ldr(ip0.W(), ldr_address);         // Load the LDR (immediate) unsigned offset.
-      __ Ubfx(ip0, ip0, 10, 12);            // Extract the offset.
+      __ Ubfx(ip0.W(), ip0.W(), 10, 12);    // Extract the offset.
       __ Ldr(ip0.W(), MemOperand(base_reg, ip0, LSL, 2));   // Load the reference.
+      // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
       __ Br(ip1);                           // Jump to the entrypoint.
       if (holder_reg.Is(base_reg)) {
         // Add null check slow path. The stack map is at the address pointed to by LR.
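Aside: the slow path above recovers the field offset by decoding the A64 "LDR
(immediate, unsigned offset)" encoding, where bits [21:10] hold imm12 and the
byte offset of a 32-bit load is imm12 scaled by 4. A minimal C++ sketch of the
same decode; the helper name is illustrative and not part of this change:

#include <cstdint>

// Mirrors the Ubfx(ip0, ip0, 10, 12) + "LSL #2" sequence above: for a 32-bit
// LDR (immediate, unsigned offset), bits [21:10] are imm12 and the byte
// offset is imm12 << 2 (scaled by the 4-byte access size).
constexpr uint32_t DecodeLdr32ImmByteOffset(uint32_t ldr_insn) {
  uint32_t imm12 = (ldr_insn >> 10) & 0xfffu;   // Ubfx 10, 12.
  return imm12 << 2;                            // LSL #2.
}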
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 4955562..4629c54 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -90,9 +90,8 @@
 constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;
 
 // Flags controlling the use of link-time generated thunks for Baker read barriers.
-// Not yet implemented for heap poisoning.
-constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = !kPoisonHeapReferences;
-constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = !kPoisonHeapReferences;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
 
 // Some instructions have special requirements for a temporary, for example
 // LoadClass/kBssEntry and LoadString/kBssEntry for Baker read barrier require
@@ -3053,6 +3052,11 @@
 
       if (!index.IsConstant()) {
         __ Add(temp, array, offset);
+      } else {
+        // We no longer need the `temp` here, so release it, as the store below
+        // may need a scratch register (if the constant index makes the offset
+        // too large) and the poisoned `source` could be using the other scratch register.
+        temps.Release(temp);
       }
       {
         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
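The HArraySet fix above is about VIXL scratch register pressure: on ARM64,
UseScratchRegisterScope normally hands out only ip0/ip1, and a macro store may
itself acquire one internally. A minimal sketch of the acquire/release pattern,
assuming plain VIXL aarch64 (the function and its comments are illustrative):

#include "aarch64/macro-assembler-aarch64.h"

// Illustrative only: release a scratch register before a macro instruction
// that may need one itself, so the two-register pool is never exhausted.
void ReleaseBeforeStore(vixl::aarch64::MacroAssembler* masm) {
  vixl::aarch64::UseScratchRegisterScope temps(masm);
  vixl::aarch64::Register temp = temps.AcquireX();
  // ... use `temp` to compute an address or value ...
  temps.Release(temp);  // A following Str() may internally acquire a scratch
                        // register if its offset does not fit the addressing mode.
}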
@@ -6093,17 +6097,21 @@
     const int32_t entry_point_offset =
         CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
     __ Ldr(ip1, MemOperand(tr, entry_point_offset));
-    EmissionCheckScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
+    EmissionCheckScope guard(GetVIXLAssembler(),
+                             (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
     vixl::aarch64::Label return_address;
     __ adr(lr, &return_address);
     __ Bind(cbnz_label);
     __ cbnz(ip1, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
-    static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == -4,
-                  "Field LDR must be 1 instruction (4B) before the return address label.");
-    __ ldr(RegisterFrom(ref, Primitive::kPrimNot), MemOperand(base.X(), offset));
+    static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+                  "Field LDR must be 1 instruction (4B) before the return address label; "
+                  " 2 instructions (8B) for heap poisoning.");
+    Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+    __ ldr(ref_reg, MemOperand(base.X(), offset));
     if (needs_null_check) {
       MaybeRecordImplicitNullCheck(instruction);
     }
+    GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
     __ Bind(&return_address);
     return;
   }
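Put together, the fast path emitted inside the EmissionCheckScope above has the
following shape under heap poisoning (an illustrative listing, assuming the
unpoison step is a single NEG, as MaybeUnpoisonHeapReference emits on ARM64):

// adr   lr, return_address      // Anchor for the introspection offsets.
// cbnz  ip1, <field thunk>      // Placeholder, patched at link time.
// ldr   wRef, [base, #offset]   // -8 bytes from return_address (-4 without poisoning).
// neg   wRef, wRef              // Unpoison; elided when poisoning is disabled.
// return_address: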
diff --git a/runtime/arch/arm64/asm_support_arm64.h b/runtime/arch/arm64/asm_support_arm64.h
index cfcd6a7..6b77200 100644
--- a/runtime/arch/arm64/asm_support_arm64.h
+++ b/runtime/arch/arm64/asm_support_arm64.h
@@ -32,9 +32,17 @@
 #define BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET 0x300
 
 // The offset of the reference load LDR from the return address in LR for field loads.
+#ifdef USE_HEAP_POISONING
+#define BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET -8
+#else
 #define BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET -4
+#endif
 // The offset of the reference load LDR from the return address in LR for array loads.
+#ifdef USE_HEAP_POISONING
+#define BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET -8
+#else
 #define BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET -4
+#endif
 // The offset of the reference load LDR from the return address in LR for GC root loads.
 #define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET -8
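The asymmetry (-8 with poisoning, -4 without) exists because the poisoning
build emits one extra unpoison instruction between the reference load and the
return address label. A minimal sketch, assuming heap reference poisoning is
32-bit negation (the helper names here are illustrative):

#include <cstdint>

// Sketch: a reference is poisoned by two's-complement negation of its 32-bit
// value, so unpoisoning is the same operation (a single NEG instruction).
constexpr uint32_t PoisonHeapRef(uint32_t ref) { return 0u - ref; }
constexpr uint32_t UnpoisonHeapRef(uint32_t poisoned) { return 0u - poisoned; }

static_assert(UnpoisonHeapRef(PoisonHeapRef(0x12345678u)) == 0x12345678u,
              "Poisoning must be an involution");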
 
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index c7fa7f5..d043962 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -2649,7 +2649,8 @@
      *
      * For field accesses and array loads with a constant index the thunk loads
      * the reference into IP0 using introspection and calls the main entrypoint,
-     * art_quick_read_barrier_mark_introspection.
+     * art_quick_read_barrier_mark_introspection. With heap poisoning enabled,
+     * the passed reference is poisoned.
      *
      * For array accesses with non-constant index, the thunk inserts the bits
      * 16-21 of the LDR instruction to the entrypoint address, effectively
@@ -2663,6 +2664,7 @@
      *
      * For GC root accesses we cannot use the main entrypoint because of the
      * different offset where the LDR instruction in generated code is located.
+     * (And even with heap poisoning enabled, GC roots are not poisoned.)
      * To re-use the same entrypoint pointer in generated code, we make sure
      * that the gc root entrypoint (a copy of the entrypoint with a different
      * offset for introspection loads) is located at a known offset (768 bytes,
@@ -2686,6 +2688,8 @@
     .balign 512
 ENTRY art_quick_read_barrier_mark_introspection
     // At this point, IP0 contains the reference, IP1 can be freely used.
+    // With heap poisoning enabled, the incoming reference is poisoned, so unpoison it first.
+    UNPOISON_HEAP_REF wIP0
     // If reference is null, just return it in the right register.
     cbz   wIP0, .Lmark_introspection_return
     // Use wIP1 as temp and check the mark bit of the reference.
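For clarity, the entrypoint prologue above behaves roughly like the following
C++ pseudocode (illustrative, not ART code; it assumes UNPOISON_HEAP_REF is a
NEG under USE_HEAP_POISONING and a no-op otherwise):

#include <cstdint>

// `ref` arrives in IP0 from the thunk, poisoned when USE_HEAP_POISONING is set.
uint32_t MarkIntrospectionPrologue(uint32_t ref) {
#ifdef USE_HEAP_POISONING
  ref = 0u - ref;  // UNPOISON_HEAP_REF wIP0.
#endif
  if (ref == 0u) {
    return ref;  // cbz wIP0, .Lmark_introspection_return
  }
  // ... check the mark bit of the reference (wIP1 as temp) and mark if needed ...
  return ref;
}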