ARM/ARM64: Clean up Baker RB introspection codegen.

Remove the guard flags and unused code.

Avoid unnecessary temporaries for JIT. This was missed in
    https://android-review.googlesource.com/725705
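
For reference, the field-load temp rule after this change is roughly
the following (a condensed sketch of the ARM64 hunk below, not the
verbatim code):

    if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
      locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());
      if (!field_info.IsVolatile()) {
        // Non-volatile loads use the introspection thunk and need a temp
        // only when the offset does not fit a single LDR.
        if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
          locations->AddTemp(FixedTempLocation());
        }
      } else {
        // Volatile loads keep a temp for the marking slow path.
        locations->AddTemp(Location::RequiresRegister());
      }
    }

The rule now applies to JIT as well as AOT, so the JIT no longer
reserves a temp for small-offset non-volatile field loads.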

Test: m test-art-host-gtest
Test: Pixel 2 XL boots.
Test: m test-art-target-gtest
Test: testrunner.py --target --optimizing --jit
Bug: 36141117
Change-Id: Ic1bdc640db3f18d7169b0e62644f190e65a98d38
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 260920c..723446b 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -89,15 +89,10 @@
 
 // Reference load (except object array loads) is using LDR Wt, [Xn, #offset] which can handle
 // offset < 16KiB. For offsets >= 16KiB, the load shall be emitted as two or more instructions.
-// For the Baker read barrier implementation using link-generated thunks we need to split
+// For the Baker read barrier implementation using link-time generated thunks we need to split
 // the offset explicitly.
 constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;
 
-// Flags controlling the use of link-time generated thunks for Baker read barriers.
-constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
-constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
-constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
-
 // Some instructions have special requirements for a temporary, for example
 // LoadClass/kBssEntry and LoadString/kBssEntry for Baker read barrier require
 // temp that's not an R0 (to avoid an extra move) and Baker read barrier field
@@ -680,50 +675,6 @@
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARM64);
 };
 
-// Slow path marking an object reference `ref` during a read
-// barrier. The field `obj.field` in the object `obj` holding this
-// reference does not get updated by this slow path after marking.
-//
-// This means that after the execution of this slow path, `ref` will
-// always be up-to-date, but `obj.field` may not; i.e., after the
-// flip, `ref` will be a to-space reference, but `obj.field` will
-// probably still be a from-space reference (unless it gets updated by
-// another thread, or if another thread installed another object
-// reference (different from `ref`) in `obj.field`).
-//
-// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked or an empty
-// location; in the latter case, the read barrier marking runtime
-// entry point will be loaded by the slow path code itself.
-class ReadBarrierMarkSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
- public:
-  ReadBarrierMarkSlowPathARM64(HInstruction* instruction,
-                               Location ref,
-                               Location entrypoint = Location::NoLocation())
-      : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint) {
-    DCHECK(kEmitCompilerReadBarrier);
-  }
-
-  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM64"; }
-
-  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    LocationSummary* locations = instruction_->GetLocations();
-    DCHECK(locations->CanCall());
-    DCHECK(ref_.IsRegister()) << ref_;
-    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
-    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
-        << "Unexpected instruction in read barrier marking slow path: "
-        << instruction_->DebugName();
-
-    __ Bind(GetEntryLabel());
-    GenerateReadBarrierMarkRuntimeCall(codegen);
-    __ B(GetExitLabel());
-  }
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
-};
-
 // Slow path loading `obj`'s lock word, loading a reference from
 // object `*(obj + offset + (index << scale_factor))` into `ref`, and
 // marking `ref` if `obj` is gray according to the lock word (Baker
@@ -2324,17 +2275,16 @@
                                                            : LocationSummary::kNoCall);
   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
-    // We need a temporary register for the read barrier marking slow
-    // path in CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier.
-    if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
-        !Runtime::Current()->UseJitCompilation() &&
-        !field_info.IsVolatile()) {
-      // If link-time thunks for the Baker read barrier are enabled, for AOT
-      // non-volatile loads we need a temporary only if the offset is too big.
+    if (!field_info.IsVolatile()) {
+      // We need a temporary register for the read barrier load in
+      // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier()
+      // only if the offset is too big.
       if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
         locations->AddTemp(FixedTempLocation());
       }
     } else {
+      // Volatile fields need a temporary register for the read barrier marking slow
+      // path in CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier().
       locations->AddTemp(Location::RequiresRegister());
     }
   }
@@ -2798,14 +2748,11 @@
                                                            : LocationSummary::kNoCall);
   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
-    // We need a temporary register for the read barrier marking slow
-    // path in CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier.
-    if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
-        !Runtime::Current()->UseJitCompilation() &&
-        instruction->GetIndex()->IsConstant()) {
+    if (instruction->GetIndex()->IsConstant()) {
       // Array loads with constant index are treated as field loads.
-      // If link-time thunks for the Baker read barrier are enabled, for AOT
-      // constant index loads we need a temporary only if the offset is too big.
+      // We need a temporary register for the read barrier load in
+      // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier()
+      // only if the offset is too big.
       uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
       uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
       offset += index << DataType::SizeShift(DataType::Type::kReference);
@@ -2813,6 +2760,8 @@
         locations->AddTemp(FixedTempLocation());
       }
     } else {
+      // We need a non-scratch temporary for the array data pointer in
+      // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier().
       locations->AddTemp(Location::RequiresRegister());
     }
   }
@@ -2868,7 +2817,7 @@
     } else {
       Register temp = WRegisterFrom(locations->GetTemp(0));
       codegen_->GenerateArrayLoadWithBakerReadBarrier(
-          instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ false);
+          out, obj.W(), offset, index, temp, /* needs_null_check */ false);
     }
   } else {
     // General case.
@@ -6276,74 +6225,39 @@
     if (kUseBakerReadBarrier) {
       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
       // Baker's read barrier are used.
-      if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
-        // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
-        // the Marking Register) to decide whether we need to enter
-        // the slow path to mark the GC root.
-        //
-        // We use shared thunks for the slow path; shared within the method
-        // for JIT, across methods for AOT. That thunk checks the reference
-        // and jumps to the entrypoint if needed.
-        //
-        //     lr = &return_address;
-        //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
-        //     if (mr) {  // Thread::Current()->GetIsGcMarking()
-        //       goto gc_root_thunk<root_reg>(lr)
-        //     }
-        //   return_address:
 
-        UseScratchRegisterScope temps(GetVIXLAssembler());
-        DCHECK(temps.IsAvailable(ip0));
-        DCHECK(temps.IsAvailable(ip1));
-        temps.Exclude(ip0, ip1);
-        uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
+      // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
+      // the Marking Register) to decide whether we need to enter
+      // the slow path to mark the GC root.
+      //
+      // We use shared thunks for the slow path; shared within the method
+      // for JIT, across methods for AOT. That thunk checks the reference
+      // and jumps to the entrypoint if needed.
+      //
+      //     lr = &return_address;
+      //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
+      //     if (mr) {  // Thread::Current()->GetIsGcMarking()
+      //       goto gc_root_thunk<root_reg>(lr)
+      //     }
+      //   return_address:
 
-        ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
-        vixl::aarch64::Label return_address;
-        __ adr(lr, &return_address);
-        if (fixup_label != nullptr) {
-          __ bind(fixup_label);
-        }
-        static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
-                      "GC root LDR must be 2 instruction (8B) before the return address label.");
-        __ ldr(root_reg, MemOperand(obj.X(), offset));
-        EmitBakerReadBarrierCbnz(custom_data);
-        __ bind(&return_address);
-      } else {
-        // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
-        // the Marking Register) to decide whether we need to enter
-        // the slow path to mark the GC root.
-        //
-        //   GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
-        //   if (mr) {  // Thread::Current()->GetIsGcMarking()
-        //     // Slow path.
-        //     entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
-        //     root = entrypoint(root);  // root = ReadBarrier::Mark(root);  // Entry point call.
-        //   }
+      UseScratchRegisterScope temps(GetVIXLAssembler());
+      DCHECK(temps.IsAvailable(ip0));
+      DCHECK(temps.IsAvailable(ip1));
+      temps.Exclude(ip0, ip1);
+      uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
 
-        // Slow path marking the GC root `root`. The entrypoint will
-        // be loaded by the slow path code.
-        SlowPathCodeARM64* slow_path =
-            new (GetScopedAllocator()) ReadBarrierMarkSlowPathARM64(instruction, root);
-        AddSlowPath(slow_path);
-
-        // /* GcRoot<mirror::Object> */ root = *(obj + offset)
-        if (fixup_label == nullptr) {
-          __ Ldr(root_reg, MemOperand(obj, offset));
-        } else {
-          EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj);
-        }
-        static_assert(
-            sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
-            "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
-            "have different sizes.");
-        static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
-                      "art::mirror::CompressedReference<mirror::Object> and int32_t "
-                      "have different sizes.");
-
-        __ Cbnz(mr, slow_path->GetEntryLabel());
-        __ Bind(slow_path->GetExitLabel());
+      ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
+      vixl::aarch64::Label return_address;
+      __ adr(lr, &return_address);
+      if (fixup_label != nullptr) {
+        __ bind(fixup_label);
       }
+      static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
+                    "GC root LDR must be 2 instructions (8B) before the return address label.");
+      __ ldr(root_reg, MemOperand(obj.X(), offset));
+      EmitBakerReadBarrierCbnz(custom_data);
+      __ bind(&return_address);
     } else {
       // GC root loaded through a slow path for read barriers other
       // than Baker's.
@@ -6380,7 +6294,7 @@
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
-  if (kBakerReadBarrierLinkTimeThunksEnableForFields && !use_load_acquire) {
+  if (!use_load_acquire) {
     // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
     // Marking Register) to decide whether we need to enter the slow
     // path to mark the reference. Then, in the slow path, check the
@@ -6457,8 +6371,7 @@
                                             use_load_acquire);
 }
 
-void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
-                                                               Location ref,
+void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(Location ref,
                                                                Register obj,
                                                                uint32_t data_offset,
                                                                Location index,
@@ -6472,76 +6385,57 @@
       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   size_t scale_factor = DataType::SizeShift(DataType::Type::kReference);
 
-  if (kBakerReadBarrierLinkTimeThunksEnableForArrays) {
-    // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
-    // Marking Register) to decide whether we need to enter the slow
-    // path to mark the reference. Then, in the slow path, check the
-    // gray bit in the lock word of the reference's holder (`obj`) to
-    // decide whether to mark `ref` or not.
-    //
-    // We use shared thunks for the slow path; shared within the method
-    // for JIT, across methods for AOT. That thunk checks the holder
-    // and jumps to the entrypoint if needed. If the holder is not gray,
-    // it creates a fake dependency and returns to the LDR instruction.
-    //
-    //     lr = &gray_return_address;
-    //     if (mr) {  // Thread::Current()->GetIsGcMarking()
-    //       goto array_thunk<base_reg>(lr)
-    //     }
-    //   not_gray_return_address:
-    //     // Original reference load. If the offset is too large to fit
-    //     // into LDR, we use an adjusted base register here.
-    //     HeapReference<mirror::Object> reference = data[index];
-    //   gray_return_address:
+  // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
+  // Marking Register) to decide whether we need to enter the slow
+  // path to mark the reference. Then, in the slow path, check the
+  // gray bit in the lock word of the reference's holder (`obj`) to
+  // decide whether to mark `ref` or not.
+  //
+  // We use shared thunks for the slow path; shared within the method
+  // for JIT, across methods for AOT. That thunk checks the holder
+  // and jumps to the entrypoint if needed. If the holder is not gray,
+  // it creates a fake dependency and returns to the LDR instruction.
+  //
+  //     lr = &gray_return_address;
+  //     if (mr) {  // Thread::Current()->GetIsGcMarking()
+  //       goto array_thunk<base_reg>(lr)
+  //     }
+  //   not_gray_return_address:
+  //     // Original reference load. If the offset is too large to fit
+  //     // into LDR, we use an adjusted base register here.
+  //     HeapReference<mirror::Object> reference = data[index];
+  //   gray_return_address:
 
-    DCHECK(index.IsValid());
-    Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
-    Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
+  DCHECK(index.IsValid());
+  Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
+  Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
 
-    UseScratchRegisterScope temps(GetVIXLAssembler());
-    DCHECK(temps.IsAvailable(ip0));
-    DCHECK(temps.IsAvailable(ip1));
-    temps.Exclude(ip0, ip1);
-    uint32_t custom_data = EncodeBakerReadBarrierArrayData(temp.GetCode());
+  UseScratchRegisterScope temps(GetVIXLAssembler());
+  DCHECK(temps.IsAvailable(ip0));
+  DCHECK(temps.IsAvailable(ip1));
+  temps.Exclude(ip0, ip1);
+  uint32_t custom_data = EncodeBakerReadBarrierArrayData(temp.GetCode());
 
-    __ Add(temp.X(), obj.X(), Operand(data_offset));
-    {
-      ExactAssemblyScope guard(GetVIXLAssembler(),
-                               (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
-      vixl::aarch64::Label return_address;
-      __ adr(lr, &return_address);
-      EmitBakerReadBarrierCbnz(custom_data);
-      static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
-                    "Array LDR must be 1 instruction (4B) before the return address label; "
-                    " 2 instructions (8B) for heap poisoning.");
-      __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
-      DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
-      // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
-      // macro instructions disallowed in ExactAssemblyScope.
-      if (kPoisonHeapReferences) {
-        __ neg(ref_reg, Operand(ref_reg));
-      }
-      __ bind(&return_address);
+  __ Add(temp.X(), obj.X(), Operand(data_offset));
+  {
+    ExactAssemblyScope guard(GetVIXLAssembler(),
+                             (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
+    vixl::aarch64::Label return_address;
+    __ adr(lr, &return_address);
+    EmitBakerReadBarrierCbnz(custom_data);
+    static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+                  "Array LDR must be 1 instruction (4B) before the return address label; "
+                  " 2 instructions (8B) for heap poisoning.");
+    __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
+    DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
+    // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
+    // macro instructions disallowed in ExactAssemblyScope.
+    if (kPoisonHeapReferences) {
+      __ neg(ref_reg, Operand(ref_reg));
     }
-    MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1));
-    return;
+    __ bind(&return_address);
   }
-
-  // Array cells are never volatile variables, therefore array loads
-  // never use Load-Acquire instructions on ARM64.
-  const bool use_load_acquire = false;
-
-  // /* HeapReference<Object> */ ref =
-  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
-  GenerateReferenceLoadWithBakerReadBarrier(instruction,
-                                            ref,
-                                            obj,
-                                            data_offset,
-                                            index,
-                                            scale_factor,
-                                            temp,
-                                            needs_null_check,
-                                            use_load_acquire);
+  MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1));
 }
 
 void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,